fibers.texi 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424
  1. \input texinfo @c -*-texinfo-*-
  2. @c %**start of header
  3. @setfilename fibers.info
  4. @settitle Fibers
  5. @c %**end of header
  6. @set VERSION 1.1.0
  7. @set UPDATED 6 August 2017
  8. @copying
  9. This manual is for Fibers (version @value{VERSION}, updated
  10. @value{UPDATED})
  11. Copyright 2016-2017 Andy Wingo
  12. Copyright 2021 Maxime Devos
  13. @quotation
  14. @c For more information, see COPYING.docs in the fibers
  15. @c distribution.
  16. Permission is granted to copy, distribute and/or modify this document
  17. under the terms of the GNU Free Documentation License, Version 1.3 or
  18. any later version published by the Free Software Foundation; with no
  19. Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
  20. @end quotation
  21. @end copying
  22. @dircategory The Algorithmic Language Scheme
  23. @direntry
  24. * Fibers: (fibers.info). Lightweight concurrency for Guile.
  25. @end direntry
  26. @titlepage
  27. @title Fibers
  28. @subtitle version @value{VERSION}, updated @value{UPDATED}
  29. @author Andy Wingo
  30. @page
  31. @vskip 0pt plus 1filll
  32. @insertcopying
  33. @end titlepage
  34. @ifnottex
  35. @node Top
  36. @top Fibers
  37. @insertcopying
  38. @menu
  39. * Introduction:: What's this all about?
  40. * Reference:: API reference.
  41. * Pitfalls:: Stay on the happy path.
  42. * Examples:: Starting points for a hack.
  43. * Status:: Fibers is a work in progress.
  44. @end menu
  45. @end ifnottex
  46. @iftex
  47. @shortcontents
  48. @end iftex
  49. @node Introduction
  50. @chapter Introduction
  51. Fibers is a facility for lightweight concurrency in Guile.
  52. @menu
  53. * Context:: How do other systems handle concurrency?
  54. * Design:: Fibers' point in the design space.
  55. * Parallelism:: Faster throughput via more cores.
  56. @end menu
  57. @node Context
  58. @section A brief history of language facilities for concurrency
  59. Modern machines have the raw capability to serve hundreds of thousands
  60. of simultaneous long-lived connections, but it's often hard to manage
  61. this at the software level. Fibers tries to solve this problem in a
  62. nice way. Before discussing the approach taken in Fibers, it's worth
  63. spending some time on history to see how we got here.
  64. One of the most dominant patterns for concurrency these days is
  65. ``callbacks'', notably in the Twisted library for Python and the
  66. Node.js run-time for JavaScript. The basic observation in the
  67. callback approach to concurrency is that the efficient way to handle
  68. tens of thousands of connections at once is with low-level operating
  69. system facilities like @code{poll} or @code{epoll}. You add all of
  70. the file descriptors that you are interested in to a ``poll set'' and
  71. then ask the operating system which ones are readable or writable, as
  72. appropriate. Once the operating system says ``yes, file descriptor
  73. 7145 is readable'', you can do something with that socket; but what?
  74. With callbacks, the answer is ``call a user-supplied closure'': a
  75. callback, representing the continuation of the computation on that
  76. socket.
  77. Building a network service with a callback-oriented concurrency system
  78. means breaking the program into little chunks that can run without
  79. blocking. Wherever a program could block, instead of just continuing
  80. the program, you register a callback. Unfortunately this requirement
  81. permeates the program, from top to bottom: you always pay the mental
  82. cost of inverting your program's control flow by turning it into
  83. callbacks, and you always incur run-time cost of closure creation,
  84. even when the particular I/O could proceed without blocking. It's a
  85. somewhat galling requirement, given that this contortion is required
  86. of the programmer, but could be done by the compiler. We Schemers
  87. demand better abstractions than manual, obligatory
  88. continuation-passing-style conversion.
  89. Callback-based systems also encourage unstructured concurrency, as in
  90. practice callbacks are not the only path for data and control flow in
  91. a system: usually there is mutable global state as well. Without
  92. strong patterns and conventions, callback-based systems often exhibit
  93. bugs caused by concurrent reads and writes to global state.
  94. Some of the problems of callbacks can be mitigated by using
  95. ``promises'' or other library-level abstractions; if you're a Haskell
  96. person, you can think of this as lifting all possibly-blocking
  97. operations into a monad. If you're not a Haskeller, that's cool,
  98. neither am I! But if your typey spidey senses are tingling, it's for
  99. good reason: with promises, your whole program has to be transformed
  100. to return promises-for-values instead of values anywhere it would
  101. block.
  102. An obvious solution to the control-flow problem of callbacks is to use
  103. threads. In the most generic sense, a thread is a language feature
  104. which denotes an independent computation. Threads are created by
  105. other threads, but fork off and run independently instead of returning
  106. to their caller. In a system with threads, there is implicitly a
  107. scheduler somewhere that multiplexes the threads so that when one
  108. suspends, another can run.
  109. In practice, the concept of threads is often conflated with a
  110. particular implementation, @dfn{kernel threads}. Kernel threads are
  111. very low-level abstractions that are provided by the operating system.
  112. The nice thing about kernel threads is that they can use any CPU that
  113. is the kernel knows about. That's an important factor in today's
  114. computing landscape, where Moore's law seems to be giving us more
  115. cores instead of more gigahertz.
  116. However, as a building block for a highly concurrent system, kernel
  117. threads have a few important problems.
  118. One is that kernel threads simply aren't designed to be allocated in
  119. huge numbers, and instead are more optimized to run in a
  120. one-per-CPU-core fashion. Their memory usage is relatively high for
  121. what should be a lightweight abstraction: some 10 kilobytes at least
  122. and often some megabytes, in the form of the thread's stack. There
  123. are ongoing efforts to reduce this for some systems but we cannot
  124. expect wide deployment in the next 5 years, if ever. Even in the best
  125. case, a hundred thousand kernel threads will take at least a gigabyte
  126. of memory, which seems a bit excessive for book-keeping overhead.
  127. Kernel threads can be a bit irritating to schedule, too: when one
  128. thread suspends, it's for a reason, and it can be that user-space
  129. knows a good next thread that should run. However because kernel
  130. threads are scheduled in the kernel, it's rarely possible for the
  131. kernel to make informed decisions. There are some ``user-mode
  132. scheduling'' facilities that are in development for some systems, but
  133. again only for some systems.
  134. The other significant problem is that building non-crashy systems on
  135. top of kernel threads is hard to do, not to mention ``correct''
  136. systems. It's an embarrassing situation. For one thing, the
  137. low-level synchronization primitives that are typically provided with
  138. kernel threads, mutexes and condition variables, are not composable.
  139. Also, as with callback-oriented concurrency, one thread can silently
  140. corrupt another via unstructured mutation of shared state. It's worse
  141. with kernel threads, though: a kernel thread can be interrupted at any
  142. point, not just at I/O. And though callback-oriented systems can
  143. theoretically operate on multiple CPUs at once, in practice they
  144. don't. This restriction is sometimes touted as a benefit by
  145. proponents of callback-oriented systems, because in such a system, the
  146. callback invocations have a single, sequential order. With multiple
  147. CPUs, this is not the case, as multiple threads can run at the same
  148. time, in parallel.
  149. Kernel threads can work. The Java virtual machine does at least
  150. manage to prevent low-level memory corruption and to do so with high
  151. performance, but still, even Java-based systems that aim for maximum
  152. concurrency avoid using a thread per connection because threads use
  153. too much memory.
  154. In this context it's no wonder that there's a third strain of
  155. concurrency: shared-nothing message-passing systems like Erlang.
  156. Erlang isolates each thread (called @dfn{processes} in the Erlang
  157. world), giving each it its own heap and ``mailbox''. Processes can
  158. spawn other processes, and the concurrency primitive is
  159. message-passing. A process that tries receive a message from an empty
  160. mailbox will ``block'', from its perspective. In the meantime the
  161. system will run other processes. Message sends never block, oddly;
  162. instead, sending to a process with many messages pending makes it more
  163. likely that Erlang will pre-empt the sending process. It's a strange
  164. trade off, but it makes sense when you realize that Erlang was designed
  165. for network transparency: the same message send/receive interface can
  166. be used to send messages to processes on remote machines as well.
  167. No network is truly transparent, however. At the most basic level,
  168. the performance of network sends should be much slower than local
  169. sends. Whereas a message sent to a remote process has to be written
  170. out byte-by-byte over the network, there is no need to copy immutable
  171. data within the same address space. The complexity of a remote
  172. message send is O(n) in the size of the message, whereas a local
  173. immutable send is O(1). This suggests that hiding the different
  174. complexities behind one operator is the wrong thing to do. And
  175. indeed, given byte read and write operators over sockets, it's
  176. possible to implement remote message send and receive as a process
  177. that serializes and parses messages between a channel and a byte sink
  178. or source. In this way we get cheap local channels, and network shims
  179. are under the programmer's control. This is the approach that the Go
  180. language takes, and is the one we use in Fibers.
  181. Structuring a concurrent program as separate threads that communicate
  182. over channels is an old idea that goes back to Tony Hoare's work on
  183. ``Communicating Sequential Processes'' (CSP). CSP is an elegant tower
  184. of mathematical abstraction whose layers form a pattern language for
  185. building concurrent systems that you can still reason about.
  186. Interestingly, it does so without any concept of time at all, instead
  187. representing a thread's behavior as a @dfn{trace} of instantaneous
  188. events. Threads themselves are like functions that unfold over the
  189. possible events to produce the actual event trace seen at run-time.
  190. This view of events as instantaneous happenings extends to
  191. communication as well. In CSP, one communication between two threads
  192. is modelled as an instantaneous event, partitioning the traces of the
  193. two threads into ``before'' and ``after'' segments.
  194. Practically speaking, this has ramifications in the Go language, which
  195. was heavily inspired by CSP. You might think that a channel is just a
  196. an asynchronous queue that blocks when writing to a full queue, or
  197. when reading from an empty queue. That's a bit closer to the Erlang
  198. conception of how things should work, though as we mentioned, Erlang
  199. simply slows down writes to full mailboxes rather than blocking them
  200. entirely. However, that's not what Go and other systems in the CSP
  201. family do; sending a message on a channel will block until there is a
  202. receiver available, and vice versa. The threads are said to
  203. ``rendezvous'' at the event.
  204. Unbuffered channels have the interesting property that you can
  205. @code{select} between sending a message on channel @var{a} or channel
  206. @var{b}, and in the end only one message will be sent; nothing happens
  207. until there is a receiver ready to take the message. In this way
  208. messages are really owned by threads and never by the channels
  209. themselves. You can of course add buffering if you like, simply by
  210. making a thread that waits on either sends or receives on a channel,
  211. and which buffers sends and makes them available to receives. It's
  212. also possible to add explicit support for buffered channels, as Go
  213. does, which can reduce the number of context switches as there is no
  214. explicit buffer thread.
  215. Whether to buffer or not to buffer is a tricky choice. It's possible
  216. to implement singly-buffered channels in a system like Erlang via an
  217. explicit send/acknowledge protocol, though it seems difficult to
  218. implement completely unbuffered channels. As we mentioned, it's
  219. possible to add buffering to an unbuffered system by the introduction
  220. of explicit buffer threads. In the end though in Fibers we follow
  221. CSP's lead so that we can implement the nice @code{select} behavior
  222. that we mentioned above.
  223. As a final point, @code{select} is OK but is not a great language
  224. abstraction. Say you call a function and it returns some kind of
  225. asynchronous result which you then have to @code{select} on. It could
  226. return this result as a channel, and that would be fine: you can add
  227. that channel to the other channels in your @code{select} set and you
  228. are good. However, what if what the function does is receive a
  229. message on a channel, then do something with the message? In that
  230. case the function should return a channel, plus a continuation (as a
  231. closure or something). If @code{select} results in a message being
  232. received over that channel, then we call the continuation on the
  233. message. Fine. But, what if the function itself wanted to
  234. @code{select} over some channels? It could return multiple channels
  235. and continuations, but that becomes unwieldy.
  236. What we need is an abstraction over asynchronous operations, and that
  237. is the main idea of a CSP-derived system called ``Concurrent ML''
  238. (CML). Originally implemented as a library on top of Standard ML of
  239. New Jersey by John Reppy, CML provides this abstraction, which in
  240. Fibers is called an @dfn{operation}@footnote{CML uses the term
  241. @dfn{event}, but we find this to be a confusing name.}. Calling
  242. @code{send-operation} on a channel returns an operation, which is just
  243. a value. Operations are like closures in a way; a closure wraps up
  244. code in its environment, which can be later called many times or not
  245. at all. Operations likewise can be @dfn{performed}@footnote{In CML,
  246. @dfn{synchronized}.} many times or not at all; performing an operation
  247. is like calling a function. The interesting part is that you can
  248. compose operations via the @code{wrap-operation} and
  249. @code{choice-operation} combinators. The former lets you bundle up an
  250. operation and a continuation. The latter lets you construct an
  251. operation that chooses over a number of operations. Calling
  252. @code{perform-operation} on a choice operation will perform one and
  253. only one of the choices. Performing an operation will call its
  254. @code{wrap-operation} continuation on the resulting values.
  255. While it's possible to implement Concurrent ML in terms of Go's
  256. channels and baked-in @code{select} statement, it's more expressive to
  257. do it the other way around, as that also lets us implement other
  258. operations types besides channel send and receive, for example
  259. timeouts and condition variables.
  260. @node Design
  261. @section Fibers design
  262. In Fibers, the unit of computation is the @dfn{fiber}, a lightweight
  263. thread managed by Guile. A fiber communicates with the outside world
  264. via normal Guile ports: @code{get-bytevector}, @code{put-string}, and
  265. all that. Within a single Guile process fibers communicate by sending
  266. and receiving Scheme values over @dfn{channels}.
  267. Whenever a fiber tries to read but no data is available, or tries to
  268. write but no data can be written, Guile will suspend the fiber and
  269. arrange for it to be resumed when the port or channel operation can
  270. proceed. In the meantime, Guile will run other fibers. When no fiber
  271. is runnable, Guile will use efficient system facilities to sleep until
  272. input or output can proceed.
  273. When a fiber would block, it suspends to the scheduler from the
  274. current thread. The scheduler will arrange to re-start the fiber when
  275. the port or channel becomes readable or writable, as appropriate. For
  276. ports, the scheduler adds the file descriptor associated with the port
  277. to an @code{epoll} set. In either case, the scheduler remembers which
  278. fibers are waiting and for what, so that the user can inspect the
  279. state of their system.
  280. Currently in Fibers there is no ambient scheduler running; an error is
  281. signalled if a user calls @code{spawn-fiber} while not inside a
  282. @code{run-fibers} invocation. However it is possible to communicate
  283. with fibers via channels or other Concurrent ML-like operations, even
  284. outside of a @code{run-fibers} invocation. If an operation would
  285. block, it suspends the entire kernel thread until the operation can
  286. proceed.
  287. On the Scheme level, a fiber is a delimited continuation. When a
  288. scheduler runs a fiber, it does so within a prompt; when the fiber
  289. suspends, it suspends to the prompt. The scheduler saves the
  290. resulting continuation as part of the fiber's state. In this way the
  291. per-fiber computational state overhead is just the size of the pending
  292. stack frames of the fiber, which can be just a handful of words.
  293. By default, Fibers takes advantage of all available cores on your
  294. system. @xref{Parallelism}, for full details.
  295. Ports are how fibers communicate with the world; channels are how
  296. fibers communicate with each other. Channels are meeting places
  297. between fibers, or between threads. A fiber or thread that goes to
  298. send a message over a channel will block until there is a fiber or
  299. thread ready to receive the message, and vice versa. Once both
  300. parties are ready, the message is exchanged and both parties resume.
  301. There can be multiple fibers and threads waiting to read and write on
  302. a channel, allowing channels to express not only pipelines but also
  303. common concurrency patterns such as fan-in and fan-out.
  304. Unlike Erlang channels, channels in Fibers are purely local and do not
  305. attempt to provide the illusion of network transparency. This does
  306. have the positive advantage that we are able to provide better
  307. backpressure support than Erlang, blocking when no receiver is
  308. available to handle a message instead of letting the sender keep
  309. sending many messages.
  310. To avoid starvation, a fiber can only run once within a ``turn''.
  311. Each turn starts with a poll on file descriptors of interest and marks
  312. the associated fibers as runnable. If no fiber is runnable at the
  313. start of the poll, the poll call will ask the kernel to wait for a
  314. runnable descriptor. Otherwise the poll call will still check for
  315. runnable file descriptors, but also ask the kernel to return
  316. immediately. There is an additional FD added to the poll set that is
  317. used to interrupt a blocking poll, for example if a fiber becomes
  318. runnable due to I/O on a channel from a separate kernel thread while
  319. the first scheduler was still polling.
  320. If a fiber runs for too long (by default, 10 milliseconds), it will be
  321. @dfn{preempted}: interrupted and rescheduled for the next turn. The
  322. preemption frequency can be tuned by the user or turned off for a
  323. fully cooperative scheduling model.
  324. To enable expressive cross-kernel-thread communications, channel sends
  325. and receives are atomic and thread-safe.
  326. @node Parallelism
  327. @section Parallelism
  328. By default, Fibers will take advantage of all CPU cores available to
  329. it. The degree of parallelism is controlled by the
  330. @code{#:parallelism} keyword argument to @code{run-fibers}, which
  331. defaults to @code{(current-processor-count)}.
  332. @xref{Threads,,,guile.info,Guile Reference Manual}, for more
  333. information on @code{current-processor-count}. Pass a different
  334. argument to @code{#:parallelism} to choose a different degree of
  335. parallelism, for example @code{1} for single-threaded operation. To
  336. allocate specific cores to a Guile process, use the @code{taskset}
  337. command-line utility.
  338. A newly spawned fiber will be scheduled on the kernel thread in which
  339. it was created, unless @code{#:parallel? #t} was passed to the
  340. @code{spawn-fiber} invocation, in which case its initial kernel thread
  341. will be selected at random. In this way the default is to preserve
  342. locality of memory access and minimize cross-thread coordination.
  343. Additionally, after a scheduler has exhausted its run queue for the
  344. current turn, if it has nothing scheduled for the next turn it will
  345. try to steal work from other schedulers. This @dfn{work stealing}
  346. allows a set of parallel schedulers to automatically rebalance and
  347. burn through the current global run queue as fast as possible.
  348. After processing its current run queue, possibly including stolen work
  349. if its next run queue was empty, a scheduler will then ask the
  350. operating system for any file descriptors that have pending activity.
  351. The scheduler puts a time limit on this sleep phase if there are
  352. pending timeouts, but otherwise the sleep will only wake up when a
  353. file descriptor becomes readable or writable, or if another thread
  354. wakes up the scheduler. Schedulers that are sleeping do not
  355. participate in work stealing. For this reason there is another source
  356. of work rebalancing in Fibers, @dfn{work sharing}. As mentioned
  357. above, to schedule a fiber on a random remote scheduler, use
  358. @code{spawn-fiber} with the @code{#:parallel? #t} keyword argument.
  359. The specifics of the scheduling algorithm may change, and it may be
  360. that there is no global ``best scheduler''. We look forward to
  361. experimenting and finding not only a good default algorithm, but also
  362. a library that you can use to find your own local maximum in the
  363. scheduling space.
  364. As far as performance goes, we have found that computationally
  365. intensive tasks parallelize rather well. Expect near-linear speedup
  366. as you make more cores available to fibers.
  367. On the other hand, although allocation rate improves with additional
  368. cores, it currently does not scale linearly, and works best when all
  369. cores are on the same NUMA node. This is due to details about how
  370. Guile manages its memory.
  371. In general there may be many bottlenecks that originate in Guile,
  372. Fibers, and in your application, and these bottlenecks constrain the
  373. ability of an application to scale linearly.
  374. Probably the best way to know if Fibers scales appropriately for your
  375. use case is to make some experiments. To restrict the set of cores
  376. available to Guile, run Guile from within @code{taskset -c}. See
  377. @code{taskset}'s manual page. For machines with multiple sockets you
  378. will probably want to use @code{numactl --membind} as well. Then to
  379. test scalability on your machine, run @code{./env guile
  380. tests/speedup.scm} from within your Fibers build directory, or
  381. benchmark your application directly. In time we should be able to
  382. develop some diagnostic facilities to help the Fibers user determine
  383. where a scaling bottleneck is in their application.
  384. @node Reference
  385. @chapter API reference
  386. Fibers is a library built on Guile. It consists of a public
  387. interface, base support for asynchronous operations, implementations
  388. of operations for channels and timers, and an internals interface.
  389. @menu
  390. * Using Fibers:: User-facing interface to fibers
  391. * Operations:: Composable abstractions for concurrency.
  392. * Channels:: Share memory by communicating.
  393. * Timers:: Operations on time.
  394. * Conditions:: Waiting for simple state changes.
  395. * Port Readiness:: Waiting until a port is ready for I/O.
  396. * REPL Commands:: Experimenting with Fibers at the console.
  397. * Schedulers and Tasks:: Fibers are built from lower-level primitives.
  398. @end menu
  399. @node Using Fibers
  400. @section Using Fibers
  401. The public interface of fibers right now is quite minimal. To use it,
  402. import the @code{(fibers)} module:
  403. @example
  404. (use-modules (fibers))
  405. @end example
  406. To create a new fibers scheduler and run it in the current Guile
  407. thread, use @code{run-fibers}.
  408. @defun run-fibers [init-thunk=@code{#f}] @
  409. [#:install-suspendable-ports?=@code{#t}] @
  410. [#:scheduler=@code{#f}] @
  411. [#:parallelism=@code{(current-processor-count)}] @
  412. [#:cpus=@code{(getaffinity 0)}] @
  413. [#:hz=@code{100}] [#:drain?=@code{#f}]
  414. Run @var{init-thunk} within a fiber in a fresh scheduler, blocking
  415. until @var{init-thunk} returns. Return the value(s) returned by the
  416. call to @var{init-thunk}.
  417. For example:
  418. @example
  419. (run-fibers (lambda () 1))
  420. @result{} 1
  421. (run-fibers
  422. (lambda ()
  423. (spawn-fiber (lambda () (display "hey!\n"))))
  424. #:drain? #t)
  425. @print{} hey!
  426. @end example
  427. Calling @code{run-fibers} will ensure that Guile's port implementation
  428. allows fibers to suspend if a read or a write on a port would block.
  429. @xref{Non-Blocking I/O,,,guile.info,Guile Reference Manual}, for more
  430. details on suspendable ports. If for some reason you want port reads
  431. or writes to prevent other fibers from running, pass @code{#f} as the
  432. @code{#:install-suspendable-ports?} keyword argument.
  433. By default, @code{run-fibers} will create a fresh scheduler, and
  434. destroy it after @code{run-fibers} finishes. If you happen to have a
  435. pre-existing scheduler (because you used the low-level scheduler
  436. interface to create one), you can pass it to @code{run-fibers} using
  437. the @code{#:scheduler} keyword argument. In that case the scheduler
  438. will not be destroyed when @code{run-fibers} finishes.
  439. @code{run-fibers} will return when the @var{init-thunk} call returns.
  440. To make it additionally wait until there are no more runnable fibers
  441. or pending timeouts, specify the @code{#:drain? #t} keyword argument.
  442. If @code{run-fibers} creates a scheduler on your behalf, it will
  443. arrange for a number of ``peer'' schedulers to also be created, up to
  444. a total scheduler count controlled by the @var{parallelism} keyword
  445. argument. These peer schedulers will be run in separate threads and
  446. will participate in work rebalancing. The fibers will be run on the
  447. CPUs specified by @var{cpus}. @xref{Parallelism}.
  448. By default @var{hz} is 100, indicating that running fibers should be
  449. preempted 100 times per every second of CPU time (not wall-clock
  450. time). Note that preemption will only occur if the fiber can actually
  451. be suspended; @xref{Barriers}, for more information. Pass @code{0}
  452. for @var{hz} to disable preemption, effectively making scheduling
  453. fully cooperative.
  454. @end defun
  455. @defun spawn-fiber thunk [scheduler=@code{(require-current-scheduler)}] @
  456. [#:parallel?=@code{#f}]
  457. Spawn a new fiber that will run @var{thunk}. Return the new fiber.
  458. The new fiber will run concurrently with other fibers.
  459. The fiber will be added to the current scheduler, which is usually
  460. what you want. It's also possible to spawn the fiber on a specific
  461. scheduler, which is useful to ensure that the fiber runs on a
  462. different kernel thread. In that case, pass the optional
  463. @code{scheduler} argument.
  464. If @var{parallel?} is true, the fiber will be started not
  465. (necessarily) on @var{scheduler}, but on a random member of the peer
  466. set of @var{scheduler}. @xref{Parallelism}. Note that every
  467. scheduler is a member of its own peer set.
  468. The fiber will inherit the fluid--value associations (the dynamic
  469. state) in place when @code{spawn-fiber} is called. Any
  470. @code{fluid-set!} or parameter set within the fiber will not affect
  471. fluid or parameter bindings outside the fiber.
  472. @end defun
  473. @defun sleep seconds
  474. Wake up the current fiber after @var{seconds} of wall-clock time have
  475. elapsed. This definition will replace the binding for @code{sleep} in
  476. the importing module, effectively overriding Guile's ``core''
  477. definition.
  478. @end defun
  479. @node Operations
  480. @section Operations
  481. Operations are first-class abstractions for asynchronous events.
  482. There are primitive operation types, such as waiting for a timer
  483. (@pxref{Timers}) or waiting for a message on a channel
  484. (@pxref{Channels}). Operations can also be combined and transformed
  485. using the @code{choice-operation} and @code{wrap-operation} from this module:
  486. @example
  487. (use-modules (fibers operations))
  488. @end example
  489. @defun wrap-operation op f
  490. Given the operation @var{op}, return a new operation that, if and when
  491. it succeeds, will apply @var{f} to the values yielded by performing
  492. @var{op}, and yield the result as the values of the wrapped operation.
  493. @end defun
  494. @defun choice-operation . ops
  495. Given the operations @var{ops}, return a new operation that if it
  496. succeeds, will succeed with one and only one of the sub-operations
  497. @var{ops}.
  498. @end defun
  499. Finally, once you have an operation, you can perform it using
  500. @code{perform-operation}.
  501. @defun perform-operation op
  502. Perform the operation @var{op} and return the resulting values. If the
  503. operation cannot complete directly, block until it can complete.
  504. @end defun
  505. @xref{Introduction}, for more on the ``Concurrent ML'' system that
  506. introduced the concept of the operation abstraction. In the context
  507. of Fibers, ``blocking'' means to suspend the current fiber, or to
  508. suspend the current kernel thread if the operation is performed
  509. outside of a fiber.
  510. There is also a low-level constructor for other modules that implement
  511. primitive operation types:
  512. @defun make-base-operation wrap-fn try-fn block-fn
  513. Make a fresh base operation.
  514. @end defun
  515. This is a low-level constructor, though; if you ever feel the need to
  516. call @code{make-base-operation}, make sure you're familiar with the
  517. Concurrent ML literature. Godspeed!
  518. @node Channels
  519. @section Channels
  520. Channels are the way to communicate between fibers. To use them, load
  521. the channels module:
  522. @example
  523. (use-modules (fibers channels))
  524. @end example
  525. @defun make-channel
  526. Make a fresh channel.
  527. @end defun
  528. @defun channel? obj
  529. Return @code{#t} if @var{obj} is a channel, or @code{#f} otherwise.
  530. @end defun
  531. @defun put-operation channel message
  532. Make an operation that if and when it completes will rendezvous with a
  533. receiving operation to send @var{message} over @var{channel}.
  534. @end defun
  535. @defun get-operation channel
  536. Make an operation that if and when it completes will rendezvous with a
  537. sending operation to receive one value from @var{channel}.
  538. @end defun
  539. @defun put-message channel message
  540. Send @var{message} on @var{channel}, and return zero values. If there
  541. is already a receiver waiting to receive a message on this channel,
  542. give it our message and continue. Otherwise, block until a receiver
  543. becomes available.
  544. Equivalent to:
  545. @example
  546. (perform-operation (put-operation channel message))
  547. @end example
  548. @end defun
  549. @defun get-message channel
  550. Receive a message from @var{channel} and return it. If there is
  551. already a receiver waiting to send a message on this channel, take its
  552. message directly. Otherwise, block until a sender becomes available.
  553. Equivalent to:
  554. @example
  555. (perform-operation (get-operation channel))
  556. @end example
  557. @end defun
  558. Channels are thread-safe; you can use them to send and receive values
  559. between fibers on different kernel threads.
  560. @node Timers
  561. @section Timers
  562. Timers are a kind of operation that, you guessed it, let you sleep
  563. until a certain time.
  564. @example
  565. (use-modules (fibers timers))
  566. @end example
  567. @defun sleep-operation seconds
  568. Make an operation that will succeed with no values when @var{seconds}
  569. have elapsed.
  570. @end defun
  571. @defun timer-operation expiry
  572. Make an operation that will succeed when the current time is greater
  573. than or equal to @var{expiry}, expressed in internal time units. The
  574. operation will succeed with no values.
  575. @end defun
  576. @defun sleep seconds
  577. Block the calling fiber or kernel thread until @var{seconds} have
  578. elapsed.
  579. @end defun
  580. @node Conditions
  581. @section Conditions
  582. Condition variables are a simple one-bit form of concurrent
  583. communication. A condition variable has two states: it starts in the
  584. @dfn{unsignalled} state and later may transition to the
  585. @dfn{signalled} state. When a condition becomes signalled, any
  586. associated waiting operations complete.
  587. @example
  588. (use-modules (fibers conditions))
  589. @end example
  590. @defun make-condition
  591. Make a new condition variable.
  592. @end defun
  593. @defun condition? obj
  594. Return @code{#t} if @var{obj} is a condition variable, or @code{#f}
  595. otherwise.
  596. @end defun
  597. @defun signal-condition! cvar
  598. Signal @var{cvar}, notifying all waiting fibers and preventing
  599. blocking of future fibers waiting on this condition.
  600. @end defun
  601. @defun wait-operation cvar
  602. Make an operation that will succeed with no values when @var{cvar}
  603. becomes signalled.
  604. @end defun
  605. @defun wait cvar
  606. Block the calling fiber or kernel thread until @var{cvar} becomes
  607. signalled. Equivalent to @code{(perform-operation (wait-operation
  608. cvar))}.
  609. @end defun
  610. @node Port Readiness
  611. @section Port Readiness
  612. These two operations can be used on file ports to wait until
  613. they are readable or writable. Spurious wake-ups are possible.
  614. This is complementary to Guile's suspendable ports.
  615. @example
  616. (use-modules (fibers io-wakeup))
  617. @end example
  618. @defun wait-until-port-readable-operation port
  619. Make an operation that will succeed with no values when the input
  620. port @var{port} becomes readable. For passive sockets, this operation
  621. succeeds when a connection becomes available.
  622. @end defun
  623. @defun wait-until-port-writable-operation
  624. Make an operation that will succeed with no values when the output
  625. port @var{port} becomes writable.
  626. @end defun
  627. @node REPL Commands
  628. @section REPL Commands
  629. Fibers implements some basic extensions to the Guile command-line
  630. interface (its Read-Eval-Print Loop, or the REPL). Prefix these
  631. commands with a comma (@code{,}) to run them at the REPL; see
  632. @code{,help fibers} for full details, once you have loaded the
  633. @code{(fibers)} module of course.
  634. @deffn {REPL Command} scheds
  635. Show a list of all schedulers.
  636. @end deffn
  637. @deffn {REPL Command} spawn-sched
  638. Create a new scheduler for fibers, and run it on a new kernel thread.
  639. @end deffn
  640. @deffn {REPL Command} kill-sched name
  641. Shut down the scheduler named @var{name}. Use @code{,scheds} to list
  642. scheduler names.
  643. @end deffn
  644. @deffn {REPL Command} spawn-fiber exp [sched]
  645. Spawn a new fiber that runs @var{exp}. If @var{sched} is given, the
  646. fiber will be spawned on the given scheduler.
  647. @end deffn
  648. @node Schedulers and Tasks
  649. @section Schedulers and Tasks
  650. Internally, fibers are built on top of schedulers and tasks.
  651. A scheduler runs tasks. A task is just a thunk (a function of no
  652. arguments) whose return value is ignored. A scheduler runs tasks in
  653. batches, once per turn. Each turn, a scheduler takes all tasks from
  654. its ``next'' run-queue and adds them to its ``current'' run-queue, and
  655. then runs the tasks on the current run-queue in order. The scheduler
  656. then goes to the next turn, unless its ``finished?'' function returns
  657. true.
  658. Both the ``next'' and the ``current'' run-queues are public atomic
  659. data structures. Scheduling a task adds it to the ``next'' run-queue.
  660. Scheduling a task is a thread-safe operation; it can be done by any
  661. thread. Scheduling a task on a scheduler running on a remote thread
  662. will ensure that the thread wakes up from any sleeping operation it
  663. might be currently engaged in.
  664. There is normally just one scheduler for each kernel thread that runs
  665. fibers. Several schedulers can be made aware of each other so that
  666. they can one can spread out the load when spawning tasks that should
  667. run in parallel. Also, before moving to the next turn, a scheduler
  668. will try to steal work from other schedulers that it knows about,
  669. popping off an item from the remote scheduler's ``current'' run-queue.
  670. There are two additional sources of tasks for a scheduler: file
  671. descriptor events and timers. When gathering tasks to schedule for
  672. the next turn, a scheduler will call @code{epoll} to be notified of
  673. activity on file descriptors of interest. If there are no pending
  674. tasks on the next run-queue, the call to @code{epoll} will sleep until
  675. the scheduler is woken up, or until a timer expires.
  676. The capability that allows fibers to be built on schedulers is that
  677. tasks can suspend. Suspending a task calls a user-supplied
  678. after-suspend handler that is passed the continuation of the task.
  679. The user can then schedule that continuation at some later time. In
  680. this way a fiber starts as a single task run by a scheduler, but each
  681. time it suspends and is resumed, a fresh task consisting of the
  682. fiber's is scheduled. The fibers layer also uses other Guile
  683. mechanisms to isolate fibers from each other, such as dynamic states.
  684. All interfaces in this module are thread-safe except where marked
  685. otherwise.
  686. @example
  687. (use-modules (fibers scheduler))
  688. @end example
  689. @defun make-scheduler [#:parallelism=@code{#f}] @
  690. [#:prompt-tag=@code{(make-prompt-tag "fibers")}]
  691. Make a new scheduler in which to run fibers. If @var{parallelism} is
  692. true, it should be an integer indicating the number of schedulers to
  693. make. The resulting schedulers will all share the same prompt tag and
  694. will steal and share out work from among themselves.
  695. @end defun
  696. @defun run-scheduler sched finished?
  697. Run @var{sched} until calling the supplied @var{finished?} thunk
  698. returns true. Return zero values. Signal an error if @var{scheduler}
  699. is already running in some other kernel thread.
  700. @end defun
  701. @defun current-scheduler
  702. Return the current scheduler, or @code{#f} if no scheduler is current.
  703. @end defun
  704. @defun scheduler-kernel-thread sched
  705. Return the kernel thread that @var{sched} is running on, or @code{#f}
  706. if it is not currently running.
  707. @end defun
  708. @defun scheduler-runcount sched
  709. Return the number of tasks that have been run on @var{sched}, modulo
  710. @math{2^{32}}. This interface is useful as a lightweight check to see if
  711. a remote scheduler is making progress.
  712. @end defun
  713. @defun scheduler-remote-peers sched
  714. Return a list of peer schedulers of @var{sched}, not including
  715. @var{sched} itself.
  716. @end defun
  717. @defun scheduler-work-pending? sched
  718. Return @code{#t} if @var{sched} has any work pending: any runnable
  719. tasks or any pending timeouts.
  720. @end defun
  721. @defun choose-parallel-scheduler sched
  722. Return a random scheduler from @var{sched}'s peer set. Note that
  723. @var{sched}'s peer set includes @var{sched} itself.
  724. @end defun
  725. @defun destroy-scheduler sched
  726. Release any resources associated with @var{sched}.
  727. @end defun
  728. @defun schedule-task sched task
  729. Arrange to run @var{task}, a procedure of no arguments, on the next
  730. turn of @var{sched}. If @var{sched} is remote and sleeping, it will
  731. be woken up.
  732. @end defun
  733. @defun schedule-task-when-fd-readable sched fd task
  734. Arrange to schedule @var{task} when the file descriptor @var{fd}
  735. becomes readable. @emph{Not thread-safe.}
  736. @end defun
  737. @defun schedule-task-when-fd-writable sched fd task
  738. Arrange to schedule @var{task} on @var{sched} when the file descriptor
  739. @var{fd} becomes writable. @emph{Not thread-safe.}
  740. @end defun
  741. @defun schedule-task-at-time sched expiry task
  742. Arrange to schedule @var{task} on @var{sched} when the absolute real
  743. time is greater than or equal to @var{expiry}, expressed in internal
  744. time units. @emph{Not thread-safe.}
  745. @end defun
  746. @defun suspend-current-task after-suspend
  747. Suspend the current task to the current scheduler. After suspending,
  748. call the @var{after-suspend} callback with two arguments: the current
  749. scheduler, and the continuation of the current task. The continuation
  750. passed to the @var{after-suspend} handler is the continuation of the
  751. @code{suspend-current-task} call.
  752. @end defun
  753. @defun yield-current-task
  754. Yield control to the current scheduler. Like calling
  755. @code{(suspend-current-task schedule-task)} except that it avoids
  756. suspending if the current continuation isn't suspendable. Returns
  757. @code{#t} if the yield succeeded, or @code{#f} otherwise.
  758. @end defun
  759. @node Pitfalls
  760. @chapter Pitfalls
  761. Running Guile code within a fiber mostly ``just works''. There are a
  762. few pitfalls to be aware of though.
  763. @menu
  764. * Blocking:: Avoid calling blocking operations.
  765. * Barriers:: Avoid suspending inside continuation barriers.
  766. * Mutation:: Avoid unstructured mutation of shared data.
  767. * Mutexes:: Mutexes and fibers don't mix very well.
  768. @end menu
  769. @node Blocking
  770. @section Blocking
  771. When you run a program under fibers, the fibers library arranges to
  772. make it so that port operations can suspend the fiber instead of
  773. block. This generally works, with some caveats.
  774. @enumerate
  775. @item
  776. The port type has to either never block, or support non-blocking I/O.
  777. Currently the only kind of port in Guile are file ports (including
  778. sockets), and for them this condition is fulfilled. However notably
  779. non-blocking I/O is not supported for custom binary I/O ports, not yet
  780. anyway. If you need this, get it fixed in Guile :)
  781. @item
  782. You have to make sure that any file port you operate on is opened in
  783. nonblocking mode. @xref{Non-Blocking I/O,,,guile.info,Guile Reference
  784. Manual}, for the obscure @code{fcntl} incantation to use on your
  785. ports.
  786. @item
  787. You have to avoid any operation on ports that is not supported yet in
  788. Guile for non-blocking I/O. Since non-blocking I/O is new in Guile,
  789. only some I/O operations are expressed in terms of the primitive
  790. operations. Notably, Scheme @code{read}, @code{display}, and
  791. @code{write} are still implemented in C, which prevents any fiber that
  792. uses them from suspending and resuming correctly. What will happen
  793. instead is that the call blocks instead of suspending. If you find a
  794. situation like this, talk to Guile developers to get it fixed :)
  795. @item
  796. You can enable non-blocking I/O for local files, but Linux at least
  797. will always say that the local file is ready for I/O even if it has to
  798. page in data from a spinning-metal device. This is a well-known
  799. limitation for which the solution is apparently to do local I/O via a
  800. thread pool. We could implement this in Fibers, or in Guile... not
  801. sure what the right thing is!
  802. @end enumerate
  803. You also have to avoid any other library or system calls that would
  804. block. One common source of blocking is @code{getaddrinfo} and
  805. related network address resolution library calls. Again, apparently
  806. the solution is thread pools? Probably in Fibers we should implement
  807. a thread-pooled address resolver.
  808. The @code{(fibers)} module exports a @code{sleep} replacement. Code
  809. that sleeps should import the @code{(fibers)} module to be sure that
  810. they aren't using Guile's @code{sleep} function.
  811. Finally, a fiber itself has to avoid blocking other fibers; it must
  812. reach a ``yield point'' some time. A yield point includes a read or
  813. write on a port or a channel that would block, or a @code{sleep}.
  814. Other than that, nothing will pre-empt a fiber, at least not
  815. currently. If you need to yield to the scheduler, then at least do a
  816. @code{(sleep 0)} or something.
  817. @node Barriers
  818. @section Barriers
  819. When a fiber suspends, Fibers uses @code{abort-to-prompt} to save the
  820. fiber's continuation, saving each pending computation in that fiber to
  821. the heap. When the fiber resumes, Fibers invokes the saved
  822. continuation, effectively replaying these saved stack frames back onto
  823. the current stack. For this operation to succeed, the saved
  824. continuation needs to be @dfn{suspendable}. A suspendable
  825. continuation should be able to be resumed after the call to
  826. @code{abort-to-prompt}.
  827. Most continuations in Guile are suspendable. However, not all of them
  828. are. It's possible to explicitly instate a continuation barrier
  829. (@pxref{Continuation Barriers,,,guile.info,Guile Reference Manual})
  830. that will allow the continuation to be aborted but not reinstated:
  831. @example
  832. ;; If put-message suspends, we will never resume!
  833. (run-fibers
  834. (lambda ()
  835. (with-continuation-barrier
  836. (lambda () (put-message channel 42)))))
  837. @end example
  838. If the @code{put-message} call can't succeed directly, then the fiber
  839. will suspend. However when the fiber becomes runnable again, it can't
  840. be rewound because of the barrier. Because this is the case, when
  841. Fibers goes to suspend a computation but realizes that the suspended
  842. fiber could never be resumed, it throws an error instead.
  843. @code{with-continuation-barrier} is the only function in Guile that
  844. establishes a continuation barrier on purpose. However there are
  845. number of other functions that accidentally establish a continuation
  846. barrier by recursing into C code and then back to Scheme. (Guile can
  847. only rewind the state of a saved computation if Guile created the
  848. corresponding stack frame, and that's not the case for the
  849. intermediate stack frame created by the C compiler.)
  850. Accidental continuation barriers are bugs, and the Guile developers
  851. have been working on removing them over the years. By now, most of
  852. the high-priority accidental barriers are gone. Those that are left
  853. include:
  854. @itemize
  855. @item The body thunk of @code{call-with-blocked-asyncs}
  856. @item GOOPS methods attached to a primitive-generic like @code{+} or
  857. @code{equal?}
  858. @item Dynwind entry/exit handlers, but only when called due to nonlocal
  859. entry or exit
  860. @item R6RS custom binary port callbacks
  861. @item Legacy ``soft port'' callbacks
  862. @item R5RS ``delay'' callbacks
  863. @item Many module system callbacks (module transformers, etc)
  864. @item SRFI-13 string and character-set callbacks
  865. @item Callbacks from some SRFI-1 functions
  866. @item Callbacks from @code{sort}
  867. @item Custom hash table assoc functions
  868. @item Calls to @code{load-from-path} (though, oddly, not @code{load})
  869. @item Object printers, e.g. custom record printers
  870. @item @code{call-with-vm}
  871. @item @code{array-map} and related array functions
  872. @end itemize
  873. This set will be reduced over time as more of @code{libguile} is
  874. rewritten in Scheme.
  875. Finally, for port operations, @xref{Non-Blocking
  876. I/O,,,guile.info,Guile Reference Manual}. When Guile tries to read
  877. from a file descriptor and nothing is available, normally it would
  878. call the current read waiter, which Fibers customizes to suspend the
  879. fiber and run another one in the meantime. However for procedures
  880. that have not been rewritten in terms of the ``suspendable port
  881. operations'', notably including @code{read}, @code{write}, and
  882. @code{display}, the nothing-to-read condition is handled in C, not
  883. Scheme, so Guile cannot create a resumable continuation. In this
  884. case, instead of erroring, Guile will wait until the file descriptor
  885. is readable or writable (as appropriate) and then continue. However
  886. in the meantime, which may be forever, this blocks other fibers from
  887. running. Therefore Fibers users sometimes have to be aware of the
  888. state of Guile's rewrite of port operations in terms of
  889. suspendable-port primitives, and to help out if things aren't moving
  890. fast enough :)
  891. @node Mutation
  892. @section Mutation
  893. Although code run within fibers looks like normal straight-up Scheme,
  894. it runs concurrently with other fibers. This means that if you mutate
  895. shared state and other fibers mutate the same shared state using
  896. normal Scheme procedures like @code{set!}, @code{vector-set!}, or the
  897. like, then probably you're going to have a bad time. This is
  898. especially the case considering that the default is to run as many
  899. schedulers in parallel as your machine has cores, and also to preempt
  900. fibers at any time.
  901. Even if you explicitly choose a cooperative scheduling mode by
  902. disabling interrupts and parallelism, multi-step transactions may be
  903. suspended if your code reaches a yield point in the middle of
  904. performing the transaction.
  905. The best way around this problem is to avoid unstructured mutation,
  906. and to instead share data by communicating over channels. Using
  907. channels to communicate data produces much more robust, safe systems.
  908. If you need to mutate global data, the best way is to use an atomic
  909. variable. If that is not possible, then consider spawning a fiber to
  910. manage the mutable data, and communicating with that fiber over
  911. channels. Mutexes are also an option but are difficult to use
  912. correctly; see the considerations from the following section.
  913. @node Mutexes
  914. @section Mutexes
  915. Mutexes are low-level synchronization primitives provided by Guile.
  916. Used properly, they can be used to build concurrent systems that
  917. concurrently access data without corrupting it.
  918. @xref{Mutexes and Condition Variables,,,guile.info,Guile Reference
  919. Manual}, for some reasons why mutexes aren't so great for Guile in
  920. general.
  921. Guile's mutexes are an even worse solution with a Fibers system. It
  922. is a bad idea for a fiber to grab a Guile mutex, because if the mutex
  923. is not available, Guile will suspend not just the fiber that is
  924. running but the entire kernel thread. If the mutex is available, the
  925. fiber obtains it, cool; but if it the fiber suspends while holding a
  926. mutex, that's bad news. Any fiber trying to acquire a mutex while a
  927. suspended fiber from the same thread already has the mutex will result
  928. in an error: as Guile thinks that the mutex has already been acquired
  929. by the current thread, it detects recursion and bails out.
  930. With condition variables, similar problems arise: waiting on a
  931. condition variable will block indefinitely, if the condition can only
  932. be signalled by another fiber in the current kernel thread.
  933. The root of this problem is that Guile associates mutexes with kernel
  934. threads, not fibers. It would be possible however to make a
  935. Fibers-appropriate implementation of mutexes, but we suggest that
  936. users try atomic boxes or channels instead. If you do use mutexes,
  937. make sure you disable preemption (possibly by a local call to
  938. @code{call-with-blocked-asyncs}), and take care to never suspend a
  939. fiber while it owns any mutex.
  940. @node Examples
  941. @chapter Examples
  942. Here are some small examples to get you started.
  943. @menu
  944. * Ping:: An echo server and client.
  945. * Memcached:: A simple memcached server and client.
  946. * Web Server Backend:: A backend for Guile's web server.
  947. * Concurrent Web Server:: A more concurrent web server.
  948. @end menu
  949. More examples would be great, especially demonstrating interesting
  950. things that can be done with channels.
  951. @node Ping
  952. @section Ping
  953. @subsection Server
  954. This simple server listens on a TCP port, echoing lines back to any
  955. user that connects. This file can be found in
  956. @code{examples/ping-server.scm}, and can be run from the build dir as
  957. @code{./env guile examples/ping-server.scm}.
  958. First, we use some standard Guile modules, and the fibers module.
  959. @example
  960. (use-modules (rnrs bytevectors)
  961. (fibers)
  962. (ice-9 textual-ports)
  963. (ice-9 rdelim)
  964. (ice-9 match))
  965. @end example
  966. We run the server within a @code{run-fibers} call.
  967. @example
  968. (define* (run-ping-server #:key
  969. (host #f)
  970. (family AF_INET)
  971. (addr (if host
  972. (inet-pton family host)
  973. INADDR_LOOPBACK))
  974. (port 11211)
  975. (socket (make-default-socket family addr port)))
  976. (listen socket 1024)
  977. (sigaction SIGPIPE SIG_IGN)
  978. (socket-loop socket (make-hash-table)))
  979. (run-fibers run-ping-server)
  980. @end example
  981. Up to here, all good. Perhaps we should look at how to open a socket;
  982. here's a couple helper that appears often in applications that use
  983. suspendable ports. @xref{Non-Blocking I/O,,,guile.info,Guile
  984. Reference Manual}, for full details.
  985. @example
  986. (define (make-default-socket family addr port)
  987. (let ((sock (socket PF_INET SOCK_STREAM 0)))
  988. (setsockopt sock SOL_SOCKET SO_REUSEADDR 1)
  989. (fcntl sock F_SETFD FD_CLOEXEC)
  990. (fcntl sock F_SETFL (logior O_NONBLOCK (fcntl sock F_GETFL)))
  991. (bind sock family addr port)
  992. sock))
  993. @end example
  994. We hope to make this easier in the future; it's a bit too much
  995. ceremony. Now, the main dish is the server loop, that simply accepts
  996. new connections, forking off a fiber for each connection:
  997. @example
  998. (define (socket-loop socket store)
  999. (let loop ()
  1000. (match (accept socket SOCK_NONBLOCK)
  1001. ((client . addr)
  1002. (spawn-fiber (lambda () (client-loop client addr store)))
  1003. (loop)))))
  1004. @end example
  1005. Finally, the loop to handle a single client:
  1006. @example
  1007. (define (client-loop port addr store)
  1008. (setvbuf port 'block 1024)
  1009. ;; Disable Nagle's algorithm. We buffer ourselves.
  1010. (setsockopt port IPPROTO_TCP TCP_NODELAY 1)
  1011. (let loop ()
  1012. ;; TODO: Restrict read-line to 512 chars.
  1013. (let ((line (read-line port)))
  1014. (cond
  1015. ((eof-object? line)
  1016. (close-port port))
  1017. (else
  1018. (put-string port line)
  1019. (put-char port #\newline)
  1020. (force-output port)
  1021. (loop))))))
  1022. @end example
  1023. This ping server is fairly straightforward, and is only flawed in a
  1024. couple of ways: it doesn't limit the line size, and so is vulnerable
  1025. to memory exhaustion if the client gives it a very, very big line, and
  1026. additionally, it does not time out clients after inactivity, so the
  1027. poll set could get quite large.
  1028. In practice the limit for the number of connections is imposed by the
  1029. system in the form of a file descriptor limit. Use @code{ulimit -n}
  1030. to increase this limit on the console, or @code{setrlimit} to increase
  1031. it from Guile, within the hard limits imposed by the system.
  1032. @subsection Client
  1033. The client is similar to the server; see
  1034. @code{examples/ping-client.scm} for full details. It can be run as
  1035. @code{./env guile examples/ping-client.scm N M}, to make N concurrent
  1036. connections to the server and make a series of M requests on each
  1037. connection. It spawns a fiber per connection, and then uses a normal
  1038. Guile loop to make the serial requests.
  1039. @example
  1040. (define (run-ping-test num-clients num-connections)
  1041. ;; The getaddrinfo call blocks, unfortunately. Call it once before
  1042. ;; spawning clients.
  1043. (let ((addrinfo (car (getaddrinfo "localhost" "11211"))))
  1044. (let lp ((n 0))
  1045. (when (< n num-clients)
  1046. (spawn-fiber
  1047. (lambda ()
  1048. (client-loop addrinfo n num-connections)))
  1049. (lp (1+ n))))))
  1050. @end example
  1051. Running this on a laptop from 2015 yields results more or less like
  1052. this:
  1053. @example
  1054. $ time ./env guile examples/ping-client.scm 1000 100
  1055. real 0m1.647s
  1056. user 0m2.176s
  1057. sys 0m0.816s
  1058. @end example
  1059. That's a throughput of somewhere around 60000 fiber switches per
  1060. second on each side, which is none too shabby.
  1061. @node Memcached
  1062. @section Memcached
  1063. Similarly to the echo server, Fibers includes an example memcached
  1064. server and client. Run the server like this:
  1065. @example
  1066. $ ./env guile examples/memcached-server.scm
  1067. @end example
  1068. Run the client as with the ping client:
  1069. @example
  1070. $ time ./env guile examples/memcached-client.scm 1000 100
  1071. real 0m6.343s
  1072. user 0m9.868s
  1073. sys 0m1.808s
  1074. @end example
  1075. Here we see a throughput of around 16000 puts plus 16000 gets per
  1076. second on this 2-core, 2-thread-per-core machine; pretty OK for a
  1077. basic implementation.
  1078. @node Web Server Backend
  1079. @section Web Server Backend
  1080. Fibers includes a ``backend'' for Guile's built-in web server that
  1081. uses non-blocking fibers to read requests and write responses. Fibers
  1082. also includes a standalone web server that uses Guile's HTTP
  1083. facilities, but not its web server framework. @xref{Concurrent Web
  1084. Server}, for more on the standalone web server.
  1085. To run a web server that serves each client from
  1086. fibers, specify the @code{fibers} backend when running your web
  1087. server:
  1088. @example
  1089. (use-modules (web server))
  1090. (define (handler request body)
  1091. (values '((content-type . (text/plain)))
  1092. "Hello, World!"))
  1093. (run-server handler 'fibers)
  1094. @end example
  1095. Performance seems to be about 60% of the standard web server backend
  1096. implementation shipped with Guile, though it is not as
  1097. battle-hardened.
  1098. The fibers web server backend is an interesting case because it uses
  1099. channels to invert the inversion-of-control imposed on the backend by
  1100. the web server framework. The web server wants the backend to provide
  1101. ``read-request'' and ``write-response'' callbacks, whereas in fibers
  1102. we usually want to run dedicated REPL-like fibers over the client
  1103. sockets. The fibers backend enables this by passing a callback to the
  1104. per-client loops:
  1105. @example
  1106. (define (have-request response-channel request body)
  1107. (put-message request-channel
  1108. (vector response-channel request body))
  1109. (match (get-message response-channel)
  1110. (#(response body)
  1111. (values response body))))
  1112. (let loop ()
  1113. (match (accept socket SOCK_NONBLOCK)
  1114. ((client . sockaddr)
  1115. ;; ...
  1116. (spawn-fiber (lambda () (client-loop client have-request))
  1117. #:parallel? #t)
  1118. (loop))))
  1119. @end example
  1120. From the perspective of the @code{client-loop} fiber,
  1121. @code{have-request} is a normal function that takes a request and
  1122. returns a response, and the @code{client-loop} fiber is in control.
  1123. But from the perspective of the web server, the @code{server-read} and
  1124. @code{server-write} callbacks are straightforward and idiomatic too:
  1125. @example
  1126. (define (server-read server)
  1127. (match (get-message (server-request-channel server))
  1128. (#(response-channel request body)
  1129. (let ((client response-channel))
  1130. (values client request body)))))
  1131. (define (server-write server client response body)
  1132. (let ((response-channel client))
  1133. (put-message response-channel (vector response body)))
  1134. (values))
  1135. @end example
  1136. This takes advantage of the fact that we can use @code{get-message},
  1137. @code{put-message}, and other CML operations both inside and outside
  1138. of fibers, and it mostly just does the right thing.
  1139. Note also the @code{#:parallel? #t} on the @code{spawn-fiber}
  1140. invocation. The natural unit of parallelism in a web server is the
  1141. request (or the client), so it's at this point that we introduce work
  1142. sharing to our system, allowing us to naturally take advantage of
  1143. multiple cores on our server.
  1144. @node Concurrent Web Server
  1145. @section Concurrent Web Server
  1146. Guile's web server framework single-threads all web request handling.
  1147. The handler procedure can be passed a number of additional ``state''
  1148. arguments, and is expected to return a corresponding number of
  1149. additional values to use as the next state. This is sometimes what
  1150. you want, but it does limit concurrency; instead it would be nice to
  1151. be able to not only the input and output running concurrently, but
  1152. also handlers too.
  1153. For this reason, Fibers includes a simple standalone web server that
  1154. uses Guile's Guile's HTTP facilities, but not its web server
  1155. framework. To run a standalone web server, use the @code{(fibers web
  1156. server)} module:
  1157. @example
  1158. (use-modules (fibers web server))
  1159. (define (handler request body)
  1160. (values '((content-type . (text/plain)))
  1161. "Hello, World!"))
  1162. (run-server handler)
  1163. @end example
  1164. Compared to the Fibers web server backend (@pxref{Web Server
  1165. Backend}), using the standalone fibers web server enables more
  1166. parallelism, as the handlers can run in parallel when you have
  1167. multiple cores. Single-core performance of the standalone server is
  1168. slightly better than the web server backend, and unlike the backend it
  1169. scales with the number of cores available.
  1170. @node Status
  1171. @chapter Project status
  1172. Fibers is feature-complete and ready to go! It's early days but
  1173. things are solid enough to say without embarrassment or misgiving that
  1174. Guile now has a solid concurrency story. Use fibers, incorporate it
  1175. directly into your project, fork it, improve it: what happens now is
  1176. up to you. Happy hacking and godspeed!
  1177. @c @node Concept Index
  1178. @c @unnumbered Concept Index
  1179. @c @printindex cp
  1180. @c @node Function Index
  1181. @c @unnumbered Function Index
  1182. @c @printindex fn
  1183. @bye