ra-ra.texi 114 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901
  1. @c -*- mode: texinfo; coding: utf-8 -*-
  2. @c %**start of header
  3. @setfilename ra-ra.info
  4. @documentencoding UTF-8
  5. @settitle ra:: — An array library for C++20
  6. @c %**end of header
  7. @c Keep track of
  8. @c http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0834r0.html
  9. @c http://open-std.org/JTC1/SC22/WG21/docs/papers/2017/p0573r2.html
  10. @c http://open-std.org/JTC1/SC22/WG21/docs/papers/2017/p0356r2.html
  11. @c References to source [ma··] or [ma···] current last is 117.
  12. @set VERSION 28
  13. @set UPDATED 2024 March 20
  14. @copying
  15. @code{ra::} (version @value{VERSION}, updated @value{UPDATED})
  16. (c) Daniel Llorens 2005--2024
  17. @smalldisplay
  18. Permission is granted to copy, distribute and/or modify this document
  19. under the terms of the GNU Free Documentation License, Version 1.3 or
  20. any later version published by the Free Software Foundation; with no
  21. Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
  22. @end smalldisplay
  23. @end copying
  24. @dircategory C++ libraries
  25. @direntry
  26. * ra-ra: (ra-ra.info). Expression template and multidimensional array library for C++.
  27. @end direntry
  28. @include my-bib-macros.texi
  29. @mybibuselist{Sources}
  30. @titlepage
  31. @title ra::
  32. @subtitle version @value{VERSION}, updated @value{UPDATED}
  33. @author Daniel Llorens
  34. @page
  35. @vskip 0pt plus 1filll
  36. @insertcopying
  37. @end titlepage
  38. @ifnottex
  39. @node Top
  40. @top @code{ra::}
  41. @insertcopying
  42. @code{ra::}@footnote{/ə'ɹ-eɪ/} is a general purpose multidimensional array and expression template library for C++20. This manual is a work in progress.
  43. @menu
  44. * Overview:: Array programming and C++.
  45. * Usage:: Everything you can do with @code{ra::}.
  46. * Extras:: Additional libraries provided with @code{ra::}.
  47. * Hazards:: User beware.
  48. * Internals:: For all the world to see.
  49. * The future:: Could be even better.
  50. * Reference:: Systematic list of types and functions.
  51. * @mybibnode{}:: It's been done before.
  52. * Indices:: Or try the search function.
  53. * Notes:: Technically...
  54. @end menu
  55. @end ifnottex
  56. @iftex
  57. @shortcontents
  58. @end iftex
  59. @c ------------------------------------------------
  60. @node Overview
  61. @chapter Overview
  62. @c ------------------------------------------------
  63. @cindex length
  64. @cindex rank
  65. @cindex shape
  66. A multidimensional array is a container whose elements can be looked up using a multi-index (i₀, i₁, ...). Each of the indices i₀, i₁, ... has a constant range [0, n₀), [0, n₁), ... independent of the values of the other indices, so the array is ‘rectangular’. The number of indices in the multi-index is the @dfn{rank} of the array, and the list of all the @dfn{lengths} (n₀, n₁, ... nᵣ₋₁) is the @dfn{shape} of the array. We speak of a rank-@math{r} array or of an @math{r}-array.
  67. Often we deal with multidimensional @emph{expressions} where the elements aren't stored anywhere, but are computed on demand when the expression is looked up. In this general sense, an ‘array’ is just a function of integers with a rectangular domain.@footnote{@c
  68. @cindex arity
  69. This leads to ‘rank’ being called ‘arity’ in some contexts, especially when there is risk of confusion with the meaning of ‘rank’ in linear algebra.}
  70. Arrays (as a representation of @dfn{matrices}, @dfn{vectors}, or @dfn{tensors}) are common objects in math and programming, and it's very useful to be able to manipulate arrays as individual entities rather than as aggregates. Not only is
  71. @verbatim
  72. A = B+C;
  73. @end verbatim
  74. much more compact and easier to read than
  75. @verbatim
  76. for (int i=0; i!=m; ++i)
  77. for (int j=0; j!=n; ++j)
  78. for (int k=0; k!=p; ++k)
  79. A(i, j, k) = B(i, j, k)+C(i, j, k);
  80. @end verbatim
  81. but it's also safer and less redundant. For example, the order of the loops may be something you don't really care about.
  82. However, if array operations are implemented naively, a piece of code such as @code{A=B+C} may result in the creation of a temporary to hold @code{B+C} which is then assigned to @code{A}. This is wasteful if the arrays involved are large.
  83. @cindex Blitz++
  84. Fortunately the problem is almost as old as aggregate data types, and other programming languages have addressed it with optimizations such as @url{https://en.wikipedia.org/wiki/Loop_fission_and_fusion, ‘loop fusion’}, ‘drag along’ @mybibcite{Abr70}, or ‘deforestation’ @mybibcite{Wad90}. In the C++ context the technique of ‘expression templates’ was pioneered in the late 90s by libraries such as Blitz++ @mybibcite{bli17}. It works by making @code{B+C} into an ‘expression object’ which holds references to its arguments and performs the sum only when its elements are looked up. The compiler removes the temporary expression objects during optimization, so that @code{A=B+C} results (in principle) in the same generated code as the complicated loop nest above.
  85. @menu
  86. * Rank polymorphism:: What makes arrays special.
  87. * Drag along and beating:: The basic array optimizations.
  88. * Why C++:: High level, low level.
  89. * Guidelines:: How @code{ra::} tries to do things.
  90. * Other libraries:: Inspiration and desperation.
  91. @end menu
  92. @c ------------------------------------------------
  93. @node Rank polymorphism
  94. @section Rank polymorphism
  95. @c ------------------------------------------------
  96. @dfn{Rank polymorphism} is the ability to treat an array of rank @math{r} as an array of lower rank where the elements are themselves arrays.
  97. @cindex cell
  98. @cindex frame
  99. For example, think of a matrix A, a 2-array with shape (n₀, n₁) where the elements A(i₀, i₁) are numbers. If we consider the subarrays A(0, ...), A(1, ...), ..., A(n₀-1, ...) as individual elements, then we have a new view of A as a 1-array of length n₀ with those rows as elements. We say that the rows A(i₀)≡A(i₀, ...) are the 1-@dfn{cells} of A, and the numbers A(i₀, i₁) are 0-cells of A. For an array of arbitrary rank @math{r} the (@math{r}-1)-cells of A are called its @dfn{items}. The prefix of the shape (n₀, n₁, ... nₙ₋₁₋ₖ) that is not taken up by the k-cell is called the k-@dfn{frame}.
  100. An obvious way to store an array in linearly addressed memory is to place its items one after another. So we would store a 3-array as
  101. @quotation
  102. A: [A(0), A(1), ...]
  103. @end quotation
  104. and the items of A(i₀), etc. are in turn stored in the same way, so
  105. @quotation
  106. A: [A(0): [A(0, 0), A(0, 1) ...], ...]
  107. @end quotation
  108. and the same for the items of A(i₀, i₁), etc.
  109. @quotation
  110. A: [[A(0, 0): [A(0, 0, 0), A(0, 0, 1) ...], A(0, 1): [A(0, 1, 0), A(0, 1, 1) ...]], ...]
  111. @end quotation
  112. @cindex order, row-major
  113. This way to lay out an array in memory is called @dfn{row-major order} or @dfn{C-order}, since it's the default order for built-in arrays in C (@pxref{Other libraries}). A row-major array A with shape (n₀, n₁, ... nᵣ₋₁) can be looked up like this:
  114. @anchor{x-steps}
  115. @quotation
  116. A(i₀, i₁, ...) = (storage-of-A) [(((i₀n₁ + i₁)n₂ + i₂)n₃ + ...)+iᵣ₋₁] = (storage-of-A) [o + s₀i₀ + s₁i₁ + ...]
  117. @end quotation
  118. @cindex step
  119. @cindex stride
  120. where the numbers (s₀, s₁, ...) are called the @dfn{steps}@footnote{Sometimes `strides'. Cf. @url{https://en.wikipedia.org/wiki/Dope_vector, @dfn{dope vector}}}. Note that the ‘linear’ or ‘raveled’ address [o + s₀i₀ + s₁i₁ + ...] is an affine function of (i₀, i₁, ...). If we represent an array as a tuple
  121. @quotation
  122. A ≡ ((storage-of-A), o, (s₀, s₁, ...))
  123. @end quotation
  124. then any affine transformation of the indices can be achieved simply by modifying the numbers (o, (s₀, s₁, ...)), with no need to touch the storage. This includes common operations such as: @ref{x-transpose,transposing} axes, @ref{x-reverse,reversing} the order along an axis, most cases of @ref{Slicing,slicing}, and sometimes even reshaping or tiling the array.
  125. A basic example is obtaining the i₀-th item of A:
  126. @quotation
  127. A(i₀) ≡ ((storage-of-A), o+s₀i₀, (s₁, ...))
  128. @end quotation
  129. Note that we can iterate over these items by simply bumping the pointer o+s₀i₀. This means that iterating over (k>0)-cells doesn't cost any more than iterating over 0-cells (@pxref{Cell iteration}).
  130. @c ------------------------------------------------
  131. @node Drag along and beating
  132. @section Drag along and beating
  133. @c ------------------------------------------------
  134. These two fundamental array optimizations are described in @mybibcite{Abr70}.
  135. @dfn{Drag-along} is the process that delays evaluation of array operations. Expression templates can be seen as an implementation of drag-along. Drag-along isn't an optimization in and of itself; it simply preserves the necessary information up to the point where the expression can be executed efficiently.
  136. @dfn{Beating} is the implementation of certain array operations on the array @ref{Containers and views,view} descriptor instead of on the array contents. For example, if @code{A} is a 1-array, one can implement @ref{x-reverse,@code{reverse(A, 0)}} by negating the @ref{x-steps,step} and moving the offset to the other end of the array, without having to move any elements. More generally, beating applies to any function-of-indices (generator) that can take the place of an array in an array expression. For instance, an expression such as @ref{x-iota,@code{1+iota(3, 0)}} can be beaten into @code{iota(3, 1)}, and this can enable further optimizations.
  137. @c ------------------------------------------------
  138. @node Why C++
  139. @section Why C++
  140. @c ------------------------------------------------
  141. Of course the main reason is that (this being a personal project) I'm more familiar with C++ than with other languages to which the following might apply.
  142. C++ supports the low level control that is necessary for interoperation with external libraries and languages, but still has the abstraction power to create the features we want even though the language has no native support for most of them.
  143. @cindex APL
  144. @cindex J
  145. The classic array languages, APL @mybibcite{FI73} and J @mybibcite{Ric08}, have array support baked in. The same is true for other languages with array facilities such as Fortran or Octave/Matlab. Array libraries for general purpose languages usually depend heavily on C extensions. In Numpy's case @mybibcite{num17} this is both for reasons of flexibility (e.g. to obtain predictable memory layout and machine types) and of performance.
  146. On the other extreme, an array library for C would be hampered by the limited means of abstraction in the language (no polymorphism, no metaprogramming, etc.) so the natural choice of C programmers is to resort to code generators, which eventually turn into new languages.
  147. In C++, a library is enough.
  148. @c ------------------------------------------------
  149. @node Guidelines
  150. @section Guidelines
  151. @c ------------------------------------------------
  152. @code{ra::} attempts to be general, consistent, and transparent.
  153. @c @cindex J # TODO makeinfo can't handle an entry appearing more than once (it creates multiple entries in the index).
  154. Generality is achieved by removing arbitrary restrictions and by adopting the rank extension mechanism of J. @code{ra::} supports array operations with an arbitrary number of arguments. Any of the arguments in an array expression can be read from or written to. Arrays or array expressions can be of any rank. Slicing operations work for subscripts of any rank, as in APL. You can use your own types as array elements.
  155. Consistency is achieved by having a clear set of concepts and having the realizations of those concepts adhere to the concept as closely as possible. @code{ra::} offers a few different types of views and containers, but it should be possible to use them interchangeably whenever it makes sense. For example, it used to be the case that you couldn't create a higher rank iterator on a @code{ViewSmall}, even though you could do it on a @code{View}; this was a bug.
  156. Sometimes consistency requires a choice. For example, given array views A and B, @code{A=B} copies the contents of view @code{B} into view @code{A}. To change view @code{A} instead (to treat @code{A} as a pointer) would be the default meaning of @code{A=B} for C++ types, and result in better consistency with the rest of the language, but I have decided that having consistency between views and containers (which ‘are’ their contents in a sense that views aren't) is more important.
  157. Transparency is achieved by avoiding unnecessary abstraction. An array view consists of a pointer and a list of steps and I see no point in hiding it. Manipulating the steps directly is often useful. A container consists of storage and a view and that isn't hidden either. Some of the types have an obscure implementation but I consider that a defect. Ideally you should be able to rewrite expressions on the fly, or plug in your own traversal methods or storage handling.
  158. That isn't to mean that you need to be in command of a lot of internal detail to be able to use the library. I hope to have provided a high level interface to most operations and a reasonably usable syntax. However, transparency is critical to achieve interoperation with external libraries and languages. When you need to, you'll be able to guarantee that an array is stored in compact columns, or that the real parts are interleaved with the imaginary parts.
  159. @c ------------------------------------------------
  160. @node Other libraries
  161. @section Other array libraries
  162. @c ------------------------------------------------
  163. Here I try to list the C++ array libraries that I know of, or libraries that I think deserve a mention for the way they deal with arrays. It is not an extensive review, since I have only used a few of these libraries myself. Please follow the links if you want to be properly informed.
  164. Since the C++ standard library doesn't offer a standard multidimensional array type, some libraries for specific tasks (linear algebra operations, finite elements, optimization) offer an accessory array library, which may be more or less general. Other libraries have generic array interfaces without needing to provide an array type. FFTW is a good example, maybe because it isn't C++!
  165. @subsection Standard C++
  166. The C++ language offers multidimensional arrays as a legacy feature from C, e.g. @code{int a[3][4]}. These decay to pointers when you do nearly anything with them, don't know their own shape or rank at runtime, and are generally too limited.
  167. The C++ standard library also offers a number of contiguous storage containers that can be used as 1-arrays: @code{<array>}, @code{<vector>} and @code{<valarray>}. Neither supports higher ranks out of the box, but @code{<valarray>} offers array operations for 1-arrays. @code{ra::} makes use of @code{<array>} and @code{<vector>} internally and for storage.
  168. @code{ra::} accepts built-in arrays and standard library types as array objects (@pxref{Compatibility}).
  169. @subsection Blitz++
  170. @cindex Blitz++
  171. Blitz++ @mybibcite{bli17} pioneered the use of expression templates in C++. It supported higher rank arrays, as high as it was practical in C++98, but not runtime rank. It also supported small arrays with compile time shape (@code{Tiny}), and convenience features such as Fortran-order constructors and arbitrary lower bounds for the array indices (both of which @code{ra::} chooses not to support). It placed a strong emphasis on performance, with array traversal methods such as blocking, space filling curves, etc.
  172. However, the implementation had to fight the limitations of C++98, and it offered no general rank extension mechanism.
  173. One important difference between Blitz++ and @code{ra::} is that Blitz++'s arrays were reference counted. @code{ra::} doesn't do any memory management on its own: the default container (data-owning) types are values, and views are distinct types. You can select your own storage for the data-owning objects, including reference-counted storage (@code{ra::} declares a type using @code{std::shared_ptr}), but this is not the default.
  174. @subsection Other C++ libraries
  175. @c TODO
  176. @subsection Other languages
  177. @c TODO Maybe review other languages, at least the big ones (Fortran / APL / J / Matlab / Python-Numpy).
  178. @c ------------------------------------------------
  179. @node Usage
  180. @chapter Usage
  181. @c ------------------------------------------------
  182. This is an extended exposition of the features of @code{ra::} and is probably best read in order. For details on specific functions or types, please @pxref{Reference}.
  183. @menu
  184. * Using the library:: @code{ra::} is a header-only library.
  185. * Containers and views:: Data objects.
  186. * Array operations:: Building and traversing expressions.
  187. * Rank extension:: How array operands are matched.
  188. * Cell iteration:: At any rank.
  189. * Slicing:: Subscripting is a special operation.
  190. * Special objects:: Not arrays, yet arrays.
  191. * Functions:: Ready to go.
  192. * The rank conjunction:: J comes to C++.
  193. * Compatibility:: With the STL and other libraries.
  194. * Extension:: Using your own types and more.
  195. * Error handling:: What to check and what to do.
  196. @end menu
  197. @c ------------------------------------------------
  198. @node Using the library
  199. @section Using @code{ra::}
  200. @c ------------------------------------------------
  201. @code{ra::} is a header only library with no dependencies other than the standard library, so you just need to place the @samp{ra/} folder somewhere in your include path and add @code{#include "ra/ra.hh"} at the top of your sources.
  202. A compiler with C++20 support is required. For the current version this means at least @b{gcc 11} with @option{-std=c++20}. Some C++23 features are available with @option{-std=c++2b}. Check the top @code{README.md} for more up-to-date information.
  203. Here is a minimal program@footnote{Examples given without context assume that one has declared @code{using std::cout;}, etc.}:
  204. @example @c readme.cc [ma101]
  205. @verbatim
  206. #include "ra/ra.hh"
  207. #include <iostream>
  208. int main()
  209. {
  210. ra::Big<char, 2> A({2, 5}, "helloworld");
  211. std::cout << ra::noshape << format_array(transpose<1, 0>(A), {.sep0="|"}) << std::endl;
  212. }
  213. @end verbatim
  214. @print{} h|w
  215. e|o
  216. l|r
  217. l|l
  218. d|d
  219. @end example
  220. The following headers are @emph{not} included by default:
  221. @itemize
  222. @item @code{"ra/dual.hh"}:
  223. A dual number type for simple uses of automatic differentiation.
  224. @item @code{"ra/test.hh"}:
  225. Used by the test suite.
  226. @end itemize
  227. The header @code{"ra/base.hh"} can be used to configure @ref{Error handling}. You don't need to modify the header, but the configuration depends on including @code{"ra/base.hh"} before the rest of @code{ra::} in order to override the default handler. All other headers are for internal use by @code{ra::}.
  228. The following @code{#define}s affect the behavior of @code{ra::}.
  229. @itemize
  230. @c FIXME The flag should only apply to dynamic checks.
  231. @item @code{RA_DO_CHECK} (default 1)
  232. If 1, check shape agreement (e.g. @code{Big<int, 1> @{2, 3@} + Big<int, 1> @{1, 2, 3@}}) and random array accesses (e.g. @code{Small<int, 2> a = 0; int i = 10; a[i] = 0;}). See @ref{Error handling}.
  233. @item @code{RA_DO_OPT_SMALLVECTOR} (default 0)
  234. If 1, perform immediately certain operations on @code{ra::Small} objects, using small vector intrinsics. Currently this only works on @b{gcc} and doesn't necessarily result in improved performance.
  235. @item @code{RA_DO_FMA} (default @code{FP_FAST_FMA} if defined, else 0)
  236. If 1, use @code{fma} in certain array reductions such as @ref{x-dot,@code{dot}}, @ref{x-gemm,@code{gemm}}, etc.
  237. @end itemize
  238. @cindex container
  239. @c ------------------------------------------------
  240. @node Containers and views
  241. @section Containers and views
  242. @c ------------------------------------------------
  243. @code{ra::} offers two kinds of data objects. The first kind, the @dfn{container}, owns its data. Creating a container uses up memory and destroying it causes that memory to be freed.
  244. @cindex compile-time
  245. @cindex ct
  246. @cindex runtime
  247. @cindex rt
  248. There are three kinds of containers (ct: compile-time, rt: runtime): 1) ct size, 2) ct rank/rt shape, and 3) rt rank; rt rank implies rt shape. Some rt size arrays can be resized but rt rank arrays cannot normally have their rank changed. Instead, you create a new container or view with the rank you want.
  249. For example:
  250. @example
  251. @verbatim
  252. {
  253. ra::Small<double, 2, 3> a(0.); // a ct size 2x3 array
  254. ra::Big<double, 2> b({2, 3}, 0.); // a rt size 2x3 array
  255. ra::Big<double> c({2, 3}, 0.); // a rt rank 2x3 array
  256. // a, b, c destroyed at end of scope
  257. }
  258. @end verbatim
  259. @end example
  260. Using the right kind of container can result in better performance. Ct shapes do not need to be stored in memory, which matters when you have many small arrays. Ct shape or ct rank arrays are also safer to use; sometimes @code{ra::} will be able to detect errors in the shapes or ranks of array operands at compile time, if the appropriate types are used.
  261. Container constructors come in two forms. The first form takes a single argument which is copied into the new container. This argument provides shape information if the container type requires it.@footnote{The brace-list constructors of rank 2 and higher aren't supported on types of rt rank, because in the C++ grammar, a nested initializer list doesn't always define a rank unambiguously.}
  262. @c [ma111]
  263. @example
  264. @verbatim
  265. using ra::Small, ra::Big;
  266. Small<int, 2, 2> a = {{1, 2}, {3, 4}}; // explicit contents
  267. Big<int, 2> a1 = {{1, 2}, {3, 4}}; // explicit contents
  268. Small<int, 2, 2> a2 = {{1, 2}}; // error: bad shape
  269. Small<int, 2, 2> b = 7; // 7 is copied into b
  270. Small<int, 2, 2> c = a; // the contents of a are copied into c
  271. Big<int> d = a; // d takes the shape of a and a is copied into d
  272. Big<int> e = 0; // e is a 0-array with one element f()==0.
  273. @end verbatim
  274. @end example
  275. The second form takes two arguments, one giving the shape, the second the contents.
  276. @cindex @code{none}
  277. @cindex uninitialized container
  278. @example
  279. @verbatim
  280. ra::Big<double, 2> a({2, 3}, 1.); // a has shape [2 3], filled with 1.
  281. ra::Big<double> b({2, 3}, ra::none); // b has shape [2 3], default initialized
  282. ra::Big<double> c({2, 3}, a); // c has shape [2 3], a is copied into c
  283. @end verbatim
  284. @end example
  285. The last example may result in an error if the shape of @code{a} and (2,@w{ }3) don't match. Here the shape of @code{1.} [which is ()] matches (2,@w{ }3) by a mechanism of rank extension (@pxref{Rank extension}). The special value @code{ra::none} can be used to request @url{https://en.cppreference.com/w/cpp/language/default_initialization, default initialization} of the container's elements.
  286. The shape argument can have rank 0 only for rank 1 arrays.
  287. @cindex @code{none}
  288. @cindex uninitialized container
  289. @example
  290. @verbatim
  291. ra::Big<int> c(3, 0); // ok {0, 0, 0}, same as ra::Big<int> c({3}, 0)
  292. ra::Big<int, 1> c(3, 0); // ok {0, 0, 0}, same as ra::Big<int, 1> c({3}, 0)
  293. ra::Big<int, 2> c({3}, 0); // error: bad length for shape
  294. ra::Big<int, 2> c(3, 0); // error: bad length for shape
  295. @end verbatim
  296. @end example
  297. When the content argument is a pointer or a 1D brace list, it's handled especially, not for shape@footnote{You can still use pointers or @code{std::initializer_list}s for shape by wrapping them in the functions @code{ptr} or @code{vector}, respectively.}, but only as the (row-major) ravel of the content. The pointer constructor is unsafe —use at your own risk!@footnote{The brace-list constructors aren't rank extending, because giving the ravel is incompatible with rank extension. They are shape-strict —you must give every element.}
  298. @cindex order, column-major
  299. @example
  300. @verbatim
  301. Small<int, 2, 2> aa = {1, 2, 3, 4}; // ravel of the content
  302. ra::Big<double, 2> a({2, 3}, {1, 2, 3, 4, 5, 6}); // same as a = {{1, 2, 3}, {4, 5, 6}}
  303. @end verbatim
  304. @end example
  305. @c [ma112]
  306. @example
  307. @verbatim
  308. double bx[6] = {1, 2, 3, 4, 5, 6}
  309. ra::Big<double, 2> b({3, 2}, bx); // {{1, 2}, {3, 4}, {5, 6}}
  310. double cx[4] = {1, 2, 3, 4}
  311. ra::Big<double, 2> c({3, 2}, cx); // *** WHO NOSE ***
  312. @end verbatim
  313. @end example
  314. @c [ma114]
  315. @example
  316. @verbatim
  317. using lens = mp::int_list<2, 3>;
  318. using steps = mp::int_list<1, 2>;
  319. ra::SmallArray<double, lens, steps> a {{1, 2, 3}, {4, 5, 6}}; // stored column-major: 1 4 2 5 3 6
  320. @end verbatim
  321. @end example
  322. These produce compile time errors:
  323. @example
  324. @verbatim
  325. Big<int, 2> b = {1, 2, 3, 4}; // error: shape cannot be deduced from ravel
  326. Small<int, 2, 2> b = {1, 2, 3, 4 5}; // error: bad size
  327. Small<int, 2, 2> b = {1, 2, 3}; // error: bad size
  328. @end verbatim
  329. @end example
  330. @anchor{x-scalar-char-star}
  331. Sometimes the pointer constructor gets in the way (see @ref{x-scalar,@code{scalar}}): @c [ma102]
  332. @example
  333. @verbatim
  334. ra::Big<char const *, 1> A({3}, "hello"); // error: try to convert char to char const *
  335. ra::Big<char const *, 1> A({3}, ra::scalar("hello")); // ok, "hello" is a single item
  336. cout << ra::noshape << format_array(A, {.sep0="|"}) << endl;
  337. @end verbatim
  338. @print{} hello|hello|hello
  339. @end example
  340. @cindex view
  341. A @dfn{view} is similar to a container in that it points to actual data in memory. However, the view doesn't own that data and destroying the view won't affect it. For example:
  342. @example
  343. @verbatim
  344. ra::Big<double> c({2, 3}, 0.); // a rt rank 2x3 array
  345. {
  346. auto c1 = c(1); // the second row of array c
  347. // c1 is destroyed here
  348. }
  349. cout << c(1, 1) << endl; // ok
  350. @end verbatim
  351. @end example
  352. The data accessed through a view is the data of the ‘root’ container, so modifying the former will be reflected in the latter.
  353. @example
  354. @verbatim
  355. ra::Big<double> c({2, 3}, 0.);
  356. auto c1 = c(1);
  357. c1(2) = 9.; // c(1, 2) = 9.
  358. @end verbatim
  359. @end example
  360. Just as for containers, there are separate types of views depending on whether the shape is known at compile time, the rank is known at compile time but the shape is not, or neither the shape nor the rank are known at compile time. @code{ra::} has functions to create the most common kinds of views:
  361. @example
  362. @verbatim
  363. ra::Big<double> c {{1, 2, 3}, {4, 5, 6}};
  364. auto ct = transpose<1, 0>(c); // {{1, 4}, {2, 5}, {3, 6}}
  365. auto cr = reverse(c, 0); // {{4, 5, 6}, {1, 2, 3}}
  366. @end verbatim
  367. @end example
  368. However, views can point to anywhere in memory and that memory doesn't have to belong to an @code{ra::} container. For example:
  369. @example
  370. @verbatim
  371. int raw[6] = {1, 2, 3, 4, 5, 6};
  372. ra::View<int> v1({{2, 3}, {3, 1}}, raw); // view with shape [2, 3] steps [3, 1]
  373. ra::View<int> v2({2, 3}, raw); // same, default C (row-major) steps
  374. @end verbatim
  375. @end example
  376. Containers can be treated as views of the same kind (rt or ct) . If you declare a function
  377. @example
  378. @verbatim
  379. void f(ra::View<int, 3> & v);
  380. @end verbatim
  381. @end example
  382. you may pass it an object of type @code{ra::Big<int, 3>}.
  383. @c ------------------------------------------------
  384. @node Array operations
  385. @section Array operations
  386. @c ------------------------------------------------
  387. To apply an operation to each element of an array, use the function @ref{x-for_each,@code{for_each}}. The array is traversed in an order that is decided by the library.
  388. @example
  389. @verbatim
  390. ra::Small<double, 2, 3> a = {{1, 2, 3}, {4, 5, 6}};
  391. double s = 0.;
  392. for_each([&s](auto && a) { s+=a; }, a);
  393. @end verbatim
  394. @result{} s = 21.
  395. @end example
  396. To construct an array expression but stop short of traversing it, use the function @code{map}. The expression will be traversed when it's assigned to a view, printed out, etc.
  397. @example
  398. @verbatim
  399. using T = ra::Small<double, 2, 2>;
  400. T a = {{1, 2}, {3, 4}};
  401. T b = {{10, 20}, {30, 40}};
  402. T c = map([](auto && a, auto && b) { return a+b; }, a, b); // (1)
  403. @end verbatim
  404. @result{} c = @{@{11, 22@}, @{33, 44@}@}
  405. @end example
  406. Expressions may take any number of arguments and be nested arbitrarily.
  407. @example
  408. @verbatim
  409. T d = 0;
  410. for_each([](auto && a, auto && b, auto && d) { d = a+b; },
  411. a, b, d); // same as (1)
  412. for_each([](auto && ab, auto && d) { d = ab; },
  413. map([](auto && a, auto && b) { return a+b; },
  414. a, b),
  415. d); // same as (1)
  416. @end verbatim
  417. @end example
  418. The operator of an expression may return a reference and you may assign to an expression in that case. @code{ra::} will complain if the expression is somehow not assignable.
  419. @example
  420. @verbatim
  421. T d = 0;
  422. map([](auto & d) -> decltype(auto) { return d; }, d) // just pass d along
  423. = map([](auto && a, auto && b) { return a+b; }, a, b); // same as (1)
  424. @end verbatim
  425. @end example
  426. @code{ra::} defines many shortcuts for common array operations. You can of course just do:
  427. @example
  428. @verbatim
  429. T c = a+b; // same as (1)
  430. @end verbatim
  431. @end example
  432. @c ------------------------------------------------
  433. @node Rank extension
  434. @section Rank extension
  435. @c ------------------------------------------------
  436. Rank extension is the mechanism that allows @code{R+S} to be defined even when @code{R}, @code{S} may have different ranks. The idea is an interpolation of the following basic cases.
  437. Suppose first that @code{R} and @code{S} have the same rank. We require that the shapes be the same. Then the shape of @code{R+S} will be the same as the shape of either @code{R} or @code{S} and the elements of @code{R+S} will be
  438. @quotation
  439. @code{(R+S)(i₀ i₁ ... i₍ᵣ₋₁₎) = R(i₀ i₁ ... i₍ᵣ₋₁₎) + S(i₀ i₁ ... i₍ᵣ₋₁₎)}
  440. @end quotation
  441. where @code{r} is the rank of @code{R}.
  442. Now suppose that @code{S} has rank 0. The shape of @code{R+S} is the same as the shape of @code{R} and the elements of @code{R+S} will be
  443. @quotation
  444. @code{(R+S)(i₀ i₁ ... i₍ᵣ₋₁₎) = R(i₀ i₁ ... i₍ᵣ₋₁₎) + S()}.
  445. @end quotation
  446. The two rules above are supported by all primitive array languages, e.g. Matlab @mybibcite{Mat}. But suppose that @code{S} has rank @code{s}, where @code{0<s<r}. Looking at the expressions above, it seems natural to define @code{R+S} by
  447. @quotation
  448. @code{(R+S)(i₀ i₁ ... i₍ₛ₋₁₎ ... i₍ᵣ₋₁₎) = R(i₀ i₁ ... i₍ₛ₋₁₎ ... i₍ᵣ₋₁₎) + S(i₀ i₁ ... i₍ₛ₋₁₎)}.
  449. @end quotation
  450. That is, after we run out of indices in @code{S}, we simply repeat the elements. We have aligned the shapes so:
  451. @quotation
  452. @verbatim
  453. [n₀ n₁ ... n₍ₛ₋₁₎ ... n₍ᵣ₋₁₎]
  454. [n₀ n₁ ... n₍ₛ₋₁₎]
  455. @end verbatim
  456. @end quotation
  457. @cindex shape agreement, prefix
  458. @cindex shape agreement, suffix
  459. @c @cindex J
  460. @cindex Numpy
  461. This rank extension rule is used by the J language @mybibcite{J S} and is known as @dfn{prefix agreement}. The opposite rule of @dfn{suffix agreement} is used, for example, in Numpy @mybibcite{num17}@footnote{Prefix agreement is chosen for @code{ra::} because of the availability of a @ref{The rank conjunction,rank conjunction} @mybibcite{Ber87} and @ref{Cell iteration, cell iterators of arbitrary rank}. This allows rank extension to be performed at multiple axes of an array expression.}.
  462. As you can verify, the prefix agreement rule is distributive. Therefore it can be applied to nested expressions or to expressions with any number of arguments. It is applied systematically throughout @code{ra::}, even in assignments. For example,
  463. @example
  464. @verbatim
  465. ra::Small<int, 3> x {3, 5, 9};
  466. ra::Small<int, 3, 2> a = x; // assign x(i) to each a(i, j)
  467. @end verbatim
  468. @result{} a = @{@{3, 3@}, @{5, 5@}, @{9, 9@}@}
  469. @end example
  470. @example
  471. @verbatim
  472. ra::Small<int, 3> x(0.);
  473. ra::Small<int, 3, 2> a = {{1, 2}, {3, 4}, {5, 6}};
  474. x += a; // sum the rows of a
  475. @end verbatim
  476. @result{} x = @{3, 7, 11@}
  477. @end example
  478. @example
  479. @verbatim
  480. ra::Big<double, 3> a({5, 3, 3}, ra::_0);
  481. ra::Big<double, 1> b({5}, 0.);
  482. b += transpose<0, 1, 1>(a); // b(i) = ∑ⱼ a(i, j, j)
  483. @end verbatim
  484. @result{} b = @{0, 3, 6, 9, 12@}
  485. @end example
  486. @cindex Numpy
  487. @cindex broadcasting, singleton, newaxis
  488. An weakness of prefix agreement is that the axes you want to match aren't always the prefix axes. Other array systems offer a feature similar to rank extension called ‘broadcasting’ that is a bit more flexible. For example, in the way it's implemented in Numpy @mybibcite{num17}, an array of shape [A B 1 D] will match an array of shape [A B C D]. The process of broadcasting consists in inserting so-called ‘singleton dimensions’ (axes with length one) to align the axes that one wishes to match. You can think of prefix agreement as a particular case of broadcasting where the singleton dimensions are added to the end of the shorter shapes automatically.
  489. A drawback of singleton broadcasting is that it muddles the distinction between a scalar and a vector of length 1. Sometimes, an axis of length 1 is no more than that, and if 2≠3 is a size mismatch, it isn't obvious why 1≠2 shouldn't be. To avoid this problem, @code{ra::} supports broadcasting with undefined length axes (see @ref{x-insert,@code{insert}}).
  490. @example
  491. @verbatim
  492. ra::Big<double, 3> a({5, 3}, ra::_0);
  493. ra::Big<double, 1> b({3}, 0.);
  494. ra::Big<double, 3> c({1, 3}, ra::_0);
  495. // b(?, i) += a(j, i) → b(i) = ∑ⱼ a(j, i) (sum columns)
  496. b(ra::insert<1>) += a;
  497. c = a; // 1 ≠ 5, still an agreement error
  498. @end verbatim
  499. @end example
  500. Still another way to align array axes is provided by the @ref{The rank conjunction,rank conjunction}.
  501. Even with axis insertion, it is still necessary that the axes one wishes to match are in the same order in all the arguments.
  502. @ref{x-transpose,Transposing} the axes before extension is a possible workaround.
  503. @c ------------------------------------------------
  504. @node Cell iteration
  505. @section Cell iteration
  506. @c ------------------------------------------------
  507. @code{map} and @code{for_each} apply their operators to each element of their arguments; in other words, to the 0-cells of the arguments. But it is possible to specify directly the rank of the cells that one iterates over:
  508. @example
  509. @verbatim
  510. ra::Big<double, 3> a({5, 4, 3}, ra::_0);
  511. for_each([](auto && b) { /* b has shape (5 4 3) */ }, iter<3>(a));
  512. for_each([](auto && b) { /* b has shape (4 3) */ }, iter<2>(a));
  513. for_each([](auto && b) { /* b has shape (3) */ }, iter<1>(a));
  514. for_each([](auto && b) { /* b has shape () */ }, iter<0>(a)); // elements
  515. for_each([](auto && b) { /* b has shape () */ }, a); // same as iter<0>(a); default
  516. @end verbatim
  517. @end example
  518. One may specify the @emph{frame} rank instead:
  519. @example
  520. @verbatim
  521. for_each([](auto && b) { /* b has shape () */ }, iter<-3>(a)); // same as iter<0>(a)
  522. for_each([](auto && b) { /* b has shape (3) */ }, iter<-2>(a)); // same as iter<1>(a)
  523. for_each([](auto && b) { /* b has shape (4 3) */ }, iter<-1>(a)); // same as iter<2>(a)
  524. @end verbatim
  525. @end example
  526. In this way it is possible to match shapes in various ways. Compare
  527. @example
  528. @verbatim
  529. ra::Big<double, 2> a = {{1, 2, 3}, {4, 5, 6}};
  530. ra::Big<double, 1> b = {10, 20};
  531. ra::Big<double, 2> c = a * b; // multiply (each item of a) by (each item of b)
  532. @end verbatim
  533. @result{} a = @{@{10, 20, 30@}, @{80, 100, 120@}@}
  534. @end example
  535. with
  536. @example @c [ma105]
  537. @verbatim
  538. ra::Big<double, 2> a = {{1, 2, 3}, {4, 5, 6}};
  539. ra::Big<double, 1> b = {10, 20, 30};
  540. ra::Big<double, 2> c({2, 3}, 0.);
  541. iter<1>(c) = iter<1>(a) * iter<1>(b); // multiply (each item of a) by (b)
  542. @end verbatim
  543. @result{} a = @{@{10, 40, 90@}, @{40, 100, 180@}@}
  544. @end example
  545. Note that in this case we cannot construct @code{c} directly from @code{iter<1>(a) * iter<1>(b)}, since the constructor for @code{ra::Big} matches its argument using (the equivalent of) @code{iter<0>(*this)}. See @ref{x-iter,@code{iter}} for more examples.
  546. Cell iteration is appropriate when the operations take naturally operands of rank > 0; for instance, the operation ‘determinant of a matrix’ is naturally of rank 2. When the operation is of rank 0, such as @code{*} above, there may be faster ways to rearrange shapes for matching (@pxref{The rank conjunction}).
  547. FIXME More examples.
  548. @c ------------------------------------------------
  549. @node Slicing
  550. @section Slicing
  551. @c ------------------------------------------------
  552. Slicing is an array extension of the subscripting operation. However, tradition and convenience have given it a special status in most array languages, together with some peculiar semantics that @code{ra::} supports.
  553. The form of the scripting operator @code{A(i₀, i₁, ...)} makes it plain that @code{A} is a function of @code{rank(A)} integer arguments@footnote{The multi-argument square bracket form @code{A[i₀, i₁, ...]} is supported under C++23 compilers (e.g. gcc ≥ 12 with @code{-std=c++2b}), with the same meaning as @code{A(i₀, i₁, ...)}. Under C++20 only a single-argument square bracket form @code{A[i₀]} is available.}. An array extension is immediately available through @code{map}. For example:
  554. @example
  555. @verbatim
  556. ra::Big<double, 1> a = {1., 2., 3., 4.};
  557. ra::Big<int, 1> i = {1, 3};
  558. map(a, i) = 77.;
  559. @end verbatim
  560. @result{} a = @{1., 77., 3, 77.@}
  561. @end example
  562. Just as with any use of @code{map}, array arguments are subject to the prefix agreement rule.
  563. @example
  564. @verbatim
  565. ra::Big<double, 2> a({2, 2}, {1., 2., 3., 4.});
  566. ra::Big<int, 1> i = {1, 0};
  567. ra::Big<double, 1> b = map(a, i, 0);
  568. @end verbatim
  569. @result{} b = @{3., 1.@} // @{a(1, 0), a(0, 0)@}
  570. @end example
  571. @example
  572. @verbatim
  573. ra::Big<int, 1> j = {0, 1};
  574. b = map(a, i, j);
  575. @end verbatim
  576. @result{} b = @{3., 2.@} // @{a(1, 0), a(0, 1)@}
  577. @end example
  578. The latter is a form of sparse subscripting.
  579. Most array operations (e.g. @code{+}) are defined through @code{map} in this way. For example, @code{A+B+C} is defined as @code{map(+, A, B, C)} (or the equivalent @code{map(+, map(+, A, B), C)}). Not so for the subscripting operation:
  580. @example
  581. @verbatim
  582. ra::Big<double, 2> A {{1., 2.}, {3., 4.}};
  583. ra::Big<int, 1> i = {1, 0};
  584. ra::Big<int, 1> j = {0, 1};
  585. // {{A(i₀, j₀), A(i₀, j₁)}, {A(i₁, j₀), A(i₁, j₁)}}
  586. ra::Big<double, 2> b = A(i, j);
  587. @end verbatim
  588. @result{} b = @{@{3., 4.@}, @{1., 2.@}@}
  589. @end example
  590. @anchor{x-subscript-outer-product}
  591. @code{A(i, j, ...)} is defined as the @emph{outer product} of the indices @code{(i, j, ...)} with operator @code{A}, because this operation sees much more use in practice than @code{map(A, i, j ...)}.
  592. @cindex elision, index
  593. You may give fewer subscripts than the rank of the array. The full extent is assumed for the missing subscripts (cf @ref{x-all,@code{all}} below):
  594. @example
  595. @verbatim
  596. ra::Big<int, 3> a({2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8});
  597. auto a0 = a(0); // same as a(0, ra::all, ra::all)
  598. auto a10 = a(1, 0); // same as a(1, 0, ra::all)
  599. @end verbatim
  600. @result{} a0 = @{@{1, 2@}, @{3, 4@}@}
  601. @result{} a10 = @{5, 6@}
  602. @end example
  603. This supports the notion (@pxref{Rank polymorphism}) that a 3-array is also an 2-array where the elements are 1-arrays themselves, or a 1-array where the elements are 2-arrays. This important property is directly related to the mechanism of rank extension (@pxref{Rank extension}).
  604. Besides, when the subscripts @code{i, j, ...} are scalars or integer sequences of the form @code{(o, o+s, ..., o+s*(n-1))} (@dfn{linear ranges}), the subscripting can be performed inmediately at constant cost, and without needing to construct an expression object. This optimization is called @ref{Drag along and beating,@dfn{beating}}.
  605. @code{ra::} isn't smart enough to know when an arbitrary expression might be a linear range, so the following special objects are provided:
  606. @anchor{x-iota}
  607. @deffn @w{Special object} iota count [start:0 [step:1]]
  608. Create a linear range @code{start, start+step, ... start+step*(count-1)}.
  609. This can used anywhere an array expression is expected.
  610. @example
  611. @verbatim
  612. ra::Big<int, 1> a = ra::iota(4, 3 -2);
  613. @end verbatim
  614. @result{} a = @{3, 1, -1, -3@}
  615. @end example
  616. Here, @code{b} and @code{c} are @code{View}s (@pxref{Containers and views}).
  617. @example
  618. @verbatim
  619. ra::Big<int, 1> a = {1, 2, 3, 4, 5, 6};
  620. auto b = a(iota(3));
  621. auto c = a(iota(3, 3));
  622. @end verbatim
  623. @result{} a = @{1, 2, 3@}
  624. @result{} a = @{4, 5, 6@}
  625. @end example
  626. @cindex TensorIndex
  627. @code{iota()} by itself is an expression of rank 1 and undefined length. It must be used with other terms whose lengths are defined, so that the overall shape of the array expression can be determined. In general, @code{iota<n>()} is an array expression of rank @code{n}+1 that represents the @code{n}-th index of an array expression. This is similar to Blitz++'s @code{TensorIndex}.
  628. @code{ra::} offers the shortcut @code{ra::_0} for @code{ra::iota<0>()}, etc.
  629. @example
  630. @verbatim
  631. ra::Big<int, 1> v = {1, 2, 3};
  632. cout << (v - ra::_0) << endl; // { 1-0, 2-1, 3-2 }
  633. // cout << (ra::_0) << endl; // error: undefined length
  634. // cout << (v - ra::_1) << endl; // error: undefined length on axis 1
  635. ra::Big<int, 2> a({3, 2}, 0);
  636. cout << (a + ra::_0 - ra::_1) << endl; // {{0, -1, -2}, {1, 0, -1}, {2, 1, 0}}
  637. @end verbatim
  638. @end example
  639. When undefined length @code{iota()} is used as a subscript by itself, the result isn't a @code{View}. This allows @code{view(iota())} to match with expressions of different lengths, as in the following example.
  640. @example
  641. @verbatim
  642. ra::Big<int, 1> a = {1, 2, 3, 4, 5, 6};
  643. ra::Big<int, 1> b = {1, 2, 3};
  644. cout << (b + a(iota())) << endl; // a(iota()) is not a View
  645. @end verbatim
  646. @print{} 3
  647. 2 4 6
  648. @end example
  649. Note the difference between
  650. @itemize
  651. @item @code{ra::iota<3>()} —
  652. an expression of rank 4 and undefined length, representing a linear sequence over the tensor index of axis 3
  653. @item @code{ra::iota(3)} ≡ @code{ra::iota<0>(3)} —
  654. an expression of rank 1, representing the sequence @code{0, 1, 2}.
  655. @end itemize
  656. @end deffn
  657. @anchor{x-all}
  658. @deffn @w{Special object} all
  659. Create a linear range @code{0, 1, ... (nᵢ-1)} when used as a subscript at the @var{i}-th place of a subscripting expression. This might not be the @var{i}-th argument; see @ref{x-insert,@code{insert}}, @ref{x-dots,@code{dots}}.
  660. This object cannot stand alone as an array expression. All the examples below result in @code{View} objects:
  661. @example
  662. @verbatim
  663. ra::Big<int, 2> a({3, 2}, {1, 2, 3, 4, 5, 6});
  664. auto b = a(ra::all, ra::all); // (1) a view of the whole of a
  665. auto c = a(iota(3), iota(2)); // same as (1)
  666. auto d = a(iota(3), ra::all); // same as (1)
  667. auto e = a(ra:all, iota(2)); // same as (1)
  668. auto f = a(0, ra::all); // first row of a
  669. auto g = a(ra::all, 1); // second column of a
  670. auto g = a(ra::all, ra::dots<0>, 1); // same
  671. @end verbatim
  672. @end example
  673. @code{all} is a special case (@code{dots<1>}) of the more general object @code{dots}.
  674. @end deffn
  675. @anchor{x-dots}
  676. @deffn @w{Special object} dots<n>
  677. Equivalent to as many instances of @code{ra::all} as indicated by @code{n}, which must not be negative. Each instance takes the place of one argument to the subscripting operation.
  678. If @var{n} is defaulted (@code{dots<>}), all available places will be used; this can only be done once in a given subscript list.
  679. This object cannot stand alone as an array expression. All the examples below result in @code{View} objects:
  680. @example
  681. @verbatim
  682. ra::Big<int, 3> a({3, 2, 4}, ...);
  683. auto h = a(); // all of a
  684. auto b = a(ra::all, ra::all, ra::all); // (1) all of a
  685. auto c = a(ra::dots<3>); // same as (1)
  686. auto d = a(ra::all, ra::dots<2>); // same as (1)
  687. auto e = a(ra::dots<2>, ra::all); // same as (1)
  688. auto f = a(ra::dots<>); // same as (1)
  689. auto j0 = a(0, ra::dots<2>); // first page of a
  690. auto j1 = a(0); // same
  691. auto j2 = a(0, ra::dots<>); // same
  692. auto k0 = a(ra::all, 0); // first row of a
  693. auto k1 = a(ra::all, 0, ra::all); // same
  694. auto k2 = a(ra::all, 0, ra::dots<>); // same
  695. auto k3 = a(ra::dots<>, 0, ra::all); // same
  696. // auto k = a(ra::dots<>, 0, ra::dots<>); // error
  697. auto l0 = a(ra::all, ra::all, 0); // first column of a
  698. auto l1 = a(ra::dots<2>, 0); // same
  699. auto l2 = a(ra::dots<>, 0); // same
  700. @end verbatim
  701. @end example
  702. This is useful when writing rank-generic code, see @code{examples/maxwell.cc} in the distribution for an example.
  703. @end deffn
  704. The following special objects aren't related to linear ranges, but they are meant to be used in a subscript context. Using them in other contexts will result in a compile time error.
  705. @cindex @code{len}
  706. @cindex @code{end}, Octave/Matlab
  707. @anchor{x-len}
  708. @deffn @w{Special object} len
  709. Represents the length of the @var{i}-th axis of a subscripted expression, when used at the @var{i}-th place of a subscripting expression.
  710. This works like @code{end} in Octave/Matlab, but note that @code{ra::} indices begin at 0, so the last element of a vector @code{a} is @code{a(ra::len-1)}.
  711. @example
  712. @verbatim
  713. ra::Big<int, 2> a({10, 10}, 100 + ra::_0 - ra::_1);
  714. auto a0 = a(ra::len-1); // last row of a; ra::len is a.len(0)
  715. auto a1 = a(ra::all, ra::len-1); // last column a; ra::len is a.len(1)
  716. auto a2 = a(ra::len-1, ra::len-1); // last element of last row; the first ra::len is a.len(0) and the second one is a.len(1)
  717. auto a3 = a(ra::all, ra::iota(2, ra::len-2)); // last two columns of a
  718. auto a4 = a(ra::iota(ra::len/2, 1, 2)); // odd rows of a
  719. a(ra::len - std::array {1, 3, 4}) = 0; // set to 0 the 1st, 3rd and 4th rows of a, counting from the end
  720. @end verbatim
  721. @end example
  722. @example
  723. @verbatim
  724. ra::Big<int, 3> b({2, 3, 4}, ...);
  725. auto b0 = b(ra::dots<2>, ra::len-1); // ra::len is a.len(2)
  726. auto b1 = b(ra::insert<1>, ra::len-1); // ra::len is a.len(0)
  727. @end verbatim
  728. @end example
  729. @end deffn
  730. @cindex @code{insert}
  731. @anchor{x-insert}
  732. @deffn @w{Special object} insert<n>
  733. Inserts @code{n} new axes at the subscript position. @code{n} must not be negative.
  734. The new axes have step 0 and undefined length, so they will match any length on those axes by repeating items. @code{insert} objects cannot stand alone as an array expression. The examples below result in @code{View} objects:
  735. @example
  736. @verbatim
  737. auto h = a(insert<0>); // same as (1)
  738. auto k = a(insert<1>); // shape [undefined, 3, 2]
  739. @end verbatim
  740. @end example
  741. @cindex broadcasting, singleton, Numpy
  742. @code{insert<n>} main use is to prepare arguments for broadcasting. In other array systems (e.g. Numpy) broadcasting is done with singleton dimensions, that is, dimensions of length one match dimensions of any length. In @code{ra::} singleton dimensions aren't special, so broadcasting requires the use of @code{insert}. For example: @c [ma115]
  743. @example
  744. @verbatim
  745. ra::Big<int, 1> x = {1, 10};
  746. // match shapes [2, U, U] with [U, 3, 2] to produce [2, 3, 2]
  747. cout << x(ra::all, ra::insert<2>) * a(insert<1>) << endl;
  748. @end verbatim
  749. @print{} 2 3 2
  750. 1 2
  751. 3 4
  752. 5 6
  753. 10 20
  754. 30 40
  755. 50 60
  756. @end example
  757. @end deffn
  758. We were speaking earlier of the outer product of the subscripts with operator @code{A}. Here's a way to perform the actual outer product (with operator @code{*}) of two @code{Views}, through broadcasting. All three lines are equivalent. See @ref{x-from,@code{from}} for a more general way to compute outer products.
  759. @example
  760. @verbatim
  761. cout << (A(ra::dots<A.rank()>, ra::insert<B.rank()>) * B(ra::insert<A.rank()>, ra::dots<B.rank()>)) << endl;
  762. cout << (A(ra::dots<>, ra::insert<B.rank()>) * B(ra::insert<A.rank()>, ra::dots<>)) << endl; // default dots<>
  763. cout << (A * B(ra::insert<A.rank()>)) << endl; // index elision + prefix matching
  764. @end verbatim
  765. @end example
  766. @subsection Subscripting and rank-0 views
  767. @cindex view, rank 0
  768. @cindex rank, runtime
  769. @cindex rank, compile-time
  770. When the result of the subscripting operation would have rank 0, the type returned is the type of the view @emph{element} and not a rank-0 view as long as the rank of the result can be determined at compile time. When that's not possible (for instance, the subscripted view has rt rank) then a rank-0 view is returned instead. An automatic conversion is defined for rank-0 views, but manual conversion may be needed in some contexts.
  771. @example
  772. @verbatim
  773. using T = std::complex<double>;
  774. int f(T &);
  775. Big<T, 2> a({2, 3}, 0); // ct rank
  776. Big<T> b({2, 3}, 0); // rt rank
  777. cout << a(0, 0).real_part() << endl; // ok, a(0, 0) returns complex &
  778. // cout << b(0, 0).real_part() << endl; // error, View<T> has no member real_part
  779. cout << ((T &)(b(0, 0))).real_part() << endl; // ok, manual conversion to T &
  780. cout << f(b(0, 0)) << endl; // ok, automatic conversion from View<T> to T &
  781. // cout << f(a(0)) << endl; // compile time error, conversion failed since ct rank of a(0) is not 0
  782. // cout << f(b(0)) << endl; // runtime error, conversion failed since rt rank of b(0) is not 0
  783. @end verbatim
  784. @end example
  785. @c ------------------------------------------------
  786. @node Functions
  787. @section Functions
  788. @c ------------------------------------------------
  789. You don't need to use @ref{Array operations,@code{map}} every time you want to do something with arrays in @code{ra::}. A number of array functions are already defined.
  790. @anchor{x-scalar-ops}
  791. @subsection Standard scalar operations
  792. @code{ra::} defines array extensions for @code{+}, @code{-} (both unary and binary), @code{*}, @code{/}, @code{!}, @code{&&}, @code{||}@footnote{@code{&&}, @code{||} are short-circuiting as array operations; the elements of the second operand won't be evaluated if the elements of the first one evaluate to @code{false} or @code{true}, respectively.
  793. Note that if both operands are of rank 0 and at least one of them is an @code{ra::} object, they is no way to preserve the behavior of @code{&&} and @code{||} with built in types and avoid evaluating both, since the overloaded operators @url{http://en.cppreference.com/w/cpp/language/operators, are normal functions}.}, @code{>}, @code{<}, @code{>=}, @code{<=}, @code{<=>}, @code{==}, @code{!=}, @code{pow}, @code{sqr}, @code{abs}, @code{cos}, @code{sin}, @code{exp}, @code{expm1}, @code{sqrt}, @code{log}, @code{log1p}, @code{log10}, @code{isfinite}, @code{isnan}, @code{isinf}, @code{max}, @code{min}, @code{asin}, @code{acos}, @code{atan}, @code{atan2}, @code{cosh}, @code{sinh}, @code{tanh}, @code{lerp}, and @code{fma}.
  794. Extending other scalar operations is straightforward; see @ref{x-new-array-operations,New array operations}. @code{ra::} also defines (and extends) the non-standard functions @code{odd}, @ref{x-sqr,@code{sqr}}, @ref{x-sqrm,@code{sqrm}}, @ref{x-conj,@code{conj}}, @ref{x-rel-error,@code{rel_error}}, and @ref{x-xi,@code{xi}}.
  795. For example:
  796. @example @c [ma110]
  797. @verbatim
  798. cout << exp(ra::Small<double, 3> {4, 5, 6}) << endl;
  799. @end verbatim
  800. @print{} 54.5982 148.413 403.429
  801. @end example
  802. @subsection Conditional operations
  803. @ref{x-map,@code{map}} evaluates all of its arguments before passing them along to its operator. This isn't always what you want. The simplest example is @code{where(condition, iftrue, iffalse)}, which returns an expression that will evaluate @code{iftrue} when @code{condition} is true and @code{iffalse} otherwise.
  804. @example
  805. @verbatim
  806. ra::Big<double> x ...
  807. ra::Big<double> y = where(x>0, expensive_expr_1(x), expensive_expr_2(x));
  808. @end verbatim
  809. @end example
  810. Here @code{expensive_expr_1} and @code{expensive_expr_2} are array expressions. So the computation of the other arm would be wasted if one were to do instead
  811. @example
  812. @verbatim
  813. ra::Big<double> y = map([](auto && w, auto && t, auto && f) -> decltype(auto) { return w ? t : f; }
  814. x>0, expensive_expr_1(x), expensive_function_2(x));
  815. @end verbatim
  816. @end example
  817. If the expressions have side effects, then @code{map} won't even give the right result.
  818. @c [ma109]
  819. @example
  820. @verbatim
  821. ra::Big<int, 1> o = {};
  822. ra::Big<int, 1> e = {};
  823. ra::Big<int, 1> n = {1, 2, 7, 9, 12};
  824. ply(where(odd(n), map([&o](auto && x) { o.push_back(x); }, n), map([&e](auto && x) { e.push_back(x); }, n)));
  825. cout << "o: " << ra::noshape << o << ", e: " << ra::noshape << e << endl;
  826. @end verbatim
  827. @print{} o: 1 7 9, e: 2 12
  828. @end example
  829. FIXME Artificial example.
  830. FIXME Do we want to expose ply(); this is the only example in the manual that uses it.
  831. When the choice is between more than two expressions, there's @ref{x-pick,@code{pick}}, which operates similarly, but accepts an integer instead of a boolean selector.
  832. @subsection Special operations
  833. Some operations are essentially scalar operations, but require special syntax and would need a lambda wrapper to be used with @code{map}. @code{ra::} comes with a few of these already defined.
  834. FIXME
  835. @subsection Elementwise reductions
  836. @code{ra::} defines the whole-array one-argument reductions @code{any}, @code{every}, @code{amax}, @code{amin}, @code{sum}, @code{prod} and the two-argument reductions @code{dot} and @code{cdot}. Note that @code{max} and @code{min} are two-argument scalar operations with array extensions, while @code{amax} and @code{amin} are reductions. @code{any} and @code{every} are short-circuiting.
  837. You can define reductions the same way @code{ra::} does:
  838. @example
  839. @verbatim
  840. template <class A>
  841. inline auto op_reduce(A && a)
  842. {
  843. T c = op_default;
  844. for_each([&c](auto && a) { c = op(c, a); }, a);
  845. return c;
  846. }
  847. @end verbatim
  848. @end example
  849. @anchor{x-axis-reductions}
  850. Often enough you need to reduce over particular axes. This is possible by combining assignment operators with the @ref{Rank extension,rank extension} mechanism, or using the @ref{The rank conjunction,rank conjunction}, or iterating over @ref{Cell iteration, cells of higher rank}. For example:
  851. @example
  852. @verbatim
  853. ra::Big<double, 2> a({m, n}, ...);
  854. ra::Big<double, 1> sum_rows({n}, 0.);
  855. iter<1>(sum_rows) += iter<1>(a);
  856. // for_each(ra::wrank<1, 1>([](auto & c, auto && a) { c += a; }), sum_rows, a) // alternative
  857. // sum_rows += transpose<1, 0>(a); // another
  858. ra::Big<double, 1> sum_cols({m}, 0.);
  859. sum_cols += a;
  860. @end verbatim
  861. @end example
  862. FIXME example with assignment op
  863. A few common operations of this type are already packaged in @code{ra::}.
  864. @subsection Special reductions
  865. @code{ra::} defines the following special reductions.
  866. FIXME
  867. @subsection Shortcut reductions
  868. Some reductions do not need to traverse the whole array if a certain condition is encountered early. The most obvious ones are the reductions of @code{&&} and @code{||}, which @code{ra::} defines as @code{every} and @code{any}.
  869. FIXME
  870. These operations are defined on top of another function @code{early}.
  871. FIXME early
  872. The following is often useful.
  873. FIXME lexicographical compare etc.
  874. @c ------------------------------------------------
  875. @node The rank conjunction
  876. @section The rank conjunction
  877. @c ------------------------------------------------
  878. We have seen in @ref{Cell iteration} that it is possible to treat an r-array as an array of lower rank with subarrays as its elements. With the @ref{x-iter,@code{iter<cell rank>}} construction, this ‘exploding’ is performed (notionally) on the argument; the operation of the array expression is applied blindly to these cells, whatever they turn out to be.
  879. @example
  880. @verbatim
  881. for_each(my_sort, iter<1>(A)); // (in ra::) my_sort is a regular function, cell rank must be given
  882. for_each(my_sort, iter<0>(A)); // (in ra::) error, bad cell rank
  883. @end verbatim
  884. @end example
  885. @c @cindex J
  886. The array language J has instead the concept of @dfn{verb rank}. Every function (or @dfn{verb}) has an associated cell rank for each of its arguments. Therefore @code{iter<cell rank>} is not needed.
  887. @example
  888. @verbatim
  889. for_each(sort_rows, A); // (not in ra::) will iterate over 1-cells of A, sort_rows knows
  890. @end verbatim
  891. @end example
  892. @c @cindex J
  893. @code{ra::} doesn't have ‘verb ranks’ yet. In practice one can think of @code{ra::}'s operations as having a verb rank of 0. However, @code{ra::} supports a limited form of J's @dfn{rank conjunction} with the function @ref{x-wrank,@code{wrank}}.
  894. @c @cindex J
  895. This is an operator that takes one verb (such operators are known as @dfn{adverbs} in J) and produces another verb with different ranks. These ranks are used for rank extension through prefix agreement, but then the original verb is used on the cells that result. The rank conjunction can be nested, and this allows repeated rank extension before the innermost operation is applied.
  896. A standard example is ‘outer product’.
  897. @example
  898. @verbatim
  899. ra::Big<int, 1> a = {1, 2, 3};
  900. ra::Big<int, 1> b = {40, 50};
  901. ra::Big<int, 2> axb = map(ra::wrank<0, 1>([](auto && a, auto && b) { return a*b; }),
  902. a, b)
  903. @end verbatim
  904. @result{} axb = @{@{40, 80, 120@}, @{50, 100, 150@}@}
  905. @end example
  906. It works like this. The verb @code{ra::wrank<0, 1>([](auto && a, auto && b) @{ return a*b; @})} has verb ranks (0, 1), so the 0-cells of @code{a} are paired with the 1-cells of @code{b}. In this case @code{b} has a single 1-cell. The frames and the cell shapes of each operand are:
  907. @example
  908. @verbatim
  909. a: 3 |
  910. b: | 2
  911. @end verbatim
  912. @end example
  913. Now the frames are rank-extended through prefix agreement.
  914. @example
  915. @verbatim
  916. a: 3 |
  917. b: 3 | 2
  918. @end verbatim
  919. @end example
  920. Now we need to perform the operation on each cell. The verb @code{[](auto && a, auto && b) @{ return a*b; @}} has verb ranks (0, 0). This results in the 0-cells of @code{a} (which have shape ()) being rank-extended to the shape of the 1-cells of @code{b} (which is (2)).
  921. @example
  922. @verbatim
  923. a: 3 | 2
  924. b: 3 | 2
  925. @end verbatim
  926. @end example
  927. This use of the rank conjunction is packaged in @code{ra::} as the @ref{x-from,@code{from}} operator. It supports any number of arguments, not only two.
  928. @example
  929. @verbatim
  930. ra::Big<int, 1> a = {1, 2, 3};
  931. ra::Big<int, 1> b = {40, 50};
  932. ra::Big<int, 2> axb = from([](auto && a, auto && b) { return a*b; }), a, b)
  933. @end verbatim
  934. @result{} axb = @{@{40, 80, 120@}, @{50, 100, 150@}@}
  935. @end example
  936. Another example is matrix multiplication. For 2-array arguments C, A and B with shapes C: (m, n) A: (m, p) and B: (p, n), we want to perform the operation C(i, j) += A(i, k)*B(k, j). The axis alignment gives us the ranks we need to use.
  937. @example
  938. @verbatim
  939. C: m | | n
  940. A: m | p |
  941. B: | p | n
  942. @end verbatim
  943. @end example
  944. First we'll align the first axes of C and A using the cell ranks (1, 1, 2). The cell shapes are:
  945. @example
  946. @verbatim
  947. C: m | n
  948. A: m | p
  949. B: | p n
  950. @end verbatim
  951. @end example
  952. Then we'll use the ranks (1, 0, 1) on the cells:
  953. @example
  954. @verbatim
  955. C: m | | n
  956. A: m | p |
  957. B: | p | n
  958. @end verbatim
  959. @end example
  960. The final operation is a standard operation on arrays of scalars. In actual @code{ra::} syntax:
  961. @example @c [ma103]
  962. @verbatim
  963. ra::Big A({3, 2}, {1, 2, 3, 4, 5, 6});
  964. ra::Big B({2, 3}, {7, 8, 9, 10, 11, 12});
  965. ra::Big C({3, 3}, 0.);
  966. for_each(ra::wrank<1, 1, 2>(ra::wrank<1, 0, 1>([](auto && c, auto && a, auto && b) { c += a*b; })), C, A, B);
  967. @end verbatim
  968. @result{} C = @{@{27, 30, 33@}, @{61, 68, 75@}, @{95, 106, 117@}@}
  969. @end example
  970. Note that @code{wrank} cannot be used to transpose axes in general.
  971. I hope that in the future something like @code{C(i, j) += A(i, k)*B(k, j)}, where @code{i, j, k} are special objects, can be automatically translated to the requisite combination of @code{wrank} and perhaps also @ref{x-transpose,@code{transpose}}. For the time being, you have to align or transpose the axes yourself.
  972. @c ------------------------------------------------
  973. @node Compatibility
  974. @section Compatibility
  975. @c ------------------------------------------------
  976. @subsection Using other C and C++ types with @code{ra::}
  977. @cindex foreign type
  978. @anchor{x-foreign-type}
  979. @code{ra::} accepts certain types from outside @code{ra::} (@dfn{foreign types}) as array expressions. Generally it is enough to mix the foreign type with a type from @code{ra::} and let deduction work.
  980. @example
  981. @verbatim
  982. std::vector<int> x = {1, 2, 3};
  983. ra::Small<int, 3> y = {6, 5, 4};
  984. cout << (x-y) << endl;
  985. @end verbatim
  986. @print{} -5 -3 -1
  987. @end example
  988. @cindex @code{start}
  989. Foreign types can be brought into @code{ra::} explicitly with the function @ref{x-start,@code{start}}.
  990. @example
  991. @verbatim
  992. std::vector<int> x = {1, 2, 3};
  993. // cout << sum(x) << endl; // error, sum not found
  994. cout << sum(ra::start(x)) << endl;
  995. cout << ra::sum(x) << endl;
  996. @end verbatim
  997. @print{} 6
  998. @end example
  999. The following types are accepted as foreign types:
  1000. @cindex built-in array
  1001. @itemize
  1002. @item Built-in arrays
  1003. produce an expression of positive rank and ct size.
  1004. @item @code{std::array}
  1005. produces an expression of rank 1 and ct size.
  1006. @item Other types conforming to @code{std::ranges::bidirectional_range}, such as @code{std::vector}, @code{std::string}, or @code{std::map}
  1007. produce an expression of rank 1 and rt size.
  1008. @end itemize
  1009. Raw pointers must be brought into @code{ra::} explicitly using the function @ref{x-ptr,@code{ptr}}, which produces an expression of rank 1 and @emph{undefined} size.
  1010. Compare:
  1011. @example @c [ma106]
  1012. @verbatim
  1013. int p[] = {1, 2, 3};
  1014. int * z = p;
  1015. ra::Big<int, 1> q {1, 2, 3};
  1016. q += p; // ok, q is ra::, p is foreign object with shape info
  1017. ra::start(p) += q; // can't redefine operator+=(int[]), foreign needs ra::start()
  1018. // z += q; // error: raw pointer needs ra::ptr()
  1019. ra::ptr(z) += p; // ok, shape is determined by foreign object p
  1020. @end verbatim
  1021. @end example
  1022. @anchor{x-is-scalar}
  1023. Some types are accepted automatically as scalars. These include non-pointer types for which @code{std::is_scalar_v} is true, like @code{char}, @code{int}, @code{double}, etc. as well as @code{std::complex<T>}. You can add your own types as scalar types with the following declaration:
  1024. @quotation
  1025. @verbatim
  1026. template <> constexpr bool ra::is_scalar_def<MYTYPE> = true;
  1027. @end verbatim
  1028. @end quotation
  1029. Otherwise, you can bring a scalar object into @code{ra::} on the spot, with the function @ref{x-scalar,@code{scalar}}.
  1030. @node Using @code{ra::} types with the STL
  1031. @subsection Using @code{ra::} types with the STL
  1032. STL compatible input/output iterators and ranges can be obtained from general @code{ra::} expressions through the functions @ref{x-begin,@code{begin}}, @ref{x-end,@code{end}}, and @ref{x-range,@code{range}}. These objects traverse the elements of the expression (0-cells) in row major order.@footnote{Unqualified @code{begin()} would find @code{std::begin} through ADL since @code{Big} is parameterized by @code{std::vector}. This still works since @code{std::begin} forwards to @code{A.begin()}. It's up to you if you want to rely on such things.}
  1033. @example @c [ma118]
  1034. @verbatim
  1035. ra::Big<int, 2> A = {{3, 0, 0}, {4, 5, 6}, {0, 5, 6}};
  1036. std::accumulate(ra::begin(A), ra::end(A), 0); // or just sum(A)
  1037. @end verbatim
  1038. @result{} 29
  1039. @end example
  1040. For temporary expressions that are stated once, the ranges interface is more appropriate.
  1041. @example @c [ma118]
  1042. @verbatim
  1043. cout << std::ranges::count(range(A>3), true) << endl; // or sum(cast<int>(A>3))
  1044. @end verbatim
  1045. @result{} 5
  1046. @end example
  1047. One can create ranges from higher rank @code{ra::} iterators and thus use STL algorithms over cells of any rank, but note that in the current version of @code{ra::}, @ref{x-iter,@code{iter<k>()}} only works on views, not on general expressions.
  1048. @example @c [ma118]
  1049. @verbatim
  1050. // count rows with 0s in them
  1051. cout << std::ranges::count_if(range(iter<1>(A)), [](auto const & x) { return any(x==0); }) << endl;
  1052. @end verbatim
  1053. @result{} 2
  1054. @end example
  1055. For @ref{Containers and views,containers}, @code{ra::} @code{begin}/@code{end}/@code{range} provide random access iterators and ranges, which is handy for functions such as @code{std::sort}. These could be provided for general expressions, but they wouldn't be efficient for ranks above 1, and I haven't implemented them. The container @code{std::random_access_iterator}s that are provided are in fact raw pointers.
  1056. @example @c [ma106]
  1057. @verbatim
  1058. ra::Big<int> x {3, 2, 1}; // x is a Container
  1059. auto y = x(); // y is a View on x
  1060. // std::sort(ra::begin(y), ra::end(y)); // error: begin(y) is not std::random_access_iterator
  1061. std::sort(ra::begin(x), ra::end(x)); // ok, we know x is stored in row-major order
  1062. @end verbatim
  1063. @result{} x = @{1, 2, 3@}
  1064. @end example
  1065. @cindex other libraries, interfacing with
  1066. @subsection Using @code{ra::} types with other libraries
  1067. When you have to pass arrays back and forth between your program and an external library, or perhaps another language, it is necessary for both sides to agree on memory layout. @code{ra::} gives you access to its own memory layout and allows you to obtain an @code{ra::} view on any piece of memory.
  1068. @subsubsection The good array citizen
  1069. @c FIXME Put these in examples/ and reference them here.
  1070. @cindex BLIS
  1071. The good array citizen describes its arrays with the same parameters as @code{ra::}: a pointer, plus a length and a step per dimension. You don't have to worry about special cases. Say @url{https://github.com/flame/blis, BLIS}:
  1072. @quotation
  1073. @verbatim
  1074. #include <blis.h>
  1075. ra::Big<double, 2> A({M, K}, ...);
  1076. ra::Big<double, 2> B({K, N}, ...);
  1077. ra::Big<double, 2> C({M, N}, ...);
  1078. double alpha = ...;
  1079. double beta = ...;
  1080. // C := βC + αAB
  1081. bli_dgemm(BLIS_NO_TRANSPOSE, BLIS_NO_TRANSPOSE, M, N, K, &alpha,
  1082. A.data(), A.step(0), A.step(1),
  1083. B.data(), B.step(0), B.step(1),
  1084. &beta, C.data(), C.step(0), C.step(1));
  1085. @end verbatim
  1086. @end quotation
  1087. @cindex FFTW
  1088. Another good array citizen, @url{http://fftw.org, FFTW} handles arbitrary rank:
  1089. @quotation
  1090. @verbatim
  1091. #include <fftw3.h>
  1092. ...
  1093. using complex = std::complex<double>;
  1094. static_assert(sizeof(complex)==sizeof(fftw_complex));
  1095. // forward DFT over the last r axes of a -> b
  1096. void fftw(int r, ra::View<complex> const a, ra::View<complex> b)
  1097. {
  1098. int const rank = a.rank();
  1099. assert(r>0 && r<=rank);
  1100. assert(every(ra::start(shape(a))==shape(b)));
  1101. fftw_iodim dims[r];
  1102. fftw_iodim howmany_dims[rank-r];
  1103. for (int i=0; i!=rank; ++i) {
  1104. if (i>=rank-r) {
  1105. dims[i-rank+r].n = a.len(i);
  1106. dims[i-rank+r].is = a.step(i);
  1107. dims[i-rank+r].os = b.step(i);
  1108. } else {
  1109. howmany_dims[i].n = a.len(i);
  1110. howmany_dims[i].is = a.step(i);
  1111. howmany_dims[i].os = b.step(i);
  1112. }
  1113. }
  1114. fftw_plan p;
  1115. p = fftw_plan_guru_dft(r, dims, rank-r, howmany_dims,
  1116. (fftw_complex *)(a.data()), (fftw_complex *)(b.data()),
  1117. FFTW_FORWARD, FFTW_ESTIMATE);
  1118. fftw_execute(p);
  1119. fftw_destroy_plan(p);
  1120. }
  1121. @end verbatim
  1122. @end quotation
  1123. @cindex Guile
  1124. Translating array descriptors from a foreign language should be fairly simple. For example, this is how to convert a @url{https://www.gnu.org/software/guile/manual/html_node/Accessing-Arrays-from-C.html#Accessing-Arrays-from-C,Guile} array view into an @code{ra::} view:
  1125. @quotation
  1126. @verbatim
  1127. SCM a; // say a is #nf64(...)
  1128. ...
  1129. scm_t_array_handle h;
  1130. scm_array_get_handle(a, &h);
  1131. scm_t_array_dim const * dims = scm_array_handle_dims(&h);
  1132. View<double> v(map([](int i) { return ra::Dim { dims[i].ubnd-dims[i].lbnd+1, dims[i].inc }; },
  1133. ra::iota(scm_array_handle_rank(&h))),
  1134. scm_array_handle_f64_writable_elements(&h));
  1135. ...
  1136. scm_array_handle_release(&h);
  1137. @end verbatim
  1138. @end quotation
  1139. @cindex Numpy
  1140. @cindex Python
  1141. Numpy's C API has the type @url{https://docs.scipy.org/doc/numpy/reference/c-api.array.html,@code{PyArrayObject}} which can be used in the same way as Guile's @code{scm_t_array_handle} in the example above.
  1142. It is usually simpler to let the foreign language handle the memory, even though there should be ways to transfer ownership (e.g. Guile has @url{https://www.gnu.org/software/guile/manual/html_node/SRFI_002d4-API.html#index-scm_005ftake_005ff64vector,@code{scm_take_xxx}}).
  1143. @subsubsection The bad array citizen
  1144. Unfortunately there are many libraries that don't accept arbitrary array parameters, or that do strange things with particular values of lengths and/or steps.
  1145. The most common case is that a library doesn't handle steps at all, and it only accepts unit step for rank 1 arrays, or packed row-major or column-major storage for higher rank arrays. In that case, you might be forced to copy your array before passing it along.
  1146. @c FIXME using is_c_order, etc.
  1147. @cindex BLAS
  1148. Other libraries do accept steps, but not arbitrary ones. For example @url{https://www.netlib.org/blas}' @code{cblas_dgemm} has this prototype:
  1149. @quotation
  1150. @verbatim
  1151. cblas_dgemm(order, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
  1152. @end verbatim
  1153. @end quotation
  1154. @code{A}, @code{B}, @code{C} are (pointers to) 2-arrays, but the routine accepts only one step argument for each (@code{lda}, etc.). CBLAS also doesn't understand @code{lda} as a arbitrary step, but rather as the dimension of a larger array that you're slicing @code{A} from, and some implementations will mishandle negative or zero @code{lda}.
  1155. Sometimes you can work around this by fiddling with @code{transA} and @code{transB}, but in general you need to check your array parameters and you may need to make copies.
  1156. @cindex OpenGL
  1157. OpenGL is another library that requires @url{https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/glVertexAttribPointer.xhtml,contortions:}. Quote:
  1158. @quotation
  1159. @verbatim
  1160. void glVertexAttribPointer(GLuint index,
  1161. GLint size,
  1162. GLenum type,
  1163. GLboolean normalized,
  1164. GLsizei step,
  1165. const GLvoid * pointer);
  1166. @end verbatim
  1167. [...]
  1168. @emph{step}
  1169. @quotation
  1170. Specifies the byte offset between consecutive generic vertex attributes. If step is 0, the generic vertex attributes are understood to be tightly packed in the array. The initial value is 0.
  1171. @end quotation
  1172. @end quotation
  1173. It isn't clear whether negative steps are legal, either. So just as with CBLAS, passing arbitrary array views may require copies.
  1174. @c ------------------------------------------------
  1175. @node Extension
  1176. @section Extension
  1177. @c ------------------------------------------------
  1178. @subsection New scalar types
  1179. @code{ra::} will let you construct arrays of arbitrary types out of the box. This is the same functionality you get with e.g. @code{std::vector}.
  1180. @example
  1181. @verbatim
  1182. struct W { int x; }
  1183. ra::Big<W, 2> w = {{ {4}, {2} }, { {1}, {3} }};
  1184. cout << W(1, 1).x << endl;
  1185. cout << amin(map([](auto && x) { return w.x; }, w)) << endl;
  1186. @end verbatim
  1187. @print{} 3
  1188. 1
  1189. @end example
  1190. However, if you want to mix arbitrary types in array operations, you'll need to tell @code{ra::} that that is actually what you want. This is to avoid conflicts with other libraries.
  1191. @example
  1192. @verbatim
  1193. template <> constexpr bool ra::is_scalar_def<W> = true;
  1194. ...
  1195. W ww {11};
  1196. for_each([](auto && x, auto && y) { cout << (x.x + y.y) << " "; }, w, ww); // ok
  1197. @end verbatim
  1198. @print{} 15 13 12 14
  1199. @end example
  1200. but
  1201. @example
  1202. @verbatim
  1203. struct U { int x; }
  1204. U uu {11};
  1205. for_each([](auto && x, auto && y) { cout << (x.x + y.y) << " "; }, w, uu); // error: can't find ra::start(U)
  1206. @end verbatim
  1207. @end example
  1208. @anchor{x-new-array-operations}
  1209. @subsection New array operations
  1210. @code{ra::} provides array extensions for standard operations such as @code{+}, @code{*}, @code{cos} @ref{x-scalar-ops,and so on}. You can add array extensions for your own operations in the obvious way, with @ref{x-map,@code{map}} (but note the namespace qualifiers):
  1211. @example
  1212. @verbatim
  1213. return_type my_fun(...) { };
  1214. ...
  1215. namespace ra {
  1216. template <class ... A> inline auto
  1217. my_fun(A && ... a)
  1218. {
  1219. return map(::my_fun, std::forward<A>(a) ...);
  1220. }
  1221. } // namespace ra
  1222. @end verbatim
  1223. @end example
  1224. @cindex overload set
  1225. If @code{my_fun} is an overload set, you can use@footnote{Simplified; see the references in @url{http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p1170r0.html}.}
  1226. @example
  1227. @verbatim
  1228. namespace ra {
  1229. template <class ... A> inline auto
  1230. my_fun(A && ... a)
  1231. {
  1232. return map([](auto && ... a) { return ::my_fun(a ...); }, std::forward<A>(a) ...);
  1233. }
  1234. } // namespace ra
  1235. @end verbatim
  1236. @end example
  1237. @cindex error
  1238. @cindex assert
  1239. @c ------------------------------------------------
  1240. @node Error handling
  1241. @section Error handling
  1242. @c ------------------------------------------------
  1243. @code{ra::} tries to detect a many errors as possible at compile time, but some errors, such as @ref{Rank extension,mismatch} between expressions of runtime shape or @ref{Slicing,out of range indices}, aren't apparent until runtime.
  1244. Runtime error handling in @code{ra::} is controlled by two macros.
  1245. @itemize
  1246. @item @code{RA_ASSERT(cond, ...)} ---
  1247. check that @code{cond} evaluates to true in the @code{ra::} namespace. The other arguments are informative.
  1248. @item @code{RA_DO_CHECK} ---
  1249. must have one of the values 0, 1, or 2.
  1250. @end itemize
  1251. They work as follows:
  1252. @itemize
  1253. @item If @code{RA_DO_CHECK} is 0, runtime checks are skipped.
  1254. @item If @code{RA_DO_CHECK} is not 0, runtime checks are done.
  1255. @itemize
  1256. @item If @code{RA_ASSERT} is defined, using @code{RA_ASSERT}.
  1257. @item If @code{RA_ASSERT} isn't defined, the method depends on the value of @code{RA_DO_CHECK}. The two options are 1 (plain @code{assert}) and 2 (prints the informative arguments and aborts). Other values are an error.
  1258. @end itemize
  1259. @end itemize
  1260. @code{ra::} contains uses of @code{assert} for checking invariants or for sanity checks that are separate from uses of @code{RA_ASSERT}. Those can be disabled in the usual way with @option{-DNDEBUG}, but note that @option{-DNDEBUG} will also disable any @code{assert}s that are a result of @code{RA_DO_CHECK=1}.
  1261. The performance cost of the runtime checks depends on the program. Without custom @code{RA_ASSERT}, @code{RA_DO_CHECK=1} usually has an acceptable cost, but @code{RA_DO_CHECK=2} may be more expensive. The default is @code{RA_DO_CHECK=1}.
  1262. The following example shows how errors might be reported depending on @code{RA_DO_CHECK}.
  1263. @cartouche
  1264. @verbatim
  1265. ra::Big<int, 2> a({10, 3}, 1);
  1266. ra::Big<int, 2> b({40, 3}, 2);
  1267. cout << sum(a+b) << endl;
  1268. @end verbatim
  1269. @end cartouche
  1270. @itemize
  1271. @item @code{RA_DO_CHECK=2}
  1272. @verbatim
  1273. *** ra::./ra/expr.hh:389,17 (check()) Mismatched shapes [10 3] [40 3]. ***}
  1274. @end verbatim
  1275. @item @code{RA_DO_CHECK=1}
  1276. @verbatim
  1277. ./ra/expr.hh:389: constexpr ra::Match<checkp, std::tuple<_UTypes ...>, std::tuple<std::integral_constant<int, I>...> >::Match(P ...)
  1278. [with bool checkp = true; P = {ra::Cell<int, const ra::SmallArray<ra::Dim, std::tuple<std::integral_constant<int, 2> >,
  1279. std::tuple<std::integral_constant<int, 1> >, std::tuple<ra::Dim, ra::Dim> >&, std::integral_constant<int, 0> >, ra::Cell<int, const
  1280. ra::SmallArray<ra::Dim, std::tuple<std::integral_constant<int, 2> >, std::tuple<std::integral_constant<int, 1> >, std::tuple<ra::Dim,
  1281. ra::Dim> >&, std::integral_constant<int, 0> >}; int ...I = {0, 1}]: Assertion `check()' failed.
  1282. @end verbatim
  1283. @end itemize
  1284. @cindex exception
  1285. You can redefine @code{RA_ASSERT} to something that is more appropriate for your program. For example, if you run @code{ra::} code under a shell, an abort may not be acceptable. @code{examples/throw.cc} shows how to throw a user-defined exception instead.
  1286. @c ------------------------------------------------
  1287. @node Extras
  1288. @chapter Extras
  1289. @c ------------------------------------------------
  1290. @c ------------------------------------------------
  1291. @node Hazards
  1292. @chapter Hazards
  1293. @c ------------------------------------------------
  1294. Some of these issues arise because @code{ra::} applies its principles systematically, which can have surprising results. Still others are the result of unfortunate compromises. And a few are just bugs.
  1295. @section Reuse of expression objects
  1296. Expression objects are meant to be used once. This applies to anything produced with @code{ra::map}, @code{ra::iter}, @code{ra::start}, or @code{ra::ptr}. Reuse errors are @emph{not} checked. For example:
  1297. @example
  1298. @verbatim
  1299. ra::Big<int, 2> B({3, 3}, ra::_1 + ra::_0*3); // {{0 1 2} {3 4 5} {6 7 8}}
  1300. std::array<int, 2> l = { 1, 2 };
  1301. cout << B(ra::ptr(l), ra::ptr(l)) << endl; // ok => {{4 5} {7 8}}
  1302. auto ll = ra::ptr(l);
  1303. cout << B(ll, ll) << endl; // ??
  1304. @end verbatim
  1305. @end example
  1306. @section Assignment to views
  1307. FIXME
  1308. With rt-shape containers (e.g. @code{Big}), @code{operator=} replaces the left hand side instead of writing over its contents. This behavior is inconsistent with @code{View::operator=} and is there only so that istream @code{>>} container may work; do not rely on it.
  1309. @section View of const vs const view
  1310. @c FIXME
  1311. FIXME
  1312. Passing view arguments by reference
  1313. @section Rank extension in assignments
  1314. Assignment of an expression onto another expression of lower rank may not do what you expect. This example matches @code{a} and 3 [both of shape ()] with a vector of shape (3). This is equivalent to @code{@{a=3+4; a=3+5; a=3+6;@}}. You may get a different result depending on traversal order.
  1315. @example @c [ma107]
  1316. @verbatim
  1317. int a = 0;
  1318. ra::scalar(a) = 3 + ra::Small<int, 3> {4, 5, 6}; // ?
  1319. @end verbatim
  1320. @result{} a = 9
  1321. @end example
  1322. Compare with
  1323. @example
  1324. @verbatim
  1325. int a = 0;
  1326. ra::scalar(a) += 3 + ra::Small<int, 3> {4, 5, 6}; // 0 + 3 + 4 + 5 + 6
  1327. @end verbatim
  1328. @result{} a = 18
  1329. @end example
  1330. @node Performance pitfalls of rank extension
  1331. @section Performance pitfalls of rank extension
  1332. In the following example where @code{b} has its shape extended from (3) to (3, 4), @code{f} is called 12 times, even though only 3 calls are needed if @code{f} doesn't have side effects. In such cases it might be preferrable to write the outer loop explicitly, or to do some precomputation.
  1333. @example
  1334. @verbatim
  1335. ra::Big<int, 2> a = {{1, 2, 3, 4}, {5, 6, 7, 8} {9, 10, 11, 12}};
  1336. ra::Big<int, 1> b = {1, 2, 3};
  1337. ra::Big<int, 2> c = map(f, b) + a;
  1338. @end verbatim
  1339. @end example
  1340. @node Initialization of nested types
  1341. @section Initialization of nested types
  1342. This doesn't do what you probably think it does.
  1343. @example
  1344. @verbatim
  1345. ra::Big<int2, 0> b({}, int2 {1, 3}); // b = {int2 {3, 3}} (*)
  1346. @end verbatim
  1347. @end example
  1348. What happens here is that rank-0 @code{b} is prefix matched to rank-1 @code{int2 @{1, 3@}} which results in two assignments, first @code{b} ← 1 and then @code{b} ← 3. Compare with
  1349. @example
  1350. @verbatim
  1351. ra::Big<int2, 0> b({}, ra::scalar(int2 {1, 3})); // b = {int2 {1, 3}} @c [ma116]
  1352. @end verbatim
  1353. @end example
  1354. The use of @ref{x-scalar,@code{scalar}} forces @code{b} to match the whole of @code{int2 @{1, 3@}} as a rank-0 object. After that, the single element in @code{b} is matched to @code{int2 @{1, 3@}}.
  1355. Assignment or initialization from higher rank to lower rank is something of a footgun. Assignments have some @ref{x-axis-reductions,legitimate uses}, but initializations like @code{(*)} may become errors in future versions of @code{ra::}.
  1356. @section Chained assignment
  1357. FIXME
  1358. When @code{a=b=c} works, it operates as @code{b=c; a=b;} and not as an array expression.
  1359. @section Unregistered scalar types
  1360. FIXME
  1361. @code{View<T, N> x; x = T()} fails if @code{T} isn't registered as @code{is_scalar}.
  1362. @enumerate
  1363. @item
  1364. Item 0
  1365. @item
  1366. Item 1
  1367. @item
  1368. Item 2
  1369. @end enumerate
  1370. @section The cost of @code{RA_DO_CHECK}
  1371. FIXME
  1372. @c ------------------------------------------------
  1373. @node Internals
  1374. @chapter Internals
  1375. @c ------------------------------------------------
  1376. @code{ra::} has two main components: a set of container classes, and the expression template mechanism. The container classes provide leaves for the expression template trees, and the container classes also make some use of the expression template mechanism internally (e.g. in the selection operator, or for initialization).
  1377. @menu
  1378. * Header structure::
  1379. * Type hierarchy::
  1380. * Term agreement::
  1381. * Traversal::
  1382. * Introspection::
  1383. * Building the test suite::
  1384. @end menu
  1385. @c ------------------------------------------------
  1386. @node Headers
  1387. @section Headers
  1388. @c ------------------------------------------------
  1389. The header structure of @code{ra::} is as follows.@c @footnote{Diagram generated using Graphviz and @url{https://www.flourish.org/cinclude2dot}.
  1390. @c @verbatim
  1391. @c cd ra && cinclude2dot.pl --include . > headers.dot
  1392. @c dot -Tpng headers.dot -Gdpi=100 > headers.png
  1393. @c @end verbatim
  1394. @c }
  1395. @itemize
  1396. @item @code{tuples.hh} -
  1397. Generic macros and tuple library.
  1398. @item @code{base.hh} -
  1399. Basic types and concepts, introspection.
  1400. @item @code{expr.hh} -
  1401. Expression template nodes.
  1402. @item @code{ply.hh} -
  1403. Traversal, including I/O.
  1404. @item @code{small.hh} -
  1405. Array type with compile time dimensions.
  1406. @item @code{big.hh} -
  1407. Array type with run time dimensions.
  1408. @item @code{ra.hh} -
  1409. Functions and operators, main header.
  1410. @item @code{test.hh} -
  1411. (accessory) Testing/benchmarking library.
  1412. @item @code{dual.hh} -
  1413. (accessory) Dual number type and operations.
  1414. @end itemize
  1415. @c @image{headers,4cm}
  1416. @c ------------------------------------------------
  1417. @node Type hierarchy
  1418. @section Type hierarchy
  1419. @c ------------------------------------------------
  1420. Some of the categories below are C++20 ‘concepts’, some are still informal.
  1421. @itemize
  1422. @item @b{Container} --- @code{Big}, @code{Shared}, @code{Unique}, @code{Small}
  1423. These are array types that own their data in one way or another.
  1424. @item @b{View} --- @code{ViewBig}, @code{ViewSmall}
  1425. These are array views into data in memory, which may be writable. Any of the @b{Container} types can be treated as a @b{View}, but one may also create @b{View}s into memory that has been allocated independently.
  1426. @item @b{Iterator} --- @code{CellBig}, @code{CellSmall}, @code{Iota}, @code{Ptr}, @code{Scalar}, @code{Expr}, @code{Pick}
  1427. This is a traversable object. @b{Iterator}s are accepted by all the array functions such as @code{map}, @code{for_each}, etc. @code{map} produces an @b{Iterator} itself, so most array expressions are @b{Iterator}s. @b{Iterator}s are created from @b{View}s and from certain foreign array-like types primarily through the function @code{start}. This is done automatically when those types are used in array expressions.
  1428. @b{Iterator}s have two traversal functions: @code{.adv(k, d)}, moves the iterator along any dimension @var{k}, and @code{.mov(d)}, is used on linearized views of the array. The methods @code{len()}, @code{step()}, @code{keep()} are used to determine the extent of these linearized views. In this way, a loop involving @b{Iterator}s can have its inner loop unfolded, which is faster than a nested loop, especially if the inner dimensions of the loop are small.
  1429. @b{Iterator}s also provide an @code{at(i ...)} method for random access to any element.
  1430. @end itemize
  1431. @c ------------------------------------------------
  1432. @node Term agreement
  1433. @section Term agreement
  1434. @c ------------------------------------------------
  1435. The execution of an expression template begins with the determination of its shape — the length of each of its dimensions. This is done recursively by traversing the terms of the expression. For a given dimension @code{k}≥0, terms that have rank less or equal than @code{k} are ignored, following the prefix matching principle. Likewise terms where dimension @code{k} has undefined length (such as @code{iota()} or dimensions created with @code{insert}) are ignored. All the other terms must match.
  1436. Then we select a order of traversal. @code{ra::} supports ‘array’ orders, meaning that the dimensions are sorted in a certain way from outermost to innermost and a full dimension is traversed before one advances on the dimension outside. However, currently (v@value{VERSION}) there is no heuristic to choose a dimension order, so traversal always happens in row-major order (which shouldn't be relied upon). @code{ply_ravel} will unroll as many innermost dimensions as it can, and in some cases traversal will be executed as a flat loop.
  1437. Finally we select a traversal method. @code{ra::} has two traversal methods: @code{ply_fixed} can be used when the rank and the traversal order are known at compile time, and @code{ply_ravel} can be used in the general case.
  1438. @c ------------------------------------------------
  1439. @node Traversal
  1440. @section Traversal
  1441. @c ------------------------------------------------
  1442. @c TODO
  1443. @c ------------------------------------------------
  1444. @node Introspection
  1445. @section Introspection
  1446. @c ------------------------------------------------
  1447. The following functions are available to query the properties of @code{ra::} objects.
  1448. @cindex @code{rank}
  1449. @anchor{x-rank}
  1450. @deftypefn @w{Function} rank_t rank e
  1451. Return the rank of expression @var{e}.
  1452. @end deftypefn
  1453. @cindex @code{shape}
  1454. @anchor{x-shape}
  1455. @deftypefn @w{Function} array shape e
  1456. @deftypefnx @w{Function} expr shape e k
  1457. The first form returns the shape of expression @var{e} as an array. The second form returns the length of axis @var{k}, i.e. @code{shape(e)[k]} ≡ @code{shape(e, k)}. It is possible to use @ref{x-len,@code{len}} in @var{k} to mean the rank of @var{e}.
  1458. @example
  1459. @verbatim
  1460. ra::Small<int, 2, 3, 4> A;
  1461. cout << shape(A, ra::len-1) << endl; // length of last axis
  1462. @end verbatim
  1463. @print{} 4
  1464. @end example
  1465. If @var{k} is not a scalar, @code{shape(e, k)} returns a map of @var{k} over @code{shape(e, ·)}.
  1466. @example
  1467. @verbatim
  1468. cout << shape(A, ra::iota(2, ra::len-2)) << endl; // last two lengths
  1469. @end verbatim
  1470. @print{} 3 4
  1471. @end example
  1472. Note that the return type @var{array} of @code{shape(e)} might be a @ref{x-foreign-type,foreign type} (such as @code{std::array} or @code{std::vector}) instead of a @code{ra::} type. In that case, @code{shape(A)(ra::iota(2, ra::len-2))} will not work.
  1473. @end deftypefn
  1474. @c ------------------------------------------------
  1475. @node Building the test suite
  1476. @section Building the test suite
  1477. @c ------------------------------------------------
  1478. @code{ra::} uses its own test and benchmark suites and it comes with three kinds of tests: proper tests, benchmarks, and examples. To build and run them all, run the following from the top directory:
  1479. @quotation
  1480. @verbatim
  1481. CXXFLAGS=-O3 scons
  1482. @end verbatim
  1483. @end quotation
  1484. or
  1485. @quotation
  1486. @verbatim
  1487. CXXFLAGS=-O3 cmake . && make && make test
  1488. @end verbatim
  1489. @end quotation
  1490. @code{ra::} is highly dependent on the optimization level and the test suite may run much slower with @option{-O0} or @option{-O1}.
  1491. The SCons/CMake scripts accept the following options:
  1492. @cindex BLAS
  1493. @itemize
  1494. @item @code{scons --no-sanitize} / @code{cmake -DSANITIZE=0}
  1495. Remove the sanitizer flags. This is necessary because appending @code{-fno-sanitize=all} to @code{CXXFLAGS}, as in @code{-fsanitize=... -fno-sanitize=all}, doesn't completely remove the sanitizers. The default is to build with sanitizers.
  1496. @item @code{scons --blas}
  1497. Try to use BLAS in the benchmarks. The CMake script tries to detect BLAS automatically, but the SCons script doesn't, hence this option.
  1498. @end itemize
  1499. @c TODO Flags and notes about different compilers
  1500. @c ------------------------------------------------
  1501. @node The future
  1502. @chapter The future
  1503. @c ------------------------------------------------
  1504. @section Error messages
  1505. FIXME
  1506. @section Reductions
  1507. FIXME
  1508. @section Etc
  1509. FIXME
  1510. @c ------------------------------------------------
  1511. @node Reference
  1512. @chapter Reference
  1513. @c ------------------------------------------------
  1514. @cindex @code{agree}
  1515. @anchor{x-agree} @defun agree arg ...
  1516. Return true if the shapes of arguments @var{arg...} match (see @ref{Rank extension}), else return false.
  1517. This is useful when @ref{Error handling,error checking} is enabled and one wants to avoid the failure response.
  1518. @example
  1519. @verbatim
  1520. ra::Small<int, 2, 3> A;
  1521. ra::Small<int, 2> B;
  1522. ra::Small<int, 3> C;
  1523. agree(A, B); // -> true
  1524. static_assert(agree(A, B)); // ok for ct shapes
  1525. cout << (A+B) << endl; // ok
  1526. agree(A, C); // -> false
  1527. cout << (A+C) << endl; // error. Maybe abort, maybe throw - cf Error Handling
  1528. @end verbatim
  1529. @end example
  1530. @end defun
  1531. @cindex @code{agree_op}
  1532. @anchor{x-agree_op} @defun agree_op op arg ...
  1533. Return true if the shapes of arguments @var{arg...} match (see @ref{Rank extension}) relative to operator @var{op}, else return false.
  1534. This differs from @ref{x-agree,@code{agree}} when @var{op} has non-zero argument ranks. For example:
  1535. @example
  1536. @verbatim
  1537. ra::Big<real, 1> a({3}, 0.);
  1538. ra::Big<real, 2> b({2, 3}, 0.n);
  1539. agree(a, b); // -> false
  1540. cout << (a+b) << endl; // error
  1541. agree_op(ra::wrank<1, 1>(std::plus()), a, b); // -> true
  1542. cout << map(ra::wrank<1, 1>(std::plus()), a, b) << endl; // ok
  1543. @end verbatim
  1544. @end example
  1545. @end defun
  1546. @cindex @code{at}
  1547. @anchor{x-at} @defun at expr indices
  1548. Look up @var{expr} at each element of @var{indices}, which shall be a multi-index into @var{expr}.
  1549. This can be used for sparse subscripting. For example:
  1550. @example @c [ra30]
  1551. @verbatim
  1552. ra::Big<int, 2> A = {{100, 101}, {110, 111}, {120, 121}};
  1553. ra::Big<ra::Small<int, 2>, 2> i = {{{0, 1}, {2, 0}}, {{1, 0}, {2, 1}}};
  1554. ra::Big<int, 2> B = at(A, i);
  1555. @end verbatim
  1556. @result{} B = @{@{101, 120@}, @{110, 121@}@}
  1557. @end example
  1558. @end defun
  1559. @cindex @code{begin}
  1560. @anchor{x-begin} @defun begin expr
  1561. Create STL iterator from @var{expr}.
  1562. See @ref{Using @code{ra::} types with the STL}.
  1563. See also @ref{x-end,@code{end}}, @ref{x-range,@code{range}}.
  1564. @end defun
  1565. @cindex @code{cast}
  1566. @anchor{x-cast} @defun cast <type> expr
  1567. Create an array expression that casts @var{expr} into @var{type}.
  1568. @end defun
  1569. @cindex @code{collapse}
  1570. @anchor{x-collapse} @defun collapse
  1571. @c TODO
  1572. See also @ref{x-explode,@code{explode}}.
  1573. @end defun
  1574. @cindex @code{concrete}
  1575. @anchor{x-concrete} @defun concrete a
  1576. Convert the argument to a container of the same shape as @var{a}.
  1577. If the argument has rt or ct shape, it is the same for the result. The main use of this function is to obtain modifiable copy of an array expression without having to prepare a container beforehand, or compute the appropiate type.
  1578. @c FIXME example
  1579. @end defun
  1580. @cindex @code{diag}
  1581. @anchor{x-diag} @defun diag view
  1582. Equivalent to @code{transpose<0, 0>(view)}.
  1583. @end defun
  1584. @cindex @code{dot}
  1585. @anchor{x-dot} @defun dot a b
  1586. Compute dot product of expressions @var{a} and @var{b}.
  1587. @c TODO
  1588. @end defun
  1589. @cindex @code{explode}
  1590. @anchor{x-explode} @defun explode
  1591. @c TODO
  1592. See also @ref{x-collapse,@code{collapse}}.
  1593. @end defun
  1594. @cindex @code{end}
  1595. @anchor{x-end} @defun end expr
  1596. Create STL end iterator from @var{expr}.
  1597. See @ref{Using @code{ra::} types with the STL}.
  1598. See also @ref{x-begin,@code{begin}}, @ref{x-range,@code{range}}.
  1599. @end defun
  1600. @cindex @code{for_each}
  1601. @anchor{x-for_each} @defun for_each op expr ...
  1602. Create an array expression that applies @var{op} to @var{expr} ..., and traverse it. The return value of @var{op} is discarded.
  1603. For example:
  1604. @example
  1605. @verbatim
  1606. double s = 0.;
  1607. for_each([&s](auto && a) { s+=a; }, ra::Small<double, 1> {1., 2., 3})
  1608. @end verbatim
  1609. @result{} s = 6.
  1610. @end example
  1611. See also @ref{x-map,@code{map}}.
  1612. @end defun
  1613. @cindex @code{format_array}
  1614. @anchor{x-format_array} @defun format_array expr fmt
  1615. Format expression for character output. @var{fmt} is a struct with the following fields:
  1616. @itemize
  1617. @item @code{shape} (@code{bool}):
  1618. Whether to print the shape of @var{expr} before @var{expr} itself.
  1619. @item @code{sep0} (@code{char const *}):
  1620. Separator between 0-cells.
  1621. @item @code{sepn} (@code{char const *}):
  1622. Separator between cells of rank > 0. Printed once.
  1623. @item @code{rep} (@code{char const *}):
  1624. Separator between cells of rank > 1. Printed @var{c}-1 times, where @var{c} is the rank of cells.
  1625. @item @code{open} (@code{char const *}):
  1626. Dimension opener.
  1627. @item @code{end} (@code{char const *}):
  1628. Dimension closer.
  1629. @item @code{align} (@code{bool}):
  1630. Align the dimension openers. This works better (or at all) when @code{sepn} ends in a newline.
  1631. @end itemize
  1632. The shape that might be printed depending on @code{.shape} is not subject to these separators and is always printed as if @code{@{.open="", .close="", .sep0=" "@}}.
  1633. @example
  1634. @verbatim
  1635. ra::Small<int, 2, 2> A = {{1, 2}, {3, 4}};
  1636. cout << "case a:\n" << A << endl;
  1637. cout << "\ncase b:\n" << format_array(A) << endl;
  1638. cout << "\ncase c:\n" << format_array(A, {.sep0="|", .sepn="-"}) << endl;
  1639. cout << "\ncase d:\n" << format_array(A, {.shape=noshape, .open="{", .close="}", .sep0=", ", .sepn=",\n", .rep="", .align=true}) << endl;
  1640. @end verbatim
  1641. @print{}
  1642. @verbatim
  1643. case a:
  1644. 1 2
  1645. 3 4
  1646. case b:
  1647. 1 2
  1648. 3 4
  1649. case c:
  1650. 1|2-3|4
  1651. case d:
  1652. {{1, 2},
  1653. {3, 4}}
  1654. @end verbatim
  1655. @end example
  1656. @cindex @code{jstyle}
  1657. @cindex @code{cstyle}
  1658. @cindex @code{lstyle}
  1659. @cindex @code{pstyle}
  1660. The following styles are predefined: @code{jstyle} (the default), @code{cstyle}, @code{lstyle}, and @code{pstyle}. Currently @code{ra::operator>>(std::istream &)} can only read whitespace formats, like @code{jstyle}.
  1661. @example
  1662. @verbatim
  1663. ra::Big<int> A = {{{1, 2, 3}, {3, 4, 5}}, {{4, 5, 6}, {7, 8, 9}}};
  1664. cout << format_array(A, ra::pstyle) << endl;
  1665. // cout << ra::pstyle << A << endl; // variant
  1666. @end verbatim
  1667. @print{}
  1668. @verbatim
  1669. [[[1, 2, 3],
  1670. [3, 4, 5]],
  1671. [[4, 5, 6],
  1672. [7, 8, 9]]]
  1673. @end verbatim
  1674. @end example
  1675. See also @ref{x-noshape,@code{noshape}}, @ref{x-withshape,@code{withshape}}.
  1676. @end defun
  1677. @cindex @code{from}
  1678. @anchor{x-from} @defun from op expr ...
  1679. Create outer product expression. This is defined as
  1680. @display
  1681. @math{E = \mathrm{from}(\mathrm{op}, \mathrm{expr}_0, \mathrm{expr}_1 ...) ⇒ E(i_{00}, i_{01} ..., i_{10}, i_{11}, ..., ...) = \mathrm{op}\big(\mathrm{expr}_0(i_{00}, i_{01}, ...), \mathrm{expr}_1(i_{10}, i_{11}, ...), ...\big)}.
  1682. @end display
  1683. For example:
  1684. @example
  1685. @verbatim
  1686. ra::Big<double, 1> a {1, 2, 3};
  1687. ra::Big<double, 1> b {10, 20, 30};
  1688. ra::Big<double, 2> axb = from([](auto && a, auto && b) { return a*b; }, a, b)
  1689. @end verbatim
  1690. @result{} axb = @{@{10, 20, 30@}, @{20, 40, 60@}, @{30, 60, 90@}@}
  1691. @end example
  1692. @example
  1693. @verbatim
  1694. ra::Big<int, 1> i {2, 1};
  1695. ra::Big<int, 1> j {0, 1};
  1696. ra::Big<double, 2> A = {{1, 2}, {3, 4}, {5, 6}};
  1697. ra::Big<double, 2> Aij = from(A, i, j)
  1698. @end verbatim
  1699. @result{} Aij = @{@{6, 5@}, @{4, 3@}@}
  1700. @end example
  1701. The last example is more or less how @code{A(i, j)} is implemented for arbitrary subscripts (@pxref{The rank conjunction}).
  1702. @end defun
  1703. @cindex @code{gemm}
  1704. @anchor{x-gemm} @defun gemm a b
  1705. Compute matrix-matrix product of expressions @var{a} and @var{b}. This function returns a container.
  1706. @c TODO
  1707. See @ref{Performance pitfalls of rank extension}.
  1708. See also @ref{x-gemv,@code{gemv}}, @ref{x-gevm,@code{gevm}}.
  1709. @end defun
  1710. @cindex @code{gemv}
  1711. @anchor{x-gemv} @defun gemv a b
  1712. Compute matrix-vector product of expressions @var{a} and @var{b}. This function returns a container.
  1713. @c TODO
  1714. See @ref{Performance pitfalls of rank extension}.
  1715. See also @ref{x-gemm,@code{gemm}}, @ref{x-gevm,@code{gevm}}.
  1716. @end defun
  1717. @cindex @code{gevm}
  1718. @anchor{x-gevm} @defun gevm a b
  1719. Compute vector-matrix product of expressions @var{a} and @var{b}. This function returns a container.
  1720. @c TODO
  1721. See @ref{Performance pitfalls of rank extension}.
  1722. See also @ref{x-gemm,@code{gemm}}, @ref{x-gemv,@code{gemv}}.
  1723. @end defun
  1724. @cindex @code{imag_part}
  1725. @anchor{x-imag_part} @defun imag_part
  1726. Take imaginary part of a complex number. This can be used as reference.
  1727. For example: @c [ma115]
  1728. @example
  1729. @verbatim
  1730. ra::Small<std::complex<double>, 2, 2> A = {{1., 2.}, {3., 4.}};
  1731. imag_part(A) = -2*real_part(A);
  1732. cout << A << endl;
  1733. @end verbatim
  1734. @print{}
  1735. (1, -2) (2, -4)
  1736. (3, -6) (4, -8)
  1737. @end example
  1738. See also @ref{x-real_part,@code{real_part}}.
  1739. @end defun
  1740. @cindex @code{map}
  1741. @anchor{x-map} @defun map op expr ...
  1742. Create an array expression that applies callable @var{op} to @var{expr} ...
  1743. For example:
  1744. @example
  1745. @verbatim
  1746. ra::Big<double, 1> x = map(cos, std::array {0.});
  1747. @end verbatim
  1748. @result{} x = @{ 1. @}
  1749. @end example
  1750. @var{op} can return a reference. For example:
  1751. @example
  1752. @verbatim
  1753. ra::Big<int, 2> x = {{3, 3}, 0.};
  1754. ra::Big<int, 2> i = {0, 1, 1, 2};
  1755. ra::Big<int, 2> j = {1, 0, 2, 1};
  1756. map(x, i, j) = 1;
  1757. @end verbatim
  1758. @result{} x = @{@{0, 1, 0@}, @{1, 0, 1@}, @{0, 1, 0@}@}
  1759. @end example
  1760. @var{op} can be any callable. For example:
  1761. @example
  1762. @verbatim
  1763. struct A { int a, b; };
  1764. std::vector<A> v = {{1, 2}, {3, 4}};
  1765. ra::map(&A::a, v) = -ra::map(&A::b, v); // pointer to member
  1766. @end verbatim
  1767. @result{} v = @{@{-2, 2@}, @{-4, 4@}@}
  1768. @end example
  1769. Operations defined on scalar arguments are usually extended to higher rank arguments through @code{op(i ...)} ≡ @code{map(op, i ...)}, but note that @ref{x-subscript-outer-product,this is not the case} when @var{op} is a view.
  1770. See also @ref{x-for_each,@code{for_each}}.
  1771. @end defun
  1772. @cindex @code{pack}
  1773. @anchor{x-pack} @defun pack <type> expr ...
  1774. Create an array expression that brace-constructs @var{type} from @var{expr} ...
  1775. @end defun
  1776. @cindex @code{pick}
  1777. @anchor{x-pick} @defun pick select_expr expr ...
  1778. Create an array expression that selects the first of @var{expr} ... if @var{select_expr} is 0, the second if @var{select_expr} is 1, and so on. The expressions that are not selected are not looked up.
  1779. This function cannot be defined using @ref{x-map,@code{map}}, because @code{map} looks up each one of its argument expressions before calling @var{op}.
  1780. For example:
  1781. @example @c cf examples/readme.cc [ma100].
  1782. @verbatim
  1783. ra::Small<int, 3> s {2, 1, 0};
  1784. ra::Small<double, 3> z = pick(s, s*s, s+s, sqrt(s));
  1785. @end verbatim
  1786. @result{} z = @{1.41421, 2, 0@}
  1787. @end example
  1788. See also @ref{x-where,@code{where}}.
  1789. @end defun
  1790. @cindex @code{ply}
  1791. @anchor{x-ply} @defun ply expr
  1792. Traverse @var{expr}. @code{ply} returns @code{void} so @var{expr} should be run for effect.
  1793. It's rarely necessary to use @code{ply}. Expressions are traversed automatically when they are assigned to views, for example, or printed out. @ref{x-for_each,@code{for_each}}@code{(...)}, which is equivalent to @code{ply(map(...))}, should cover most other uses.
  1794. @example
  1795. @verbatim
  1796. double s = 0.;
  1797. ply(map([&s](auto && a) { s+=a; }, ra::Small<double, 1> {1., 2., 3})) // same as for_each
  1798. @end verbatim
  1799. @result{} s = 6.
  1800. @end example
  1801. @end defun
  1802. @cindex @code{real_part}
  1803. @anchor{x-real_part} @defun real_part
  1804. Take real part of a complex number. This can be used as reference.
  1805. See also @ref{x-imag_part,@code{imag_part}}.
  1806. @end defun
  1807. @cindex @code{reverse}
  1808. @anchor{x-reverse} @defun reverse view axis
  1809. Create a new view by reversing axis @var{k} of @var{view}.
  1810. This is equivalent to @code{view(ra::dots<k>, ra::iota(ra::len, ra::len-1, -1))}.
  1811. This operation does not work on arbitrary array expressions yet. @c TODO FILL
  1812. @end defun
  1813. @cindex @code{size}
  1814. @anchor{x-size} @defun size a
  1815. Get the total size of an @code{ra::} object: the product of all its lengths.
  1816. @end defun
  1817. @c FIXME example
  1818. @cindex @code{stencil}
  1819. @anchor{x-stencil} @defun stencil view lo hi
  1820. Create a stencil on @var{view} with lower bounds @var{lo} and higher bounds @var{hi}.
  1821. @var{lo} and @var{hi} are expressions of rank 1 indicating the extent of the stencil on each dimension. Scalars are rank extended, that is, @var{lo}=0 is equivalent to @var{lo}=(0, 0, ..., 0) with length equal to the rank @code{r} of @var{view}. The stencil view has twice as many axes as @var{view}. The first @code{r} dimensions are the same as those of @var{view} except that they have their lengths reduced by @var{lo}+@var{hi}. The last @code{r} dimensions correspond to the stencil around each element of @var{view}; the center element is at @code{s(i0, i1, ..., lo(0), lo(1), ...)}.
  1822. This operation does not work on arbitrary array expressions yet. @c TODO FILL
  1823. @end defun
  1824. @cindex @code{swap}
  1825. @anchor{x-swap} @defun swap a b
  1826. Swap the contents of containers @var{a} and @var{b}.
  1827. Both containers must be of the same storage type. The containers may have different shapes, but if at least one of them is of ct rank, then both of them must have the same rank.
  1828. This function reuses @code{std::swap} for same-rank overloads, so it must not be qualified (i.e. use @code{swap(a, b)}, not @code{ra::swap(a, b)}).
  1829. @end defun
  1830. @example @c [ra30]
  1831. @verbatim
  1832. ra::Big<int> a ({2, 3}, 1 + ra::_0 - ra::_1); // (1)
  1833. ra::Big<int> b ({2, 3, 4}, 1 - ra::_0 + ra::_1 + ra::_2); // (2)
  1834. swap(a, b);
  1835. // as if (1) had b and (2) had a
  1836. @end verbatim
  1837. @end example
  1838. @cindex @code{transpose}
  1839. @anchor{x-transpose}
  1840. @defun transpose <axes ...> (view) | (axes, view)
  1841. Create a new view by transposing the axes of @var{view}. The number of @var{axes} must match the rank of @var{view}.
  1842. @code{axes} are the @emph{destination} axes, that is, axis @math{i} of @var{view} matches axis @var{axes}@math{_i} of the result. For example:
  1843. @example
  1844. @verbatim
  1845. ra::Unique<double, 2> a = {{1, 2, 3}, {4, 5, 6}};
  1846. cout << transpose<1, 0>(a) << endl;
  1847. @end verbatim
  1848. @print{}
  1849. 3 2
  1850. 1 4
  1851. 2 5
  1852. 3 6
  1853. @end example
  1854. The rank of the result is @math{1+\mathrm{max}ᵢ(}@var{axes}@math{_i}@math{)} and it may be smaller or larger than that of @var{view}. If an axis is repeated, the step on the destination is the sum of the steps of the source axes and the length is the smallest of the lengths of the source axes. For example:
  1855. @example
  1856. @verbatim
  1857. ra::Unique<double, 2> a = {{1, 2, 3}, {4, 5, 6}};
  1858. cout << transpose<0, 0>(a) << endl; // { a(0, 0), a(1, 1) }
  1859. @end verbatim
  1860. @print{}
  1861. 2
  1862. 1 5
  1863. @end example
  1864. If one of the destination axes isn't listed in @var{axes}, then it becomes a ‘dead’ axis similar to those produced by @ref{x-insert,@code{insert}}. For example:
  1865. @example
  1866. @verbatim
  1867. ra::Unique<double, 1> a = {1, 2, 3};
  1868. cout << ((a*10) + transpose<1>(a)) << endl;
  1869. @end verbatim
  1870. @print{}
  1871. 3 3
  1872. 11 21 31
  1873. 12 22 32
  1874. 13 23 33
  1875. @end example
  1876. The two argument form lets you specify the axis list at runtime. In that case the result will have runtime rank as well. For example: @c [ma117]
  1877. @example
  1878. @verbatim
  1879. ra::Small<int, 2> axes = {0, 1};
  1880. ra::Unique<double, 2> a = {{1, 2, 3}, {4, 5, 6}};
  1881. cout << "A: " << transpose(axes, a) << endl;
  1882. axes = {1, 0};
  1883. cout << "B: " << transpose(axes, a) << endl;
  1884. @end verbatim
  1885. @print{}
  1886. A: 2
  1887. 2 3
  1888. 1 2 3
  1889. 4 5 6
  1890. B: 2
  1891. 3 2
  1892. 1 4
  1893. 2 5
  1894. 3 6
  1895. @end example
  1896. This operation does not work on arbitrary array expressions yet. @c TODO FILL
  1897. @end defun
  1898. @cindex @code{where}
  1899. @cindex Masking
  1900. @anchor{x-where} @defun where expred extrue exfalse
  1901. Create an array expression that selects @var{extrue} if @var{expred} is @code{true}, and @var{exfalse} if @var{expred} is @code{false}. The expression that is not selected is not looked up.
  1902. For example:
  1903. @example
  1904. @verbatim
  1905. ra::Big<double, 1> s {1, -1, 3, 2};
  1906. s = where(s>=2, 2, s); // saturate s
  1907. @end verbatim
  1908. @result{} s = @{1, -1, 2, 2@}
  1909. @end example
  1910. See also @ref{x-pick,@code{pick}}.
  1911. @end defun
  1912. @cindex @code{wrank}
  1913. @anchor{x-wrank} @defun wrank <input_rank ...> op
  1914. Wrap @var{op} using a rank conjunction (@pxref{The rank conjunction}).
  1915. @c TODO examples
  1916. @end defun
  1917. @c @anchor{x-reshape}
  1918. @c @defun reshape view shape
  1919. @c Create a new view with shape @var{shape} from the row-major ravel of @var{view}.
  1920. @c FIXME fill when the implementation is more mature...
  1921. @c @end defun
  1922. @c @anchor{x-ravel}
  1923. @c @defun ravel view
  1924. @c Return the ravel of @var{view} as a view on @var{view}.
  1925. @c FIXME fill when the implementation is more mature...
  1926. @c @end defun
  1927. @cindex @code{noshape}
  1928. @cindex @code{withshape}
  1929. @anchor{x-noshape}
  1930. @anchor{x-withshape}
  1931. @deffn @w{Special object} {withshape noshape}
  1932. If either of these objects is sent to a @code{std::ostream} before an expression object, the shape of that object will or will not be printed.
  1933. If the object has ct shape, the default is not to print the shape, so @code{noshape} isn't necessary, and conversely for @code{withshape} if the object has rt shape. Note that the array readers [@code{operator>>(std::istream &, ...)}] require the default setting.
  1934. For example:
  1935. @example
  1936. @verbatim
  1937. ra::Small<int, 2, 2> A = {77, 99};
  1938. cout << "case a:\n" << A << endl;
  1939. cout << "case b:\n" << ra::noshape << A << endl;
  1940. cout << "case c:\n" << ra::withshape << A << endl;
  1941. @end verbatim
  1942. @print{} case a:
  1943. 77 99
  1944. case b:
  1945. 77 99
  1946. case c:
  1947. 2
  1948. 77 99
  1949. @end example
  1950. but:
  1951. @example
  1952. @verbatim
  1953. ra::Big<int> A = {77, 99};
  1954. cout << "case a:\n" << A << endl;
  1955. cout << "case b:\n" << ra::noshape << A << endl;
  1956. cout << "case c:\n" << ra::withshape << A << endl;
  1957. @end verbatim
  1958. @print{} case a:
  1959. 1
  1960. 2
  1961. 77 99
  1962. case b:
  1963. 77 99
  1964. case c:
  1965. 1
  1966. 2
  1967. 77 99
  1968. @end example
  1969. Note that in the last example the very shape of @code{ra::Big<int>} has rt length, so that length (the rank of @code{A}, that is 1) is printed as part of the shape of @code{A}.
  1970. See also @ref{x-format_array,@code{format_array}}.
  1971. @end deffn
  1972. @cindex @code{ptr}
  1973. @anchor{x-ptr}
  1974. @deffn @w{Function} ptr bidirectional_iterator [len [step]]
  1975. @deffnx @w{Function} ptr bidirectional_range
  1976. Create rank-1 expression from foreign object.
  1977. If @code{len} is not given for @var{bidirectional_iterator}, the expression has undefined length, and needs to be matched with other expressions whose length is defined. @code{ra::} doesn't know what is actually accessible through the iterator, so be careful. For instance:
  1978. @example
  1979. @verbatim
  1980. int pp[] = {1, 2, 3};
  1981. int * p = pp; // erase length
  1982. ra::Big<int, 1> v3 {1, 2, 3};
  1983. ra::Big<int, 1> v4 {1, 2, 3, 4};
  1984. v3 += ra::ptr(p); // ok, shape (3): v3 = {2, 4, 6}
  1985. v4 += ra::ptr(p); // undefined, shape (4): bad access to p[3]
  1986. // cout << (ra::ptr(p)+ra::iota()) << endl; // ct error, expression has undefined shape
  1987. cout << (ra::ptr(p, 3)+ra::iota()) << endl; // ok, prints { 1, 3, 5 }
  1988. cout << (ra::ptr(p, 4)+ra::iota()) << endl; // undefined, bad access at p[4]
  1989. @end verbatim
  1990. @end example
  1991. Of course in this example one could simply have used @code{pp} instead of @code{ra::ptr(p)}, since the array type retains shape information.
  1992. @example
  1993. @verbatim
  1994. v3 += pp; // ok, shapes match
  1995. v4 += pp; // error checked by ra::, shape clash (4) += (3)
  1996. cout << (pp + ra::iota()) << endl; // ok, shape from pp
  1997. @end verbatim
  1998. @end example
  1999. @var{len} and @var{step} can be constant types. For example:
  2000. @example
  2001. @verbatim
  2002. char const * s = "hello";
  2003. auto p = ra::ptr(s, std::integral_constant<int, 2> {});
  2004. static_assert(2==ra::size(p)); // p not constexpr, but still ok
  2005. @end verbatim
  2006. @end example
  2007. @code{ra::ptr} is often equivalent to @ref{x-start,@code{start}}, and can be omitted in the same way, but raw pointers require @code{ra::ptr}.
  2008. See also @ref{x-start,@code{start}}, @ref{x-scalar,@code{scalar}}, @ref{x-iota,@code{iota}}.
  2009. @end deffn
  2010. @cindex @code{range}
  2011. @anchor{x-range} @defun range expr
  2012. Create STL range iterator from @var{expr}.
  2013. See @ref{Using @code{ra::} types with the STL}.
  2014. See also @ref{x-begin,@code{begin}}, @ref{x-end,@code{end}}.
  2015. @end defun
  2016. @cindex @code{start}
  2017. @anchor{x-start} @defun start foreign_object
  2018. Create array expression from @var{foreign_object}.
  2019. @var{foreign_object} can be a built-in array (e.g. @code{int[3][2]}), a @code{std::random_access_range} type (including @code{std::vector} or @code{std::array}, @pxref{Compatibility}), an initializer list, or any object that @code{ra::} accepts as scalar (see @ref{x-is-scalar,@code{here}}).
  2020. The resulting expresion has shape according to the original object. Compare this with @ref{x-scalar,@code{scalar}}, which only produces rank 0 expressions, or @ref{x-ptr,@code{ptr}}, which only produces rank 1 expressions.
  2021. Generally one can mix these types with @code{ra::} expressions without needing @code{ra::start}, but sometimes this isn't possible, for example for operators that must be class members.
  2022. @example
  2023. @verbatim
  2024. std::vector<int> x = {1, 2, 3};
  2025. ra::Big<int, 1> y = {10, 20, 30};
  2026. cout << (x+y) << endl; // same as ra::start(x)+y
  2027. // x += y; // error: no match for operator+=
  2028. ra::start(x) += y; // ok
  2029. @end verbatim
  2030. @print{} 3
  2031. 11 22 33
  2032. @result{} x = @{ 11, 22, 33 @}
  2033. @end example
  2034. See also @ref{x-scalar,@code{scalar}}, @ref{x-ptr,@code{ptr}}.
  2035. @end defun
  2036. @cindex @code{scalar}
  2037. @anchor{x-scalar} @defun scalar expr
  2038. Create scalar expression from @var{expr}.
  2039. The primary use of this function is to bring a scalar object into the @code{ra::} namespace. A somewhat artificial example:
  2040. @example
  2041. @verbatim
  2042. struct W { int x; }
  2043. ra::Big<W, 1> w { {1}, {2}, {3} };
  2044. // error: no matching function for call to start(W)
  2045. // for_each([](auto && a, auto && b) { cout << (a.x + b.x) << endl; }, w, W {7});
  2046. // bring W into ra:: with ra::scalar
  2047. for_each([](auto && a, auto && b) { cout << (a.x + b.x) << endl; }, w, ra::scalar(W {7}));
  2048. @end verbatim
  2049. @print{} 8
  2050. 9
  2051. 10
  2052. @end example
  2053. See also @ref{x-scalar-char-star,@code{this example}}.
  2054. Since @code{scalar} produces an object with rank 0, it's also useful when dealing with nested arrays, even for objects that are already in @code{ra::}. Consider:
  2055. @example
  2056. @verbatim
  2057. using Vec2 = ra::Small<double, 2>;
  2058. Vec2 x {-1, 1};
  2059. ra::Big<Vec2, 1> c { {1, 2}, {2, 3}, {3, 4} };
  2060. // c += x // error: x has shape (2) and c has shape (3)
  2061. c += ra::scalar(x); // ok: scalar(x) has shape () and matches c.
  2062. @end verbatim
  2063. @result{} c = @{ @{0, 3@}, @{1, 4@}, @{2, 5@} @}
  2064. @end example
  2065. The result is @{c(0)+x, c(1)+x, c(2)+x@}. Compare this with
  2066. @example
  2067. @verbatim
  2068. c(ra::iota(2)) += x; // c(ra::iota(2)) with shape (2) matches x with shape (2)
  2069. @end verbatim
  2070. @result{} c = @{ @{-1, 2@}, @{2, 5@}, @{2, 5@} @}
  2071. @end example
  2072. where the result is @{c(0)+x(0), c(1)+x(1), c(2)@}.
  2073. See also @ref{x-start,@code{start}}.
  2074. @end defun
  2075. @cindex @code{iter}
  2076. @anchor{x-iter} @defun iter <k> (view)
  2077. Create iterator over the @var{k}-cells of @var{view}. If @var{k} is negative, it's interpreted as the negative of the frame rank. In the current version of @code{ra::}, @var{view} may have rt or ct shape.
  2078. @example
  2079. @verbatim
  2080. ra::Big<int, 2> c {{1, 3, 2}, {7, 1, 3}};
  2081. cout << "max of each row: " << map([](auto && a) { return amax(a); }, iter<1>(c)) << endl;
  2082. ra::Big<int, 1> m({3}, 0);
  2083. scalar(m) = max(scalar(m), iter<1>(c));
  2084. cout << "max of each column: " << m << endl;
  2085. m = 0;
  2086. for_each([&m](auto && a) { m = max(m, a); }, iter<1>(c));
  2087. cout << "max of each column again: " << m << endl;
  2088. @end verbatim
  2089. @print{} max of each row: 2
  2090. 3 7
  2091. max of each column: 3
  2092. 7 3 3
  2093. max of each column again: 3
  2094. 7 3 3
  2095. @end example
  2096. @c [ma113]
  2097. In the following example, @code{iter} emulates @ref{x-scalar,@code{scalar}}. Note that the shape () of @code{iter<1>(m)} matches the shape (3) of @code{iter<1>(c)}. Thus, each of the 1-cells of @code{c} matches against the single 1-cell of @code{m}.
  2098. @example
  2099. @verbatim
  2100. m = 0;
  2101. iter<1>(m) = max(iter<1>(m), iter<1>(c));
  2102. cout << "max of each column yet again: " << m << endl;
  2103. @end verbatim
  2104. @print{} max of each column again: 3
  2105. 7 3 3
  2106. @end example
  2107. The following example computes the trace of each of the items [(-1)-cells] of @code{c}. @c [ma104]
  2108. @example
  2109. @verbatim
  2110. ra::Small<int, 3, 2, 2> c = ra::_0 - ra::_1 - 2*ra::_2;
  2111. cout << "c: " << c << endl;
  2112. cout << "s: " << map([](auto && a) { return sum(diag(a)); }, iter<-1>(c)) << endl;
  2113. @end verbatim
  2114. @print{} c: 0 -2
  2115. -1 -3
  2116. 1 -1
  2117. 0 -2
  2118. 2 0
  2119. 1 -1
  2120. s: -3 -1 -1
  2121. @end example
  2122. @end defun
  2123. @cindex @code{sum}
  2124. @anchor{x-sum} @defun sum expr
  2125. Return the sum (+) of the elements of @var{expr}, or 0 if expr is empty. This sum is performed in unspecified order.
  2126. @end defun
  2127. @cindex @code{prod}
  2128. @anchor{x-prod} @defun prod expr
  2129. Return the product (*) of the elements of @var{expr}, or 1 if expr is empty. This product is performed in unspecified order.
  2130. @end defun
  2131. @cindex @code{amax}
  2132. @anchor{x-amax} @defun amax expr
  2133. Return the maximum of the elements of @var{expr}. If @var{expr} is empty, return @code{-std::numeric_limits<T>::infinity()} if the type supports it, otherwise @code{std::numeric_limits<T>::lowest()}, where @code{T} is the value type of the elements of @var{expr}.
  2134. @end defun
  2135. @cindex @code{amin}
  2136. @anchor{x-amin} @defun amin expr
  2137. Return the minimum of the elements of @var{expr}. If @var{expr} is empty, return @code{+std::numeric_limits<T>::infinity()} if the type supports it, otherwise @code{std::numeric_limits<T>::max()}, where @code{T} is the value type of the elements of @var{expr}.
  2138. @end defun
  2139. @cindex @code{early}
  2140. @anchor{x-early} @defun early expr default
  2141. @var{expr} is an array expression that returns @code{std::optional<T>}. @var{expr} is traversed as by @code{for_each}. If the optional ever contains a value, traversal stops and that value is returned. If traversal ends, @var{default} is returned instead. If @code{default} is a reference, @code{early} will return its value. @c FIXME
  2142. The following definition of elementwise @code{lexicographical_compare} relies on @code{early}.
  2143. @example
  2144. @verbatim
  2145. template <class A, class B>
  2146. inline bool
  2147. lexicographical_compare(A && a, B && b)
  2148. {
  2149. return early(map([](auto && a, auto && b) { return a==b ? std::nullopt : std::make_optional(a<b); },
  2150. std::forward<A>(a), std::forward<B>(b)),
  2151. false);
  2152. }
  2153. @end verbatim
  2154. @end example
  2155. @end defun
  2156. @cindex @code{any}
  2157. @anchor{x-any} @defun any expr
  2158. Return @code{true} if any element of @var{expr} is true, @code{false} otherwise. The traversal of the array expression will stop as soon as possible, but the traversal order is not specified.
  2159. @end defun
  2160. @cindex @code{every}
  2161. @anchor{x-every} @defun every expr
  2162. Return @code{true} if every element of @var{expr} is true, @code{false} otherwise. The traversal of the array expression will stop as soon as possible, but the traversal order is not specified.
  2163. @end defun
  2164. @cindex @code{sqr}
  2165. @anchor{x-sqr} @defun sqr expr
  2166. Compute the square of the elements of @var{expr}.
  2167. @end defun
  2168. @cindex @code{sqrm}
  2169. @anchor{x-sqrm} @defun sqrm expr
  2170. Compute the square of the norm-2 of the elements of @var{expr}, that is, @code{conj(expr)*expr}.
  2171. @end defun
  2172. @cindex @code{conj}
  2173. @anchor{x-conj} @defun conj expr
  2174. Compute the complex conjugate of @var{expr}.
  2175. @end defun
  2176. @cindex @code{xi}
  2177. @anchor{x-xi} @defun xi expr
  2178. Compute @code{(0+1j)} times @var{expr}.
  2179. @end defun
  2180. @cindex @code{rel_error}
  2181. @anchor{x-rel-error} @defun rel_error a b
  2182. @var{a} and @var{b} are arbitrary array expressions. Compute the error of @var{a} relative to @var{b} as
  2183. @code{(a==0. && b==0.) ? 0. : 2.*abs(a, b)/(abs(a)+abs(b))}
  2184. @end defun
  2185. @cindex @code{none}
  2186. @anchor{x-none}
  2187. @deffn @w{Special object} {none}
  2188. Pass @code{none} to container constructors to indicate that the contents shouldn't be initialized. This is appropriate when the initialization you have in mind doesn't fit in a constructor argument. For example:
  2189. @example
  2190. @verbatim
  2191. void foreign_initializer(int m, int n, double *);
  2192. ra::Big<double> b({2, 3}, ra::none);
  2193. foreign_initializer(2, 3, b.data());
  2194. @end verbatim
  2195. @end example
  2196. @end deffn
  2197. @c ------------------------------------------------
  2198. @node @mybibnode{}
  2199. @chapter Sources
  2200. @c ------------------------------------------------
  2201. @multitable @columnfractions .1 .9
  2202. @item @mybibitem{Abr70} @tab Philip S. Abrams. An APL machine. Technical report SLAC-114 UC-32 (MISC), Stanford Linear Accelerator Center, Stanford University, Stanford, CA, USA, February 1970.
  2203. @item @mybibitem{Ber87} @tab Robert Bernecky. An introduction to function rank. ACM SIGAPL APL Quote Quad, 18(2):39–43, December 1987.
  2204. @item @mybibitem{bli17} @tab The Blitz++ meta-template library. @url{http://blitz.sourceforge.net}, November 2017.
  2205. @item @mybibitem{Cha86} @tab Gregory J. Chaitin. Physics in APL2, June 1986.
  2206. @item @mybibitem{FI68} @tab Adin D. Falkoff and Kenneth Eugene Iverson. APL\360 User’s manual. IBM Thomas J. Watson Research Center, August 1968.
  2207. @item @mybibitem{FI73} @tab Adin D. Falkoff and Kenneth Eugene Iverson. The design of APL. IBM Journal of Research and Development, 17(4):5–14, July 1973.
  2208. @item @mybibitem{FI78} @tab Adin D. Falkoff and Kenneth Eugene Iverson. The evolution of APL. ACM SIGAPL APL, 9(1):30– 44, 1978.
  2209. @item @mybibitem{J S} @tab J Primer. J Software, @url{https://www.jsoftware.com/help/primer/contents.htm}, November 2017.
  2210. @item @mybibitem{Mat} @tab MathWorks. MATLAB documentation, @url{https://www.mathworks.com/help/matlab/}, November 2017.
  2211. @item @mybibitem{num17} @tab NumPy. @url{http://www.numpy.org}, November 2017.
  2212. @item @mybibitem{Ric08} @tab Henry Rich. J for C programmers, February 2008.
  2213. @item @mybibitem{SSM14} @tab Justin Slepak, Olin Shivers, and Panagiotis Manolios. An array-oriented language with static rank polymorphism. In Z. Shao, editor, ESOP 2014, LNCS 8410, pages 27–46, 2014.
  2214. @item @mybibitem{Vel01} @tab Todd Veldhuizen. Blitz++ user’s guide, February 2001.
  2215. @item @mybibitem{Wad90} @tab Philip Wadler. Deforestation: transforming programs to eliminate trees. Theoretical Computer Science, 73(2): 231--248, June 1990. @url{https://doi.org/10.1016/0304-3975%2890%2990147-A}
  2216. @end multitable
  2217. @c ------------------------------------------------
  2218. @node Indices
  2219. @unnumbered Indices
  2220. @c ------------------------------------------------
  2221. @c @node Concept Index
  2222. @c @unnumbered Concept Index
  2223. @printindex cp
  2224. @c @node Function Index
  2225. @c @unnumbered Function Index
  2226. @c @printindex fn
  2227. @c \nocite{JLangReference,FalkoffIverson1968,Abrams1970,FalkoffIverson1973,FalkoffIverson1978,APLexamples1,ArraysCowan,KonaTheLanguage,blitz++2001}
  2228. @c ------------------------------------------------
  2229. @node Notes
  2230. @unnumbered Notes
  2231. @c ------------------------------------------------
  2232. @enumerate
  2233. @item
  2234. @code{ra::} uses the non standard @code{#pragma once} (supported on all major compilers).
  2235. @end enumerate
  2236. @bye