scope.rof 59 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031
  1. .\" NOTE: This file can be processed with eqn <filename> | tbl | troff -ms
  2. .hw unknowns
  3. .hw FORTRAN
  4. .hw FORMAC
  5. .hw REDUCE
  6. .hw GENTRAN
  7. .nr PS 11
  8. .ps 11
  9. .nr VS 15
  10. .EQ
  11. tdefine !eq '~ = back 80 / ~'
  12. ndefine !eq '~ = "./" ~'
  13. tdefine empty % size +1 \(es %
  14. ndefine empty % O./ %
  15. tdefine !member ' ^ \(mo back 70 / ^ '
  16. ndefine !member % C.-./ %
  17. tdefine => % ^ = back 40 = back 40 > ^ %
  18. ndefine => % "==>" %
  19. tdefine !=> % ^ = back 40 / back 30 = back 60 > ^ %
  20. tdefine <=> % ^ < back 25 = back 40 = back 60 > ^ %
  21. ndefine <=> % "<=>" %
  22. tdefine exists %"\s-3\h'.2m'\v'.2m'\z\(em\v'-.5m'\z\(em\v'-.5m'\z\(em\
  23. \v'.85m'\h'.9m'\v'-.3m'\z\(br\h'.01m'\(br\v'.3m'\h'.02m'\v'-.05m'\s0" ~%
  24. tdefine !exist %"\s-3\h'.2m'\v'.2m'\z\(em\v'-.5m'\z\(em\v'-.5m'\z\(em\
  25. \v'.85m'\h'.9m'\v'-.3m'\z\(br\h'.01m'\(br\v'.3m'\h'.02m'\v'-.05m'\
  26. \s0" { back 65 down 10 size +3 / } ^%
  27. tdefine forall % "\f1\s+1\zV\zV\v'-.2m'\h'.22m'\z--\v'.2m'\h'.05m'\s0\fP" %
  28. ndefine forall % V.- %
  29. tdefine orsign % ^ "\s-2\h'.05m'\v'.15m'\z\e\e\h'-.08m'\z\(sl\
  30. \(sl\h'-.1m'\v'-.15m'\s0" ^ %
  31. ndefine orsign % \e / %
  32. tdefine andsign % ^ "\s-2\v'.15m'\z\(sl\(sl\h'-.18m'\z\e\e\v'-.15m'\s0" ^ %
  33. ndefine andsign % / \e %
  34. delim @@
  35. .EN
  36. .de Sq
  37. .pl +\\n(.v
  38. .nr tL \\n(.l-\\w'\(sq'
  39. .lt 0
  40. .lt +\\n(.lu
  41. .br
  42. .if !\\n(.n-\\n(tL .sp -1
  43. .tl '''\fR\(sq\fP'
  44. .nr tL 0
  45. .pl -\\n(.v
  46. ..
  47. ..
  48. \f3SCOPE, a Source-Code Optimization PackagE for REDUCE - User's Manual\f1
  49. .sp 2
  50. \f2Preliminary Version\f1
  51. .sp 2
  52. \f2J.A. van Hulzen \f1
  53. .sp 4
  54. \f3Table of Contents\f1
  55. .LP
  56. .nr vs 13
  57. .TS
  58. expand,tab(/);
  59. l n.
  60. _
  61. .sp
  62. \f31. Introduction\f1/2
  63. .sp .3
  64. \f32. Source-Code Optimization : The Basic Facilities\f1/4
  65. .sp .3
  66. \ \ \ \f32.1. The Strategy\f1/4
  67. .sp .3
  68. \ \ \ \f32.2. The Facilities\f1/6
  69. .sp .3
  70. \f33. Preprocessing Possibilities\f1/17
  71. .sp .3
  72. \f34. Generation of Declarations\f1/22
  73. .sp .3
  74. \f35. File Management and Optimization Strategies\f1/23
  75. .sp .3
  76. \f36. Some Possible Shortcomings and Future Versions\f1/26
  77. .sp .3
  78. \f3Acknowledgements\f1/26
  79. .sp .3
  80. \f3References\f1/27
  81. .sp .3
  82. \f3Appendix 1: How to install the code\f1/29
  83. .sp
  84. _
  85. .TE
  86. .nr VS 15
  87. .vs 15
  88. .sp 10
  89. \f3Abstract\f1
  90. .sp .4
  91. A survey of the strategy behind and the facilities of SCOPE, a Source-Code
  92. Optimization PackagE for REDUCE is given. We avoid a detailed discussion of the
  93. different algorithms and concentrate on the user aspects of the package.
  94. Examples of straightforward and more advanced usage are shown.
  95. A combined use of GENTRAN and SCOPE is not yet discussed in this preliminary
  96. version of the SCOPE manual.
  97. .bp
  98. .NH
  99. Introduction
  100. .LP
  101. An important application of computer algebra systems is the generation
  102. of code for numerical purposes via automatic or semi-automatic program
  103. generation. GENTRAN [3,4], a flexible general-purpose package, was
  104. especially developed to assist in such a task, when using MACSYMA or
  105. REDUCE.
  106. .br
  107. Attendant to automatic program generation is the problem of automatic
  108. source-code optimization. This is a crucial aspect because code generated
  109. from symbolic computations often tends to be made up of lengthy arithmetic
  110. expressions. One of our test examples contained, for instance, 20534
  111. additions and subtractions, 41746 multiplications,
  112. 12473 integer exponentiations and 7990 other operations, such as function
  113. calls.
  114. These lengthy expressions will be grouped together in blocks of
  115. straightline code in a program for numerical purposes. The main objective
  116. of source-code optimization is to minimize the number of (elementary)
  117. arithmetic operations in such blocks.
  118. This form of optimization is often helpful in reducing redundancy in
  119. expressions. Simplification algorithms applied on expressions
  120. viewed as entities, neither guarantee complete structure preservation nor
  121. allow improvements inside expressions by renaming sub-expressions.
  122. .br
  123. Optimizing compilers ought to deal effectively and efficiently with the
  124. average, hand coded program. The enormous, arithmetic intensive
  125. expressions, easily producable by a computer algebra system, fall outside the
  126. range of the FORTRAN-programs, once analyzed and discussed by Knuth [15].
  127. He suggested that optimization of the arithmetic in such
  128. a program is slightly overdone. This may explain why even in reasonably
  129. recent literature, such as [1], optimization of arithmetic code is hardly
  130. discussed. The dag-models, usually employed for optimization of arithmetic
  131. code, hardly allow the application of any algebraic identity (see for instance
  132. [6]). These models often force constants to act as if they were
  133. indeterminates and powers
  134. as objects requiring function calls, i.e. they force to think of
  135. @2a~+~3b@ and @4a~+~6b@ or of @a sup 2@, @a sup 4@ and @a sup 6@ as being
  136. different entities.
  137. Our optimization strategy however, requires the validity of some
  138. elementary algebraic laws. We employ heuristic techniques to reduce the
  139. arithmetic complexity of the given representation of a set of input expressions
  140. @ roman E sub in@, thus producing a set of output expressions @roman E sub
  141. out@. The optimized version of the earlier mentioned test example contains
  142. "only" 4316 additions and subtractions, 4919 multiplications,
  143. 13 integer exponentiations and 60 other operations.
  144. @roman E sub in@ and @roman E sub out@ define blocks of code, which would
  145. compute the same exact values for the same exact inputs, thus implicitly
  146. proving the correctness of the underlying software. Obviously the use
  147. of @roman E sub out@ saves a considerable amount of execution time in
  148. comparison with the application of @roman E sub in@.
  149. Johnson e.a. [14] suggest that such transformations do not destabilize the
  150. computations. However this is only apparent after a careful error analysis
  151. of both @roman E sub in@ and @roman E sub out@. In view of the
  152. size of both @roman E sub in@ and @roman E sub out@ such an analysis has
  153. to be automatized as well. Work in this direction is in progress.
  154. .LP
  155. The current version of SCOPE, our Source-Code Optimization PackagE,
  156. is written in RLISP. It can be used as an extension of REDUCE [9].
  157. It allows to subject
  158. almost any set of proper REDUCE assignment statements to a heuristic
  159. search for common (sub)expressions (cse's).
  160. The output is obtained as a sequence of assignment statements,
  161. by default given in REDUCE syntax.
  162. .br
  163. The first version of the package was designed to optimize the description of
  164. REDUCE-statements, generated by NETFORM [17,18]. This version was
  165. tailored to a restrictive class of problems, occurring mainly in
  166. electrical network theory, thus implying that the
  167. righthand sides (rhs's) in the input were limited to elements of
  168. @{roman bold Z} sub 2@[V], where V is a set of identifiers.
  169. The second version [12] allowed rhs's from \f3Z\f1[V]. For both versions the
  170. validity of the commutative and the associative law was assumed.
  171. A third version evolved from the latter package by allowing to apply the
  172. distributive law, i.e. by replacing (sub)expressions like @a.b~+~a.c@
  173. by @a.(b~+~c)@ whenever possible.
  174. But the range of possible applications of this version was really
  175. enlarged by redefining V as a set of kernels, implying
  176. that, at least by that time, almost any proper REDUCE expression could
  177. function as a rhs.
  178. The mathematical capabilities of this version are shortly summarized in [19],
  179. in the context of code generation and optimization for finite-element analysis.
  180. It is used in combination with GENTRAN, for the construction
  181. of Jacobians and Hessians [10] and also in experiments with strategies for
  182. code vectorization [5].
  183. It still assumes constant coefficients to be elements of \f3Z\f1.
  184. The user-interface of the present version relies on some GENTRAN
  185. facilities.
  186. .sp
  187. .LP
  188. .ps 11
  189. In [11,12] we described the overall optimization-strategy used for SCOPE
  190. as a composite function @{roman R sup -1}~\(de~{roman T}~\(de~{roman R}@.
  191. The function R
  192. defines how to store the input @{roman E} sub 0@ in an expression data base
  193. @{roman D} sub 0@. This @{roman D} sub 0@ is formed by two matrix-structures
  194. and a function table.
  195. The incidence matrices represent @{roman E} sub 0@, a set of arithmetic
  196. expressions, in a two-dimensional structure where the rows represent expression
  197. or sub-expression references and the columns represent identifier references
  198. such as variable and function names.
  199. The function names are taken from the function table,
  200. consisting of a list of pairs of function applications occurring in
  201. @{roman E} sub 0@, and system selected names functioning as their placeholders
  202. during the optimization process. Arguments of functions are similarly
  203. entered in the matrix-structures when ever relevant.
  204. A given sub-expression will be entered in one
  205. of two types of incidence matrices, one for sums and one for products, depending
  206. on the nature of the arithmetic operation at the top level of the expression.
  207. The two matrices are correlated by auxiliary predecessor-successor information
  208. at the row level for every sub-expression reference. The actual entries in the
  209. matrices are either multiplicative numerical coefficients for the sums matrix
  210. or powers for the products matrix.
  211. The inverse function @{roman R} sup {-1}@ defines the output-production.
  212. The function T defines the optimization process itself. It essentially
  213. consists of a heuristic remodeling of the (extendable) matrices in
  214. combination with storing information required for a fast retrieval and
  215. correct insertion of the detected cse's in the output.
  216. This is accomplished by an iteratively applied search, resulting
  217. in a stepwise reduction of the arithmetic complexity of the input set, using
  218. an extended version of Breuer's grow factor algorithm [2,11,12]. It is applied until
  219. no further profit is gained, i.e. until the reduction in arithmetic
  220. complexity stops. Before producing output, a finishing touch can be performed
  221. to further reduce the arithmetic complexity with some locally applied
  222. techniques. The overall process can be summarized as follows:
  223. .ps 10
  224. .EQ
  225. {roman R}~mark :~{{roman E} sub 0}~->~({{roman D} sub 0},{{roman profit} sub 0})
  226. .EN
  227. .EQ
  228. {{roman T} sub beta}~mark :~({{roman D} sub i},{{roman profit} sub i})~->~
  229. ({{roman D} sub {i+1}},{{roman profit} sub {i+1}})~,~{roman i}~=~0,..., lambda -1.
  230. .EN
  231. .EQ
  232. {roman F}~mark :~({{roman D} sub lambda},{{roman profit} sub lambda})~->~
  233. {D sub lambda}
  234. .EN
  235. .EQ
  236. {{roman R} sup {-1}}~mark :~{D sub lambda}~->~{{roman E} sub lambda}
  237. .EN
  238. .ps 11
  239. @{roman D} sub 0@ is created as a result of an R-application
  240. performed on input @{roman E} sub 0@.
  241. The termination condition depends on some profit criterion related
  242. to the arithmetic complexity of the latest version of the input,
  243. @{{roman D} sub i}@. Hence we assume @{{roman profit} sub i}~=~true@ for
  244. @i~=~0,..., lambda -1@ and @{{roman profit} sub lambda}~=~false@.
  245. The function T is composite as well, and defined by @{roman T}~=~{roman F}~\(de~
  246. {{{roman T} sub beta} sup lambda}@. @{roman T} sub beta@
  247. defines one iteration step, i.e one
  248. application of the extended version of Breuer's algorithm. The function F
  249. defines a finishing touch, resulting in the final
  250. version @D sub lambda@ of @{roman D} sub 0@, used to produce the
  251. output @{roman E} sub lambda@. We omit a further discussion of the different
  252. algorithms used for optimization; this can be found in [11,12],
  253. for instance.
  254. .LP
  255. .ps 11
  256. .LP
  257. The present version makes use of some GENTRAN facilities to translate
  258. its input into LISP prefix-forms. This approach can be seen as a form
  259. of preprocessing, i.e. @{roman E} sub 0@, the input for R can be considered
  260. as a list of \f3setq\f1-applications
  261. .LP
  262. .ps 11
  263. The GENTRAN-SCOPE Interface, allows other preprocessing activities.
  264. We introduced the optional use of GENTRAN's \f3declare\f1-statement,
  265. thus allowing
  266. to specify the type of some or all of the lhs's and of the identifiers
  267. used to construct the rhs's. In addition to the prefixlist,
  268. a list of declarations in the Target Language can be produced,
  269. based on default assumptions concerning un-typed lhs's and identifiers
  270. in the input.
  271. This facility is based on the use of GENTRAN's symbol-table.
  272. .br
  273. Before optimizing rhs's it might be attractive to rewrite them using a
  274. generalized form of Horner's rule. We designed such a command,
  275. which does not necessarily have to be used in the context
  276. of SCOPE. It can operate on a set
  277. of assignment statements and it can deliver the result in the form of a sequence
  278. of prefix-forms, defining the rewritten statements. Subjecting such a
  279. sequence of prefix-forms to a SCOPE-application implies that the
  280. GENTRAN-approach is not directly applicable. The GENTRAN := and :=:
  281. assignment operators define literal translation or rhs-simplification,
  282. respectively. Therefore we extended our Interface with special facilities,
  283. allowing SCOPE to accept the result of the application of such a
  284. command literally. Besides the \f3g\f1(eneralized)\f3horner\f1(rule) we have a
  285. command, generalizing the impact of the \f3structr\f1-command to a set of
  286. assignment statements.
  287. .LP
  288. .ps 11
  289. We discuss and illustrate a straightforward use of SCOPE in section 2.
  290. In section 3 we introduce the special commands \f3ghorner\f1 and \f3gstructr\f1
  291. and show
  292. how to use them, also in combination with SCOPE. We use section 4 to
  293. discuss the declaration-facilities and section 5 to show the different
  294. file-handling possibilities and modes of operation.
  295. Section 6 is finally used to indicate future work.
  296. Guidelines for installing the package are given in Appendix 1.
  297. .NH
  298. Source-Code Optimization : The Basic Facilities
  299. .NH 2
  300. The Strategy
  301. .LP
  302. .ps 11
  303. Before illustrating the effect of applying SCOPE, we shortly
  304. describe the operations, covered by the functions @{roman T} sub beta@ and F,
  305. mentioned in the previous section.
  306. .br
  307. The function R accepts assignment statements given in prefix-form.
  308. We can divide these forms in three categories using
  309. their leading operator. We distinguish between PLUS, TIMES and OTHER-operators.
  310. Leaving aside the OTHER-operators for awhile, we reduce the
  311. structure of possible rhs's to those of not necessarily expanded multivariate
  312. polynomials with integer coefficients. Assuming the leading operator is PLUS,
  313. the operands, being terms of a polynomial (for instance
  314. @3a~+~2b~+~3 {b sup 2} c (3a~+~2b){(c~+~d) sup 2}@),
  315. can either be primitive or composite.
  316. A primitive term is an integer, an identifier or the product of an integer
  317. and an identifier.
  318. Hence the primitive terms of a sum form an (eventually empty) linear
  319. expression (@3a~+~2b@).
  320. Composite terms are products, which cannot be qualified as a primitive
  321. term (@3 {b sup 2} c (3a~+~2b) {(c~+~d)} sup 2@).
  322. Like sums, prefix-forms with a TIMES-operator, can have
  323. a primitive and/or composite part. The primitive part of a product is an
  324. (eventually empty) power product(@{b sup 2} c@).
  325. The composite part is a product of sums and/or powers of sums
  326. (@(3a~+~2b) {(c~+~d) sup 2}@).
  327. Observe that our expression-structure discussed so far is still too simple.
  328. Powers of sums have EXPT as their leading operator (@{(c~+~d)} sup 2@).
  329. Similarly, a product can have a integer coefficient (@3 {b sup 2} c@).
  330. .br
  331. This description suggests, as already indicated in section 1,
  332. that we can consider any set of rhs's as
  333. being built with linear expressions and power products only. This allows
  334. to map such a set onto two incidence matrices: One defining the linear
  335. expressions, using the coefficients, and another defining the power products,
  336. using the exponents. The rows of these matrices can be associated with the
  337. (sub)expressions under consideration and the columns with the identifiers,
  338. used to construct these expressions. This is why we need to assume the
  339. validity of the commutative and associative law. To be able to retrieve
  340. the structure of the assignment statements forming the input set,
  341. we need to combine additional information with
  342. the rows and columns of these matrices. Essential is, for instance, storage
  343. of the exponents of sums and of the coefficients of products.
  344. Equally important is storage of the lhs's, which are the rhs-recognizers.
  345. Details are given in [12]. The examples 2.2.1 and 2.2.2 provide
  346. illustrations of these data structures.
  347. .br
  348. When introducing kernels, i.e. when assuming the set of OTHER-operators
  349. to be not empty, we have to store lists of non-commutable arguments.
  350. Therefore a function table of pairs is made, formed by the
  351. kernels and their internally created names. These names are entered
  352. in the matrices as new identifiers. The arguments of such a kernel
  353. can be arbitrary REDUCE-expressions, which also have to be incorporated in
  354. the matrices. Hence the function table is created recursively.
  355. .LP
  356. What is a cse and how do we locate its occurrences?
  357. A (sub)expression is common when it occurs repeatedly in the
  358. input. The occurrences are, as part of the input, distributed over the matrices,
  359. and shown as equivalent integer (sub)patterns.
  360. In fact, we repeatedly search for completely dense (sub)matrices of rank 1.
  361. The expression @2a~+~3c@ is a cse of
  362. @{e sub 1}~=~2a~+~4b~+~3c@ and @{e sub 2}~=~4a~+~6c~+~5d@, representable
  363. by (2,4,3,0) and (4,0,6,5), respectively.
  364. We indeed have to assume commutativity, so as
  365. to be able to produce new patterns (2,0,3,0,0), (0,4,0,0,1) and (0,0,0,5,2),
  366. representing @s~=~2a~+~3c@, @{e sub 1}~=~4b~+~s@ and @{e sub 2}~=~5d~+~2s@,
  367. respectively, and thus saving one addition and one multiplication.
  368. Such an additive cse can be a factor in a composite (sub)product,
  369. which in turn can be reduced to a primitive product, when the cse is replaced by
  370. a new symbol.
  371. Therefore an essential part of an optimization step is regrouping
  372. of information. This migration of information between the matrices is performed
  373. if the Breuer-searches are temporarily completed.
  374. After this regrouping the distributive law is applied, eventually also leading
  375. to a further regrouping. If at least one of these actions leads to a
  376. rearrangement of information the function @{roman T} sub beta@ is again applied.
  377. Observe that this @{roman T} sub beta@ is also a composite function.
  378. In view of the iterative character of the optimization process
  379. we always accept minimal profits.
  380. .br
  381. A similar search is performed to detect multiplicative cse's, for instance
  382. occuring in @{e sub 1}~=~{a sup 2} {b sup 4} {c sup 3}@ and
  383. @{e sub 2}~=~{a sup 4} {c sup 6} {d sup 5}@.
  384. However, given a power product @prod from i=1 to m {x sub i} sup {a sub i}@,
  385. any product @prod from i=1 to m {x sub i} sup {b sub i}@, such that some
  386. @{b sub i}~<~{a sub i}@, for i = 1(1)m, can function as a cse.
  387. We therefore extend the search for multiplicative cse's by employing this
  388. property, and as indicated in [12].
  389. .br
  390. The function F -defining the finishing touch- performs one-row and/or
  391. one-column searches. Once the extended Breuer-searches do not lead to
  392. further reduction in the arithmetic complexity we try -applying it-
  393. to improve what is left.
  394. The integer coefficients in (sub)sums can have, possibly locally,
  395. a gcd, which can be factored out.
  396. One-column operations serve to discover and properly replace integer
  397. multiples of identifiers.
  398. As part of the output-process we subject all exponentiations left -
  399. at most one for each identifier - to an addition chain algorithm.
  400. Another action, covered by F is therefore replacement by a new symbol
  401. of those (sub)sums, which are raised to some integer power.
  402. .NH 2
  403. The Facilities
  404. .LP
  405. .ps 11
  406. REDUCE allows, roughly speaking, two modes of operation: ON EXP or OFF EXP.
  407. The first alternative is the default setting leading to expanded forms.
  408. The latter gives unexpanded forms, as discussed by Hearn in some
  409. detail [7,8]. It is obvious that the OFF EXP setting is in general preferable
  410. over the ON EXP setting when attempting to optimize the description of
  411. a set of assignment statements.
  412. .br
  413. Starting a REDUCE session gives the initial state. All switches have their
  414. initial value: ON EXP, PERIOD and OFF FORT, for instance.
  415. When loading SCOPE we create a new operating environment, without
  416. disturbing the current state of the system.
  417. .br
  418. The result of an application of SCOPE can be influenced by the use
  419. of certain REDUCE- or SCOPE-switches. The influence of EXP is obvious.
  420. By default the switch ACINFO is turned on. This guarantees an echo of the form
  421. in which the assignment statements are consumed by SCOPE. It also
  422. guarantees tables with the numbers of arithmetic operations,
  423. occuring in @{{roman E} sub 0}@ and @{roman E} sub lambda@, respectively,
  424. to be printed.
  425. Some switches are available to obtain information about the process itself.
  426. They were introduced to assist in debugging and testing.
  427. PRIMAT can be used to visualize both @{roman D} sub 0@ and @D sub lambda@.
  428. PRIALL is a switch which combines not only the effect of ACINFO and PRIMAT,
  429. but also allows to obtain timings of the different sub-algorithms of SCOPE.
  430. .br
  431. Output is by default given in REDUCE syntax, but FORTRAN syntax is possible
  432. in the usual way. The switch PREFIX can be used to obtain the prefixlist itself
  433. as output.
  434. .br
  435. A SCOPE action is easily performed.
  436. Either the command "\f3optimize\f1 @<@object@>@;" or the command
  437. "\f3optimize\f1 @<@object@>@ \f3iname\f1 @<@cse-prefix@>@;" suffices.
  438. The @<@object@>@ to be elaborated is either one assignment statement or
  439. a list of such statements, all obeying the GENTRAN rules.
  440. The @<@cse-prefix@>@ is an identifier, used to generate the cse-names, by
  441. extending it with an integer part. The \f3gensym\f1-function is applied
  442. when the \f3iname\f1-extension is omitted.
  443. .LP
  444. We now illustrate the use of SCOPE through some small examples,
  445. by showing parts of REDUCE sessions.
  446. .sp
  447. \f3Example 2.2.1\f1
  448. .LP
  449. The multivariate polynomial Z is a sum of 6 composite terms. These terms,
  450. monomials, are constant multiples of primitive products.
  451. A picture of @{roman D} sub 0@
  452. is shown after the input echo. The sums-matrix consists of only one row,
  453. identifiable by its Fa(the)r Z, the lhs. Its exponent is given in the
  454. E(xponent or )C(oefficient) field. The 6 monomials are stored in the
  455. products-matrix. The coefficients are stored in the EC-fields and the
  456. predecessor row index, 0, is given in the Far-field. Before the @D sub lambda@
  457. picture is given the effect of the optimization process, the output
  458. and the operator counts are shown. The optimized form of Z is obtained by
  459. applying the distributive law. The output also shows applications of an addition
  460. chain algorithm ([16], 441-466) as part of @{roman R} sup {-1}@, although
  461. its use in example 2.2.3 is more apparent.
  462. .br
  463. Observe that the output illustrates the heuristic character of the optimization
  464. process: In this particular case the rhs can be written as a polynomial in
  465. S3, thus saving one extra multiplication.
  466. .LP
  467. .in +3
  468. .vs 10
  469. .nf
  470. \fC\s8
  471. on primat$
  472. optimize z:=a^2*b^2+10*a^2*m^6+a^2*m^2+2*a*b*m^4+2*b^2*m^6+b^2*m^2 iname s;
  473. 2 2 2 6 2 2 4 2 6 2 2
  474. Z := A *B + 10*A *M + A *M + 2*A*B*M + 2*B *M + B *M
  475. Sumscheme :
  476. || EC|Far
  477. ------------
  478. 0|| 1| Z
  479. ------------
  480. Productscheme :
  481. | 0 1 2| EC|Far
  482. ---------------------
  483. 1| 2 2| 1| 0
  484. 2| 6 2| 10| 0
  485. 3| 2 2| 1| 0
  486. 4| 4 1 1| 2| 0
  487. 5| 6 2 | 2| 0
  488. 6| 2 2 | 1| 0
  489. ---------------------
  490. 0 : M
  491. 1 : B
  492. 2 : A
  493. Number of operations in the input is:
  494. Number of (+,-)-operations : 5
  495. Number of (*)-operations : 10
  496. Number of integer exponentiations : 11
  497. Number of other operations : 0
  498. S0 := B*A
  499. S4 := M*M
  500. S8 := B*B
  501. S1 := S4*S8
  502. S9 := A*A
  503. S2 := S4*S9
  504. S3 := S4*S4
  505. Z := S1 + S2 + S0*(2*S3 + S0) + S3*(2*S1 + 10*S2)
  506. Number of operations after optimization is:
  507. Number of (+,-)-operations : 5
  508. Number of (*)-operations : 12
  509. Number of integer exponentiations : 0
  510. Number of other operations : 0
  511. Sumscheme :
  512. | 0 3 4 5| EC|Far
  513. ------------------------
  514. 0| 1 1| 1| Z
  515. 15| 2 10| 1| 14
  516. 17| 2 1 | 1| 16
  517. ------------------------
  518. 0 : S3
  519. 3 : S0
  520. 4 : S1
  521. 5 : S2
  522. Productscheme :
  523. | 8 9 10 11 17 18 19 20| EC|Far
  524. ------------------------------------
  525. 7| 1 1| 1| S0
  526. 8| 1 2 | 1| S1
  527. 9| 1 2| 1| S2
  528. 10| 2 | 1| S3
  529. 11| 2 | 1| S4
  530. 14| 1 | 1| 0
  531. 16| 1 | 1| 0
  532. ------------------------------------
  533. 8 : S4
  534. 9 : S3
  535. 10 : S2
  536. 11 : S1
  537. 17 : S0
  538. 18 : M
  539. 19 : B
  540. 20 : A
  541. \f1\s0
  542. .fi
  543. .in -3
  544. .vs 15
  545. .Sq
  546. .LP
  547. \f3Example 2.2.2\f1
  548. .LP
  549. The input echo below shows the literal copy of the first assignment,
  550. in accordance with the GENTRAN := operator.
  551. The second assignment, again in accordance with the GENTRAN operator ::=:,
  552. has a rhs in expanded form.
  553. .br
  554. The @{roman D} sub 0@ picture shows that during parsing string matching
  555. of kernels in prefix-form already contributes to optimization :
  556. S2 = C*X + D and S3 =SIN(S2) are stored once.
  557. .br
  558. Application of the distributive law gives the original structure of A(1,1)
  559. back.
  560. .in +3
  561. .vs 10
  562. .nf
  563. \fC\s8
  564. on primat$
  565. operator a$
  566. k:=j:=1$
  567. u:=c*x+d$
  568. v:=sin(u)$
  569. optimize {a(k,j) := v*(v^2*cos(u)^2+u),
  570. a(k,j)  ::=:v*(v^2*cos(u)^2+u)} iname s;
  571. 2 2
  572. A(K,J) := V*(V *COS(U) + U)
  573. 2 3
  574. A(1,1) := COS(C*X + D) *SIN(C*X + D) + SIN(C*X + D)*C*X + SIN(C*X + D)*D
  575. Sumscheme :
  576. | 7 8| EC|Far
  577. ------------------
  578. 1| 1 | 1| 0
  579. 3| | 1| A(1,1)
  580. 5| 1| 1| S2
  581. ------------------
  582. 7 : U
  583. 8 : D
  584. Productscheme :
  585. | 0 1 2 3 4 5 6| EC|Far
  586. ---------------------------------
  587. 0| 1| 1| A(K,J)
  588. 2| 2 2| 1| 1
  589. 4| 3 2 | 1| 3
  590. 6| 1 1 | 1| 5
  591. 7| 1 1 1 | 1| 3
  592. 8| 1 1 | 1| 3
  593. ---------------------------------
  594. 0 : D
  595. 1 : S3=SIN(S2)
  596. 2 : S1=COS(S2)
  597. 3 : X
  598. 4 : C
  599. 5 : S0=COS(U)
  600. 6 : V
  601. Number of operations in the input is:
  602. Number of (+,-)-operations : 7
  603. Number of (*)-operations : 10
  604. Number of integer exponentiations : 4
  605. Number of other operations : 5
  606. S6 := COS(U)*V
  607. S9 := S6*S6
  608. A(K,J) := V*(U + S9)
  609. S2 := D + X*C
  610. S3 := SIN(S2)
  611. S7 := S3*COS(S2)
  612. S8 := S7*S7
  613. A(1,1) := S3*(S2 + S8)
  614. Number of operations after optimization is:
  615. Number of (+,-)-operations : 3
  616. Number of (*)-operations : 7
  617. Number of integer exponentiations : 0
  618. Number of other operations : 3
  619. Sumscheme :
  620. | 2 12 13| EC|Far
  621. ---------------------
  622. 1| 1 | 1| 0
  623. 3| | 1| A(1,1)
  624. 5| 1| 1| S2
  625. 11| 1 | 1| 10
  626. ---------------------
  627. 2 : S2
  628. 12 : U
  629. 13 : D
  630. Productscheme :
  631. | 0 1 5 6 7 8 9 10 11| EC|Far
  632. ---------------------------------------
  633. 0| 1| 1| A(K,J)
  634. 2| 2 | 1| 1
  635. 4| 2 | 1| 11
  636. 9| 1 1 | 1| 5
  637. 10| 1 | 1| 3
  638. 13| 1 1| 1| S6
  639. 14| 1 1 | 1| S7
  640. ---------------------------------------
  641. 0 : S7
  642. 1 : S6
  643. 5 : D
  644. 6 : S3=SIN(S2)
  645. 7 : COS(S2)
  646. 8 : X
  647. 9 : C
  648. 10 : COS(U)
  649. 11 : V
  650. \f1\s0
  651. .fi
  652. .in -3
  653. .vs 15
  654. .Sq
  655. .LP
  656. \f3Example 2.2.3\f1
  657. .LP
  658. The effect is shown of a finishing touch application, in combination
  659. with FORTRAN output.
  660. During output preparation @{roman S0} sup 13@ is rewritten, using the
  661. earlier mentioned addition chain algorithm.
  662. .in +3
  663. .vs 10
  664. .nf
  665. \fC\s8
  666. on fort$
  667. off acinfo,period$
  668. optimize z:=96*a+18*b+9*c+3*d+6*e+18*f+6*g+5*h+5*k+3)^13 iname s;
  669. S0=5*(H+K)+3*(3*C+D+1+6*(B+F)+2*(A+E+G))
  670. S4=S0*S0
  671. S3=S0*S4
  672. S2=S3*S3
  673. S1=S2*S2
  674. Z=S0*S1
  675. \f1\s0
  676. .fi
  677. .in -3
  678. .vs 15
  679. .Sq
  680. \f3Example 2.2.4\f1
  681. .LP
  682. Recovery of repeatedly occurring integer multiples of identifiers,
  683. as part of the finishing touch, is illustrated. The switch ACINFO
  684. is turned off.
  685. .LP
  686. .in +3
  687. .vs 10
  688. .nf
  689. \fC\s8
  690. optimize {x:=3*a*p,
  691. y:=3*a*q,
  692. z:=6*a*r+2*b*p,
  693. u:=6*a*d+2*b*q,
  694. v:=9*a*c+4*b*d,
  695. w:=4*b} iname s;
  696. S1 := 3*A
  697. X := S1*P
  698. Y := S1*Q
  699. S2 := 6*A
  700. S3 := 2*B
  701. Z := S3*P + S2*R
  702. U := S3*Q + S2*D
  703. S0 := 4*B
  704. V := S0*D + 9*A*C
  705. W := S0
  706. \f1\s0
  707. .fi
  708. .in -3
  709. .vs 15
  710. .Sq
  711. .bp
  712. \f3Example 2.2.5\f1
  713. .LP
  714. .ps 11
  715. The effect of ON EXP or OFF EXP on the result of a
  716. SCOPE-application is now shown by optimizing the representation of the
  717. determinant of a symmetric (3,3) matrix M. Besides differences in computing time
  718. we also observe that the arithmetic complexity of the optimized version of the
  719. expanded representation of the determinant is about the same as the not
  720. optimized form of the unexpanded representation.
  721. .LP
  722. .in +3
  723. .vs 10
  724. .nf
  725. \fC\s8
  726. matrix M(3,3)$
  727. m(1,1):=18*cos(q3)*cos(q2)*m30*p^2-sin(q3)^2*j30y+sin(q3)^2*j30z-
  728. 9*sin(q3)^2*m30*p^2+j1oy+j30y+m10*p^2+18*m30*p^2$
  729. m(2,1):=
  730. m(1,2):=9*cos(q3)*cos(q2)*m30*p^2-sin(q3)^2*j30y+sin(q3)^2*j30z-
  731. 9*sin(q3)^2*m30*p^2+j30y+9*m30*p^2$
  732. m(3,1):=
  733. m(1,3):=-9*sin(q3)*sin(q2)*m30*p^2$
  734. m(2,2):=-sin(q3)^2*j30y+sin(q3)^2*j30z-9*sin(q3)^2*m30*p^2+j30y+9*m30*p^2$
  735. m(3,2):=
  736. m(2,3):=0$
  737. m(3,3):=9*m30*p^2+j30x$
  738. optimize detm:=:det(M) iname s;
  739. 4 2 6 3 4 2 4 2
  740. DETM := 729*SIN(Q3) *SIN(Q2) *P *M30 + 81*SIN(Q3) *SIN(Q2) *P *M30 *
  741. 4 2 4 2 2 2 6
  742. J30Y - 81*SIN(Q3) *SIN(Q2) *P *M30 *J30Z - 729*SIN(Q3) *SIN(Q2) *P *
  743. 3 2 2 4 2 2 6 3
  744. M30 - 81*SIN(Q3) *SIN(Q2) *P *M30 *J30Y - 729*SIN(Q3) *P *M30 - 81*
  745. 2 6 2 2 4 2 2 4 2
  746. SIN(Q3) *P *M30 *M10 - 81*SIN(Q3) *P *M30 *J30Y + 81*SIN(Q3) *P *M30
  747. 2 4 2 2 4 2
  748. *J30Z - 81*SIN(Q3) *P *M30 *J1OY - 81*SIN(Q3) *P *M30 *J30X - 9*
  749. 2 4 2 4 2 4
  750. SIN(Q3) *P *M30*J30Y*M10 + 9*SIN(Q3) *P *M30*J30Z*M10 - 9*SIN(Q3) *P
  751. 2 2 2 2
  752. *M30*M10*J30X - 9*SIN(Q3) *P *M30*J30Y*J1OY - 9*SIN(Q3) *P *M30*J30Y*
  753. 2 2 2 2
  754. J30X + 9*SIN(Q3) *P *M30*J30Z*J1OY + 9*SIN(Q3) *P *M30*J30Z*J30X - 9*
  755. 2 2 2 2 2 2
  756. SIN(Q3) *P *M30*J1OY*J30X - SIN(Q3) *P *J30Y*M10*J30X + SIN(Q3) *P *
  757. 2 2
  758. J30Z*M10*J30X - SIN(Q3) *J30Y*J1OY*J30X + SIN(Q3) *J30Z*J1OY*J30X -
  759. 2 2 6 3 2 2 4 2
  760. 729*COS(Q3) *COS(Q2) *P *M30 - 81*COS(Q3) *COS(Q2) *P *M30 *J30X +
  761. 6 3 6 2 4 2 4 2
  762. 729*P *M30 + 81*P *M30 *M10 + 81*P *M30 *J30Y + 81*P *M30 *J1OY + 81
  763. 4 2 4 4 2
  764. *P *M30 *J30X + 9*P *M30*J30Y*M10 + 9*P *M30*M10*J30X + 9*P *M30*J30Y
  765. 2 2 2
  766. *J1OY + 9*P *M30*J30Y*J30X + 9*P *M30*J1OY*J30X + P *J30Y*M10*J30X +
  767. J30Y*J1OY*J30X
  768. Number of operations in the input is:
  769. Number of (+,-)-operations : 36
  770. Number of (*)-operations : 148
  771. Number of integer exponentiations : 84
  772. Number of other operations : 32
  773. S0 := SIN(Q3)
  774. S30 := S0*S0
  775. S1 := SIN(Q2)
  776. S34 := S1*S1
  777. S35 := P*P
  778. S7 := S35*M30
  779. S33 := S7*S7
  780. S5 := S33*J30Y
  781. S6 := S30*S7
  782. S8 := S30*M10
  783. S49 := COS(Q2)*COS(Q3)
  784. S9 := S49*S49
  785. S11 := S34*S30*S30
  786. S22 := S35*S7
  787. S14 := S30*J30Z
  788. S19 := S35*J30X
  789. S23 := J30X*J1OY
  790. S31 := S33*S7
  791. S47 := 81*S33*J30X
  792. S39 := - S47 - S23*J30Y - 81*S33*J1OY
  793. S40 := - 81*S30*S5 - 729*S33*S6
  794. S45 := 9*S6*J30Z
  795. S46 := 9*S6
  796. S48 := 81*S5
  797. DETM := S48 + S40 - S39 + 729*S31 + ( - J1OY - J30X)*(9*(S6*J30Y - S7
  798. *J30Y) - S45) + (J30Z - J30Y)*(9*S22*S8 + S19*S8) + 9*(M10 - S8
  799. )*(S22*J30X + 9*S22*S7) + M10*J30Y*(9*S22 + S19) + S23*(S14 + 9*S7
  800. - S46) + S39*S30 + S31*(729*(S11 - S9)) + S34*(S40 - S46*S45) -
  801. S47*S9 + 81*S33*S14 + S48*S11
  802. Number of operations after optimization is:
  803. Number of (+,-)-operations : 29
  804. Number of (*)-operations : 58
  805. Number of integer exponentiations : 0
  806. Number of other operations : 4
  807. off exp$
  808. optimize detm:=:det(M) iname s;
  809. 2 2 2
  810. DETM := ((9*P *M30 + J30Y - J30Z)*SIN(Q3) - (18*M30 + M10)*P - 18*
  811. 2 2
  812. COS(Q3)*COS(Q2)*P *M30 - J30Y - J1OY)*((9*P *M30 + J30Y -
  813. 2 2 2
  814. J30Z)*SIN(Q3) - 9*P *M30 - J30Y)*(9*P *M30 + J30X) -
  815. 2 2 2 2
  816. ((9*P *M30 + J30Y - J30Z)*SIN(Q3) - 9*COS(Q3)*COS(Q2)*P *M30 - 9*P *
  817. 2 2 2
  818. M30 - J30Y) *(9*P *M30 + J30X) + 81*((9*P *M30 + J30Y - J30Z)*
  819. 2 2 2 2 4 2
  820. SIN(Q3) - 9*P *M30 - J30Y)*SIN(Q3) *SIN(Q2) *P *M30
  821. Number of operations in the input is:
  822. Number of (+,-)-operations : 24
  823. Number of (*)-operations : 42
  824. Number of integer exponentiations : 21
  825. Number of other operations : 10
  826. S0 := SIN(Q3)
  827. S9 := S0*S0
  828. S8 := P*P
  829. S5 := S8*M30
  830. S6 := S5*COS(Q2)*COS(Q3)
  831. S15 := 9*S5
  832. S13 := (S15 + J30Y - J30Z)*S9
  833. S14 := S13 - S15 - J30Y
  834. S3 := S14 - 9*S6
  835. S4 := SIN(Q2)
  836. DETM := (S15 + J30X)*(S14*(S13 - 18*S6 - J30Y - J1OY - S8*(18*M30 +
  837. M10)) - S3*S3) + 9*S15*S14*S9*S5*S4*S4
  838. Number of operations after optimization is:
  839. Number of (+,-)-operations : 13
  840. Number of (*)-operations : 20
  841. Number of integer exponentiations : 0
  842. Number of other operations : 4
  843. \f1\s0
  844. .fi
  845. .in -3
  846. .vs 15
  847. .LP
  848. .ps 11
  849. We can also use this example to show that correctness of the results can easily
  850. be verified. When turning off the switch NAT and storing the result of
  851. a SCOPE application in a file, it is of course possible to read the result
  852. in again. But we then operate in a normal REDUCE-like way. This
  853. implies that all cse-names are automatically replaced by their values.
  854. We show the "correctness" of SCOPE by storing the optimized version of
  855. the expanded form of the determinant of M, called detm1 in file out1 and
  856. the result of a SCOPE-application on the unexpanded form, detm2, in file out2,
  857. followed by reading both files and by subtracting detm2 from detm1, resulting
  858. in the value 0. This is of course an ad hoc correctness-proof for one specific
  859. example. It is in fact another way of testing the code of the package.
  860. So, assuming SCOPE is loaded and the matrix M is known to the system, all we
  861. have to do is:
  862. .LP
  863. .in +3
  864. .vs 10
  865. .nf
  866. \fC\s8
  867. 2: off acinfo,nat$
  868. 3: out out1$
  869. 4: optimize detm1:=:det(M) iname s;
  870. 5: write "end$"$
  871. 6: shut "out1"$
  872. 7: off exp$
  873. 8: out out2$
  874. 9: optimize detm2:=:det(M) iname t;
  875. 10: write "end$"$
  876. 11: shut out2$
  877. 12: on nat$
  878. 13: in out1;
  879. S0 := SIN(Q3)$
  880. S30 := S0*S0$
  881. S1 := SIN(Q2)$
  882. S34 := S1*S1$
  883. S35 := P*P$
  884. S7 := S35*M30$
  885. S33 := S7*S7$
  886. S5 := S33*J30Y$
  887. S6 := S30*S7$
  888. S8 := S30*M10$
  889. S49 := COS(Q2)*COS(Q3)$
  890. S9 := S49*S49$
  891. S11 := S34*S30*S30$
  892. S22 := S35*S7$
  893. S14 := S30*J30Z$
  894. S19 := S35*J30X$
  895. S23 := J30X*J1OY$
  896. S31 := S33*S7$
  897. S47 := 81*S33*J30X$
  898. S39 := - S47 - S23*J30Y - 81*S33*J1OY$
  899. S40 := - 81*S30*S5 - 729*S33*S6$
  900. S45 := 9*S6*J30Z$
  901. S46 := 9*S6$
  902. S48 := 81*S5$
  903. DETM1 := S48 + S40 - S39 + 729*S31 + ( - J1OY - J30X)*(9*(S6*J30Y - S7*J30Y) -
  904. S45) + (J30Z - J30Y)*(9*S22*S8 + S19*S8) + 9*(M10 - S8)*(S22*J30X + 9
  905. *S22*S7) + M10*J30Y*(9*S22 + S19) + S23*(S14 + 9*S7 - S46) + S39*S30
  906. + S31*(729*(S11 - S9)) + S34*(S40 - S46*S45) - S47*S9 + 81*S33*S14 +
  907. S48*S11$
  908. end$
  909. 14: in out2;
  910. T0 := SIN(Q3)$
  911. T9 := T0*T0$
  912. T8 := P*P$
  913. T5 := T8*M30$
  914. T6 := T5*COS(Q2)*COS(Q3)$
  915. T15 := 9*T5$
  916. T13 := (T15 + J30Y - J30Z)*T9$
  917. T14 := T13 - T15 - J30Y$
  918. T3 := T14 - 9*T6$
  919. T4 := SIN(Q2)$
  920. DETM2 := (T15 + J30X)*(T14*(T13 - 18*T6 - J30Y - J1OY - T8*(18*M30 +
  921. M10)) - T3*T3) + 9*T15*T14*T9*T5*T4*T4$
  922. end$
  923. 15: detm1-detm2;
  924. 0
  925. \f1\s0
  926. .fi
  927. .in -3
  928. .vs 15
  929. .Sq
  930. \f3Example 2.2.6\f1
  931. .LP
  932. .ps 11
  933. This example serves to show how SCOPE deals with rational exponents.
  934. All rational exponents of a variable are collected. The least common
  935. multiple lcm of the denominators of these rational exponents is computed
  936. and the variable is replaced
  937. by a possibly newly selected variable name, denoting the variable raised
  938. to the power 1/lcm. This facility is only efficient for what we believe
  939. to be problems occurring in computational practice. This is easily verified by
  940. extending the sum we are elaborating here with some extra terms.
  941. .br
  942. .ps 11
  943. Producing FORTRAN-output shows an implied danger, due to a shortcoming in
  944. GENTRAN. This rational exponent will in practice act as if it were 0.
  945. .LP
  946. .ps 11
  947. This example is also used to show the effect of turning on the switch PRIALL.
  948. .LP
  949. .in +3
  950. .vs 10
  951. .nf
  952. \fC\s8
  953. on fort,priall$
  954. optimize z:=:for j:=2:6 sum q^(1/j) iname s;
  955. 1/6 1/5 1/4 1/3
  956. Z := Q + Q + Q + Q + SQRT(Q)
  957. Sumscheme :
  958. || EC|Far
  959. ------------
  960. 0|| 1| Z
  961. ------------
  962. Productscheme :
  963. | 0| EC|Far
  964. ---------------
  965. 1| 10| 1| 0
  966. 2| 12| 1| 0
  967. 3| 15| 1| 0
  968. 4| 20| 1| 0
  969. 5| 30| 1| 0
  970. ---------------
  971. 0 : Q
  972. Number of operations in the input is:
  973. Number of (+,-)-operations : 4
  974. Number of (*)-operations : 0
  975. Number of integer exponentiations : 0
  976. Number of other operations : 5
  977. Time: 2992 ms
  978. Breuer search :
  979. Time: 867 ms
  980. Removal of different names for identical cse's :
  981. Time: 17 ms
  982. Change Scheme :
  983. Time: 0 ms
  984. Local Factorization :
  985. Time: 34 ms
  986. Breuer search :
  987. Time: 204 ms
  988. Removal of different names for identical cse's :
  989. Time: 0 ms
  990. Change Scheme :
  991. Time: 17 ms
  992. Local Factorization :
  993. Time: 0 ms
  994. Breuer search :
  995. Time: 187 ms
  996. Removal of different names for identical cse's :
  997. Time: 0 ms
  998. Change Scheme :
  999. Time: 17 ms
  1000. Local Factorization :
  1001. Time: 0 ms
  1002. Breuer search :
  1003. Time: 119 ms
  1004. Removal of different names for identical cse's :
  1005. Time: 0 ms
  1006. Change Scheme :
  1007. Time: 17 ms
  1008. Local Factorization :
  1009. Time: 0 ms
  1010. Additional optimization during finishing touch :
  1011. Time: 34 ms
  1012. Q=Q**(1/60)
  1013. S7=Q*Q
  1014. S6=S7*Q
  1015. S4=S7*S6
  1016. S2=S4*S4
  1017. S1=S7*S2
  1018. S0=S6*S1
  1019. S3=S4*S0
  1020. Z=S3+S0+S1+S2+S3*S2
  1021. Number of operations after optimization is:
  1022. Number of (+,-)-operations : 4
  1023. Number of (*)-operations : 8
  1024. Number of integer exponentiations : 0
  1025. Number of other operations : 1
  1026. Sumscheme :
  1027. | 3 4 5 6| EC|Far
  1028. ------------------------
  1029. 0| 1 1 1 1| 1| Z
  1030. ------------------------
  1031. 3 : S3
  1032. 4 : S0
  1033. 5 : S1
  1034. 6 : S2
  1035. Productscheme :
  1036. | 9 10 12 13 14 15 16 22| EC|Far
  1037. ------------------------------------
  1038. 5| 1 1 | 1| 0
  1039. 6| 1 1 | 1| S0
  1040. 7| 1 1 | 1| S1
  1041. 8| 2 | 1| S2
  1042. 9| 1 1 | 1| S3
  1043. 10| 1 1 | 1| S4
  1044. 12| 1 1| 1| S6
  1045. 13| 2| 1| S7
  1046. ------------------------------------
  1047. 9 : S7
  1048. 10 : S6
  1049. 12 : S4
  1050. 13 : S3
  1051. 14 : S2
  1052. 15 : S1
  1053. 16 : S0
  1054. 22 : Q
  1055. Time: 459 ms
  1056. \f1\s0
  1057. .fi
  1058. .in -3
  1059. .vs 15
  1060. .Sq
  1061. .NH
  1062. Preprocessing Possibilities
  1063. .LP
  1064. .ps 11
  1065. It may happen that structure is obviously visible in the
  1066. rhs's of a set of assignment statements, which we want to optimize.
  1067. One can think of a set of partial derivatives of products.
  1068. Or one may consider the application of Horner-rules.
  1069. Such facilities may be attractive, independent of the question if a
  1070. SCOPE-application will be performed on its result.
  1071. Therefore we first discuss these facilities and show their effect, again
  1072. by using simple examples, before we continue with a combined use of SCOPE
  1073. and these possibilities.
  1074. .br
  1075. .ps 11
  1076. The first alternative demands a generalized \f3structr\f1-command.
  1077. We implemented such a facility. Its syntax is straightforward:
  1078. "\f3gstructr\f1 @<@object@>@ \f3name\f1 @<@id@>@;" The @<@object@>@
  1079. to be elaborated is one assignment statement or a set of such
  1080. statements, separated by semicolons and grouped together between the
  1081. special symbols @<<@ and @>>@. In stead of a statement a matrix-name
  1082. is also allowed. Then all non-zero matrix elements are incorporated in the
  1083. search for obvious cse's. The @<@id@>@ of the optional \f3name\f1-part,
  1084. being an identifier, is used to identify
  1085. the subexpressions, produced via the application of a \f3gstructr\f1
  1086. command. When the switch ALGPRI is on -the default setting-
  1087. the output is given in REDUCE syntax, while turning it off leads to output
  1088. in prefix-form.
  1089. The latter is employed by the function R, used to store SCOPE-input
  1090. in @{roman D} sub 0@.
  1091. It is also possible to get FORTRAN-output by turning off the switch PERIOD and
  1092. turning on the switch FORTRAN.
  1093. The input remains unchanged when the switch EXP is on.
  1094. .LP
  1095. \f3Example 3.1\f1
  1096. .LP
  1097. We show part of a REDUCE session.
  1098. .vs 10
  1099. .in +3
  1100. .nf
  1101. \fC\s8
  1102. off exp$
  1103. matrix a(2,2)$
  1104. a(1,1) := x+y+z$
  1105. a(1,2) := x*y$
  1106. a(2,1) := (x+y)*x*y$
  1107. a(2,2) := (x+2*y+3)^3-x$
  1108. on fort$
  1109. off period$
  1110. load struct$
  1111. gstructr << a;
  1112. b:=(x+y)^2;
  1113. c:=(x+y)*(y+z);
  1114. d:=(x+2*y)*(y+z)*(z+x)^2
  1115. >> name v;
  1116. V1=X+Y+Z
  1117. A(1,1)=V1
  1118. A(1,2)=X*Y
  1119. V2=X+Y
  1120. A(2,1)=V2*X*Y
  1121. V3=X+2*Y+3
  1122. V4=V3**3-X
  1123. A(2,2)=V4
  1124. B=V2**2
  1125. V5=Y+Z
  1126. C=V2*V5
  1127. V6=X+2*Y
  1128. V7=X+Z
  1129. D=V6*V7**2*V5
  1130. \f1\s0
  1131. .fi
  1132. .in -3
  1133. .vs 15
  1134. .LP
  1135. .ps 11
  1136. Observe that V1, V3, V4, V6 and V7 only occur once in this result
  1137. of a \f3gstructr\f1-application. When applied as part of a SCOPE-operation
  1138. these redundancies will be removed before the actual optimization process
  1139. is performed, as shown in example 3.3.
  1140. .Sq
  1141. .sp
  1142. .LP
  1143. .ps 11
  1144. The syntax for the \f3ghorner\f1-command is very similar.
  1145. The application of a Horner-rule assumes an ordering of the identifiers.
  1146. We allow instead of the \f3name\f1-part of the \f3gstructr\f1 command an
  1147. optional \f3vorder\f1 @<@list of id.s@>@ extension.
  1148. The @<@list of id.s@>@ consists
  1149. of at least one identifier. This list overrules, in the order given, the
  1150. current identifier ordering of the system. The rhs's are considered as
  1151. polynomials in the leftmost element of the \f3vorder\f1-list. The thus created
  1152. coefficients are in turn considered as polynomials in the second element
  1153. of this list. And so on. When the \f3vorder\f1-extension is omitted the
  1154. current system identifier ordering is applied.
  1155. The internal switch ALGPRI is again applicable and has the same meaning as for
  1156. \f3gstructr\f1.
  1157. .br
  1158. Some optimizing compilers apply Horner-rules when possible. Our optimization
  1159. strategy is based on an all levels, all expressions search. This contradicts
  1160. the Horner-mechanism. To avoid destabilizing side-effects of
  1161. Horner-rule applications we decided to bring such a facility under user-control.
  1162. .LP
  1163. \f3Example 3.2
  1164. .LP
  1165. Some Taylor-expansions are shown.
  1166. .LP
  1167. .vs 10
  1168. .in +3
  1169. .nf
  1170. \fC\s8
  1171. algebraic procedure taylor(fx,x,x0,n);
  1172. sub(x=x0,fx)+for k:=1:n sum (sub(x=x0,df(fx,x,k))*(x-x0)^k/
  1173. (if k<3 then k else for j:=2:k product j))$
  1174. let x^4=0,y^7=0$
  1175. f1:=(taylor(e^x,x,0,4)*taylor(cos y,y,0,6))^2;
  1176. 3 6 3 4 3 2 3 2 6 2
  1177. F1 := - (8*X *Y - 60*X *Y + 180*X *Y - 180*X + 12*X *Y - 90*X *
  1178. 4 2 2 2 6 4 2
  1179. Y + 270*X *Y - 270*X + 12*X*Y - 90*X*Y + 270*X*Y -
  1180. 6 4 2
  1181. 270*X + 6*Y - 45*Y + 135*Y - 135)/135
  1182. load horner$
  1183. ghorner << f1:=f1;
  1184. g1:=taylor(e^x,x,0,4);
  1185. h1:=taylor(cos y,y,0,6);
  1186. f1:=(g1*h1)^2 >> vorder y,x;
  1187. 2
  1188. F1 := ((135 + X*(270 + X*(270 + X*180))) + Y *(( - 135 + X*( - 270 +
  1189. 2
  1190. X*( - 270 + X*(-180)))) + Y *((45 + X*(90 + X*(90 + X
  1191. 2
  1192. *60))) + Y *( - 6 + X*( - 12 + X*( - 12 + X*(-8
  1193. )))))))/135
  1194. 6 + X*(6 + X*(3 + X))
  1195. G1 := -----------------------
  1196. 6
  1197. 2 2 2
  1198. 720 + Y *( - 360 + Y *(30 + Y *(-1)))
  1199. H1 := ---------------------------------------
  1200. 720
  1201. 2 2 2 2 2
  1202. (6 + X*(6 + X*(3 + X))) * ( - 720 + Y *(360 + Y *( - 30 + Y )))
  1203. F1 := -------------------------------------------------------------------
  1204. 18662400
  1205. \f1\s0
  1206. .fi
  1207. .in -3
  1208. .vs 15
  1209. .Sq
  1210. .LP
  1211. Both commands can be used inside an \f3optimize\f1-command. We advise to compile
  1212. both facilities and SCOPE separately (see also Appendix 1).
  1213. .LP
  1214. .ps 11
  1215. To be able to order the application of either a \f3gstructr\f1-command or a
  1216. \f3ghorner\f1-rewrite instruction inside the definition of a SCOPE-operation
  1217. we have to extend the rules given in section 2.2. The permissible structures
  1218. for the <object>'s to be elaborated by SCOPE are simply extended with
  1219. syntactically correct \f3ghorner\f1- and \f3gstructr\f1-commands. Hence the
  1220. structure of an \f3optimize\f1-command is not altered, as is shown by the
  1221. following two examples.
  1222. .LP
  1223. .ps 11
  1224. \f3Example 3.3\f1
  1225. .LP
  1226. We show the effect of an application of the \f3optimize\f1-command
  1227. on the \f3gstructr\f1-command of example 3.1.
  1228. Observe that the cse-names produced during optimization begin with an S,
  1229. while \f3gstructr\f1 created names start with a V.
  1230. .in +3
  1231. .vs 10
  1232. .nf
  1233. \fC\s8
  1234. on fort,acinfo$
  1235. off exp,period$
  1236. optimize gstructr << a;
  1237. b:=(x+y)^2;
  1238. c:=(x+y)*(y+z);
  1239. d:=(x+2*y)*(y+z)*(z+x)^2
  1240. >> name v
  1241. iname s;
  1242. A(1,1) := X + Y + Z
  1243. A(1,2) := X*Y
  1244. V2 := X + Y
  1245. A(2,1) := V2*X*Y
  1246. 3
  1247. A(2,2) := (X + 2*Y + 3) - X
  1248. 2
  1249. B := V2
  1250. V5 := Y + Z
  1251. C := V2*V5
  1252. 2
  1253. D := (X + 2*Y)*(X + Z) *V5
  1254. Number of operations in the input is:
  1255. Number of (+,-)-operations : 9
  1256. Number of (*)-operations : 8
  1257. Number of integer exponentiations : 3
  1258. Number of other operations : 0
  1259. S5=X+Z
  1260. A(1,1)=S5+Y
  1261. S8=Y*X
  1262. A(1,2)=S8
  1263. V2=X+Y
  1264. A(2,1)=S8*V2
  1265. S6=X+2*Y
  1266. S4=S6+3
  1267. A(2,2)=S4*S4*S4-X
  1268. B=V2*V2
  1269. V5=Y+Z
  1270. C=V5*V2
  1271. D=S6*S5*S5*V5
  1272. Number of operations after optimization is:
  1273. Number of (+,-)-operations : 7
  1274. Number of (*)-operations : 10
  1275. Number of integer exponentiations : 0
  1276. Number of other operations : 0
  1277. \f1\s0
  1278. .fi
  1279. .in -3
  1280. .vs 15
  1281. .Sq
  1282. \f3Example 3.4\f1
  1283. .LP
  1284. .ps 11
  1285. For completeness we also show how to use the Horner-facilities inside an
  1286. \f3optimize\f1-command. Due to the structure of the method -we operate
  1287. internally on expanded forms- both representations of h1, and thus also
  1288. of the corresponding prefix representations used to built @{roman D} sub 0@
  1289. slightly differ. The consequences are visualized in the results of the
  1290. SCOPE-application.
  1291. .LP
  1292. .in +3
  1293. .vs 10
  1294. .nf
  1295. \fC\s8
  1296. load scope$
  1297. optimize ghorner <<h1:=taylor(cos y,y,0,6);
  1298. f1:=(taylor(e^x,x,0,4)*h1)^2>> vorder y,x
  1299. iname s;
  1300. 2 2 2
  1301. 720 + Y *( - 360 + Y *(30 + Y *(-1)))
  1302. H1 := ---------------------------------------
  1303. 720
  1304. 2 2 2 2 2
  1305. (6 + X*(6 + X*(3 + X))) * ( - 720 + Y *(360 + Y *( - 30 + Y )))
  1306. F1 := -------------------------------------------------------------------
  1307. 18662400
  1308. Number of operations in the input is:
  1309. Number of (+,-)-operations : 9
  1310. Number of (*)-operations : 8
  1311. Number of integer exponentiations : 8
  1312. Number of other operations : 2
  1313. S6 := Y*Y
  1314. 720 + S6*(S6*(30 - 1*S6) - 360)
  1315. H1 := ---------------------------------
  1316. 720
  1317. S7 := (S6*(360 + S6*(S6 - 30)) - 720)*(6 + X*(6 + X*(3 + X)))
  1318. S7*S7
  1319. F1 := ----------
  1320. 18662400
  1321. Number of operations after optimization is:
  1322. Number of (+,-)-operations : 9
  1323. Number of (*)-operations : 10
  1324. Number of integer exponentiations : 0
  1325. Number of other operations : 2
  1326. \f1\s0
  1327. .fi
  1328. .in -3
  1329. .vs 15
  1330. .Sq
  1331. .NH
  1332. Generation of Declarations
  1333. .LP
  1334. The GENTRAN \f3declare\f1-statement can also be used as an optional extension of
  1335. the \f3optimize\f1-command. An illustration of this facility is given in
  1336. example 4.1. The syntax of such a statement is in accordance with the
  1337. GENTRAN-rules [3]. We also use the symbol-table of GENTRAN.
  1338. During parsing, the declared identifiers and/or array- and matrix-names
  1339. are entered in the symbol-table. Once optimization is finished
  1340. all relevant information for completing the declarations is known,
  1341. and collected in the prefixlist, which is used for output-production.
  1342. This prefixlist is employed to decide which not yet
  1343. typed identifiers and system selected cse-names have to be
  1344. entered in the symbol-table. We make use of already known information,
  1345. expression-structure and the normal hierarchy in data types.
  1346. The strategy to achieve this is essentially based on chapter 6 of [1].
  1347. Once this table is completed a list of declarations is produced
  1348. if the switch OPTDECS is turned on.
  1349. SCOPE-output is by default given in REDUCE syntax. Alternative output is
  1350. obtained by assigning a relevant value to the global identifier Optlang!*.
  1351. This causes GENTRAN to take over the output preparation, as shown in:
  1352. .LP
  1353. \f3Example 4.1\f1
  1354. .LP
  1355. .in +3
  1356. .vs 10
  1357. .nf
  1358. \fC\s8
  1359. on optdecs$
  1360. off acinfo$
  1361. optlang!*:='fortran$
  1362. optimize {x(i+1):=a(i+1,i-1)+b(i),y(i-1):=a(i-1,i+1)-b(i)}
  1363. iname s
  1364. declare << a(4,4),x(4),y(5):real; b(5):integer >>;
  1365. INTEGER B(5),I,S1,S3
  1366. DOUBLE PRECISION A(4,4),S4,X(4),Y(5)
  1367. S1=I+1
  1368. S3=I-1
  1369. S4=B(I)
  1370. X(S1)=A(S1,S3)+S4
  1371. Y(S3)=A(S3,S1)-S4
  1372. optlang!*:='c$
  1373. optimize {x(i+1):=a(i+1,i-1)+b(i),y(i-1):=a(i-1,i+1)-b(i)}
  1374. iname s
  1375. declare << a(4,4),x(4),y(5):real; b(5):integer >>;
  1376. LONG B[6],I,S1,S3;
  1377. DOUBLE A[5][5],S4,X[5],Y[6];
  1378. {
  1379. S1=I+1;
  1380. S3=I-1;
  1381. S4=B[I];
  1382. X[S1]=A[S1][S3]+S4;
  1383. Y[S3]=A[S3][S1]-S4;
  1384. }
  1385. optlang!*:='pascal$
  1386. optimize {x(i+1):=a(i+1,i-1)+b(i),y(i-1):=a(i-1,i+1)-b(i)}
  1387. iname s
  1388. declare << a(4,4),x(4),y(5):real; b(5):integer >>;
  1389. VAR
  1390. B[0..5],I,S1,S3: INTEGER;
  1391. A[0..4,0..4],S4,X[0..4],Y[0..5]: REAL;
  1392. BEGIN
  1393. S1:=I+1;
  1394. S3:=I-1;
  1395. S4:=B[I];
  1396. X[S1]:=A[S1,S3]+S4;
  1397. Y[S3]:=A[S3,S1]-S4
  1398. END;
  1399. optlang!*:='ratfor$
  1400. optimize {x(i+1):=a(i+1,i-1)+b(i),y(i-1):=a(i-1,i+1)-b(i)}
  1401. iname s
  1402. declare << a(4,4),x(4),y(5):real; b(5):integer >>;
  1403. INTEGER B(5),I,S1,S3
  1404. DOUBLE PRECISION A(4,4),S4,X(4),Y(5)
  1405. {
  1406. S1=I+1
  1407. S3=I-1
  1408. S4=B(I)
  1409. X(S1)=A(S1,S3)+S4
  1410. Y(S3)=A(S3,S1)-S4
  1411. }
  1412. %%% The following command restores the initial situation. %%%
  1413. optlang!*:='nil$
  1414. \f1\s0
  1415. .fi
  1416. .in -3
  1417. .vs 15
  1418. .Sq
  1419. .sp
  1420. .LP
  1421. .NH
  1422. File Management and Optimization Strategies
  1423. .LP
  1424. Another alternative for the <object>'s to be optimized is formed by the
  1425. sequence \f3in\f1 @{roman file} sub 1@, @{roman file} sub 2@, ...,
  1426. @{roman file} sub n@, @n~>=~1@. Each of these files is assumed to contain
  1427. one or a list of more assignment statements, obeying the GENTRAN-assignment
  1428. rules. A SCOPE application results in a unified sequence of assignment
  1429. statements in the target language. This is illustrated by the following
  1430. example, where each file fi contains one assignment statement of the form
  1431. ei := some expression.
  1432. .LP
  1433. .ps 11
  1434. \f3Example 5.1\f1
  1435. .LP
  1436. .ps 11
  1437. .LP
  1438. .in +3
  1439. .vs 10
  1440. .nf
  1441. \fC\s8
  1442. 3: optimize in f1,f2,f3 iname s;
  1443. 2
  1444. 2 (X + Y) 8 2 2
  1445. 2*(SIN(X) - COS(E ) + 3*COS(X)) *(X + Y) + 4*Y + 4*Y
  1446. E1 := ----------------------------------------------------------------
  1447. 3*X + 2*Y
  1448. E2 := (4*
  1449. 2
  1450. 2 (X + Y) 2 3 2
  1451. (SIN(X) - COS(E ) + 2*COS(X)) *(X + Y) + (4*X - 4*Y)
  1452. 2
  1453. - 6*X)/(8*X + 3*Y - 2*X)
  1454. 2
  1455. (X + Y)
  1456. E3 := (4*SIN(COS(E )) + SIN(X + Y) +
  1457. 2 2
  1458. (4*X - X + 2*Y) )/(3*Y + F(X,G( - COS(X))))
  1459. Number of operations in the input is:
  1460. Number of (+,-)-operations : 21
  1461. Number of (*)-operations : 20
  1462. Number of integer exponentiations : 12
  1463. Number of other operations : 16
  1464. S3 := SIN(X)
  1465. S7 := X + Y
  1466. S6 := S7*S7
  1467. S6
  1468. S4 := COS(E )
  1469. S8 := COS(X)
  1470. S28 := S3*S3 - S4
  1471. S2 := S28 + 3*S8
  1472. S36 := S2*S2
  1473. S35 := S36*S36
  1474. S30 := 2*Y
  1475. S9 := S30 + 3*X
  1476. 2*(2*Y + S30*Y + S6*S35*S35)
  1477. E1 := ------------------------------
  1478. S9
  1479. S12 := S28 + 2*S8
  1480. S29 := 4*X*X
  1481. S27 := S29 - X
  1482. S31 := 3*Y
  1483. S29 - 2*S9 + 4*S6*S12*S12*S7
  1484. E2 := ------------------------------
  1485. S31 + 2*S27
  1486. S18 := S30 + S27
  1487. 4*SIN(S4) + SIN(S7) + S18*S18
  1488. E3 := -------------------------------
  1489. S31 + F(X,G( - S8))
  1490. Number of operations after optimization is:
  1491. Number of (+,-)-operations : 15
  1492. Number of (*)-operations : 24
  1493. Number of integer exponentiations : 0
  1494. Number of other operations : 11
  1495. \f1\s0
  1496. .fi
  1497. .in -3
  1498. .vs 15
  1499. .Sq
  1500. .Lp
  1501. .ps 11
  1502. .br
  1503. However a switch is available for stepwise performing the optimization of
  1504. a set of assignment statements, distributed over different files.
  1505. When turning on this AGAIN switch the finishing touch is not done.
  1506. Moreover, the system is instructed to save relevant internal information
  1507. in combination with the result of the present optimization run. The
  1508. thus extended output is assumed to be stored in a file. When the
  1509. optimization task is continued during another session this file is
  1510. assumed to be read before all other remaining files.
  1511. This mode of operation is illustrated in
  1512. .LP
  1513. .ps 11
  1514. \f3Example 5.2\f1
  1515. .LP
  1516. .ps 11
  1517. .LP
  1518. .in +3
  1519. .vs 10
  1520. .nf
  1521. \fC\s8
  1522. 2: off acinfo$
  1523. 3: on again$
  1524. 4: out f5$
  1525. 5: optimize in f1,f2 iname s;
  1526. 6: write "end$"$
  1527. 7: shut f5$
  1528. 8: off again$
  1529. 9: on acinfo$
  1530. 10: optimize in f5,f3 iname t;
  1531. S7 := X + Y
  1532. 2
  1533. S6 := S7
  1534. S8 := COS(X)
  1535. 2 S6
  1536. S18 := SIN(X) - COS(E )
  1537. S9 := 3*X + 2*Y
  1538. 2 8
  1539. 4*Y + 4*Y + 2*S6*(S18 + 3*S8)
  1540. E1 := ---------------------------------
  1541. S9
  1542. 2
  1543. S15 := X
  1544. 2
  1545. 4*S15 - 2*S9 + 4*S6*(S18 + 2*S8) *S7
  1546. E2 := --------------------------------------
  1547. 8*S15 - 2*X + 3*Y
  1548. 2
  1549. (X + Y)
  1550. E3 := (4*SIN(COS(E )) + SIN(X + Y) +
  1551. 2 2
  1552. (4*X - X + 2*Y) )/(3*Y + F(X,G( - COS(X))))
  1553. Number of operations in the total input, i.e. in the 2 input sets is:
  1554. Number of (+,-)-operations : 22
  1555. Number of (*)-operations : 20
  1556. Number of integer exponentiations : 13
  1557. Number of other operations : 17
  1558. T17 := X + Y
  1559. T16 := T17*T17
  1560. S8 := COS(X)
  1561. T1 := SIN(X)
  1562. T16
  1563. T2 := COS(E )
  1564. S18 := T1*T1 - T2
  1565. T28 := 2*Y
  1566. S9 := T28 + 3*X
  1567. T6 := S18 + 3*S8
  1568. T36 := T6*T6
  1569. T35 := T36*T36
  1570. 2*(2*Y + T28*Y + T35*T35*T16)
  1571. E1 := -------------------------------
  1572. S9
  1573. S15 := X*X
  1574. T9 := S18 + 2*S8
  1575. T30 := 4*S15
  1576. T26 := T30 - X
  1577. T29 := 3*Y
  1578. T30 - 2*S9 + 4*T17*T9*T9*T16
  1579. E2 := ------------------------------
  1580. T29 + 2*T26
  1581. T19 := T28 + T26
  1582. 4*SIN(T2) + SIN(T17) + T19*T19
  1583. E3 := --------------------------------
  1584. T29 + F(X,G( - S8))
  1585. Number of operations after optimization is:
  1586. Number of (+,-)-operations : 15
  1587. Number of (*)-operations : 24
  1588. Number of integer exponentiations : 0
  1589. Number of other operations : 11
  1590. \f1\s0
  1591. .fi
  1592. .in -3
  1593. .vs 15
  1594. .Sq
  1595. .LP
  1596. .ps 11
  1597. Since the construction of declarations in combination with some optimization
  1598. activity is based on a quite specific use of GENTRAN's symbol table, one has
  1599. to operate carefully when optimizing input in different sessions. A correct
  1600. list of declarations is only guaranteed, when the last optimization-command
  1601. is extended with the required declaration-information.
  1602. .NH
  1603. Some Possible Shortcomings and Future Versions
  1604. .LP
  1605. .ps 11
  1606. The present version of SCOPE may have some shortcomings and possibly also
  1607. some inefficiencies. However, since we are working on a second version,
  1608. as stated in [13], we do not have the intention to largely modify the
  1609. present version.
  1610. However, we intend to improve one special aspect of the present SCOPE-version:
  1611. The combined use of SCOPE and GENTRAN. This preliminary version of the manual
  1612. will shortly be extended with the description of these combined features.
  1613. .br
  1614. Bugs and obvious deficiencies will of course be removed.
  1615. .sp
  1616. \f3Acknowledgements\f1
  1617. .LP
  1618. .ps 11
  1619. The many discussions I had over the past years with Barbara L. Gates,
  1620. Victor V. Goldman, Anthony C. Hearn, Jaap Smit and Paul S. Wang about the
  1621. symbolic-numeric aspects of computer algebra have been very stimulating and
  1622. valuable. They also contributed to the present status of SCOPE.
  1623. .br
  1624. Completion of the code would have been impossible without the dedicated
  1625. assistance of my students and the frequent discussions we had.
  1626. I certainly want to mention Ben Hulshof, Pim van den Heuvel, Marcel van
  1627. Heerwaarden, Anco Smit, Johan de Boer and Jan Verheul.
  1628. .bp
  1629. \f3References\f1
  1630. .LP
  1631. .KS
  1632. .IP [1]
  1633. Aho, A.V., Sethi, R., Ullman, J.D.: \f2Compilers Principles, Techniques
  1634. and Tools\f1. Reading, Mass.: Addison-Wesley (1986).
  1635. .KE
  1636. .KS
  1637. .IP [2]
  1638. Breuer, M.A.: "Generation of optimal code for expressions via factorization",
  1639. \f2Comm. ACM\f1 \f312, 6\f1, 330-340 (1969).
  1640. .KE
  1641. .KS
  1642. .IP [3]
  1643. Gates, B.L.: "GENTRAN: An automatic code generation facility for REDUCE",
  1644. \f2A.C.M. SIGSAM Bulletin\f1 \f319, 3\f1, 24-42. New York: A.C.M. (1985).
  1645. .KE
  1646. .KS
  1647. .IP [4]
  1648. Gates, B.L., Wang,P.S.: "A LISP-based RATFOR code generator", \f2Proceedings
  1649. 1984 MACSYMA User's Conference\f1 (V.E. Golden, ed.), 319-329.
  1650. Schenectady, N.Y.: Gen. El. (1984).
  1651. .KE
  1652. .KS
  1653. .IP [5]
  1654. Goldman, V.V., van Hulzen, J.A.: "Automatic code vectorization of arithmetic
  1655. expressions by bottom-up structure recognition", \f2Computer Algebra and
  1656. Parallelism\f1 (J. Della Dora and J. Fitch, ed.s), 119-132. London: Academic
  1657. Press (1989).
  1658. .KE
  1659. .KS
  1660. .IP [6]
  1661. Gonzales, T., Ja' Ja', J.: "Evaluation of arithmetic expressions with
  1662. algebraic identities", \f2SIAM J. Comp.\f1, \f311\f1, \f34\f1, 633-662
  1663. (1982).
  1664. .KE
  1665. .KS
  1666. .IP [7]
  1667. Hearn, A.C.: "Structure: The key to improved algebraic computation",
  1668. \f2Proceedings RSYMSAC2\f1 (N. Inada en T. Soma, ed.'s), 215-230.
  1669. Singapore: World Scientific Publ. (1985).
  1670. .KE
  1671. .KS
  1672. .IP [8]
  1673. Hearn, A.C.: "Optimal evaluation of algebraic expressions", \f2Proceedings
  1674. AAECC-3\f1 (J. Calmet, ed.), Springer LNCS series nr \f3229\f1, 392-403.
  1675. Heidelberg: Springer Verlag (1986).
  1676. .KE
  1677. .KS
  1678. .IP [9]
  1679. Hearn, A.C.: \f2REDUCE user's manual, version 3.3\f1. Santa Monica, CA:
  1680. The Rand Corporation (1988).
  1681. .KE
  1682. .KS
  1683. .IP [10]
  1684. van den Heuvel, P., van Hulzen, J.A., Goldman, V.V.: "Automatic generation
  1685. of FORTRAN-coded Jacobians and Hessians", \f2Proceedings EUROCAL '87\f1 (J.H.
  1686. Davenport, ed.), Springer LNCS-series nr \f3378\f1, 120-131. Heidelberg:
  1687. Springer Verlag (1989).
  1688. .KE
  1689. .KS
  1690. .IP [11]
  1691. van Hulzen, J.A.: "Breuer's grow factor algorithm in computer
  1692. algebra", \f2Proceedings SYMSAC '81\f1 (P.S. Wang, ed.), 100-104. New York:
  1693. ACM-Press (1981).
  1694. .KE
  1695. .KS
  1696. .IP [12]
  1697. van Hulzen, J.A.: "Code optimization of multivariate polynomial schemes:
  1698. A pragmatic approach", \f2Proceedings EUROCAL '83\f1, (J.A. van Hulzen, ed.),
  1699. Springer LNCS series \f3162\f1, 286-300. Heidelberg: Springer Verlag (1983).
  1700. .KE
  1701. .KS
  1702. .IP [13]
  1703. van Hulzen, J.A.: "Current trends in source-code optimization", \f2Proceedings
  1704. JINR IV Conference on Computer Algebra and its Applications in Theoretical
  1705. Physics\f1, Dubna, May 20-25, 1990 (to appear). Also available as Memorandum
  1706. \f3INF-90-41\f1, Department of Computer Science, Uniersity Twente (1990).
  1707. .KE
  1708. .KS
  1709. .IP [14]
  1710. Johnson, D.B., Miller, W., Minnihan, B., Wrathall, C.: "Reducibility among
  1711. floating-point graphs", \f2J. ACM\f1 \f326\f1, \f34\f1, 739-760 (1979).
  1712. .KE
  1713. .KS
  1714. .IP [15]
  1715. Knuth, D.E.: "An empirical study of Fortran programs", \f2Software Practice
  1716. and Experience\f1 \f31\f1, 105-133 (1971).
  1717. .KE
  1718. .KS
  1719. .IP [16]
  1720. Knuth, D.E.: \f2The art of computer programming, Vol.\f1 \f32\f1
  1721. (Second Edition). Reading, Mass.: Addison-Wesley (1980).
  1722. .KE
  1723. .KS
  1724. .IP [17]
  1725. Smit, J., van Hulzen, J.A., Hulshof, B.J.A.: "NETFORM and code optimizer
  1726. manual", \f2A.C.M. SIGSAM Bulletin\f1 \f315,4\f1, 23-32.
  1727. New York: A.C.M. (1981).
  1728. .KE
  1729. .KS
  1730. .IP [18]
  1731. Smit, J., van Hulzen, J.A.: "Symbolic-numeric methods in microwave
  1732. technology", \f2Proceedings EUROCAM '82\f1 (J. Calmet, ed.), Springer
  1733. LNCS series \f3144\f1, 281-288. Heidelberg: Springer Verlag (1982).
  1734. .KE
  1735. .KS
  1736. .IP [19]
  1737. Wang, P.S., Chang, T.Y.P., van Hulzen, J.A.: "Code generation and
  1738. optimization for finite element analysis", \f2Proceedings EUROSAM '84
  1739. \f1, (J.P. Fitch ed.), Springer LNCS series \f3174\f1,
  1740. 237-247. Heidelberg: Springer Verlag (1984).
  1741. .KE
  1742. .bp
  1743. \f3Appendix 1: How to install the Code\f1
  1744. .sp
  1745. .LP
  1746. .ps 11
  1747. The code consists of a number of modules, collected in five files. Two of
  1748. these modules play a special role and can best be compiled separately:
  1749. gstructr, defining the \f3gstructr\f1 facilities, and ghorner, containing
  1750. the code for the Horner-rules.
  1751. .LP
  1752. .ps 11
  1753. The other modules form SCOPE. Since @{roman D} sub 0@ and all operations on it
  1754. and on its later versions @{roman D} sub i@ are defined using \f3smacros's\f1
  1755. it is essential to read in the module cosmac, containing these \f3smacro's\f1,
  1756. first. Since we also use part of
  1757. the GENTRAN code care have to be taken that GENTRAN is loaded when
  1758. compiling the code.