lalr.scm 76 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134
  1. ;;; nyacc/lalr.scm
  2. ;; Copyright (C) 2014-2018 Matthew R. Wette
  3. ;;
  4. ;; This library is free software; you can redistribute it and/or
  5. ;; modify it under the terms of the GNU Lesser General Public
  6. ;; License as published by the Free Software Foundation; either
  7. ;; version 3 of the License, or (at your option) any later version.
  8. ;;
  9. ;; This library is distributed in the hope that it will be useful,
  10. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;; Lesser General Public License for more details.
  13. ;;
  14. ;; You should have received a copy of the GNU Lesser General Public License
  15. ;; along with this library; if not, see <http://www.gnu.org/licenses/>
  16. ;;; Notes:
  17. ;; I need to find way to preserve srconf, rrconf after hashify.
  18. ;; compact needs to deal with it ...
  19. ;;; Code:
  20. (define-module (nyacc lalr)
  21. #:export (lalr-spec process-spec
  22. make-lalr-machine compact-machine hashify-machine
  23. lalr-start lalr-match-table
  24. restart-spec add-recovery-logic!
  25. pp-lalr-notice pp-lalr-grammar pp-lalr-machine
  26. write-lalr-actions write-lalr-tables
  27. pp-rule find-terminal gen-match-table) ; used by (nyacc bison)
  28. #:re-export (*nyacc-version*)
  29. #:use-module ((srfi srfi-1) #:select (fold fold-right remove lset-union
  30. lset-intersection lset-difference))
  31. #:use-module ((srfi srfi-9) #:select (define-record-type))
  32. #:use-module (srfi srfi-43)
  33. #:use-module (nyacc util)
  34. #:use-module (nyacc version)
  35. #:use-module (ice-9 pretty-print)
  36. )
  37. ;; token values for default reduction and erro, sync with parser.scm
  38. ;; used in hashify-machine, compact-machine
  39. (define $default 1)
  40. (define $error 2)
  41. ;; @deffn {Procedure} proxy-? sym rhs
  42. ;; @example
  43. ;; (LHS (($? RHS))
  44. ;; ($P (($$ #f))
  45. ;; ($P RHS ($$ (set-cdr! (last-pair $1) (list $2)) $1)))
  46. ;; @end example
  47. ;; @end deffn
  48. (define (proxy-? sym rhs)
  49. (list sym
  50. (list '(action #f #f (list)))
  51. rhs))
  52. ;; @deffn {Procedure} proxy-+ sym rhs
  53. ;; @example
  54. ;; (LHS (($* RHS))
  55. ;; ($P (($$ '()))
  56. ;; ($P RHS ($$ (set-cdr! (last-pair $1) (list $2)) $1)))
  57. ;; @end example
  58. ;; @end deffn
  59. (define (proxy-* sym rhs)
  60. (if (pair? (filter (lambda (elt) (eqv? 'action (car elt))) rhs))
  61. (error "no RHS action allowed")) ;; rhs
  62. (list
  63. sym
  64. (list '(action #f #f (list)))
  65. (append (cons (cons 'non-terminal sym) rhs)
  66. (list '(action #f #f
  67. (set-cdr! (last-pair $1) (list $2))
  68. $1)))))
  69. ;; @deffn {Procedure} proxy-+ sym rhs
  70. ;; @example
  71. ;; (LHS (($+ RHS))
  72. ;; ($P (RHS ($$ (list $1)))
  73. ;; ($P RHS ($$ (set-cdr! (last-pair $1) (list $2)) $1)))
  74. ;; @end example
  75. ;; @end deffn
  76. (define (proxy-+ sym rhs)
  77. (if (pair? (filter (lambda (elt) (eq? 'action (car elt))) rhs))
  78. (error "no RHS action allowed")) ;; rhs
  79. (list
  80. sym
  81. (append rhs (list '(action #f #f (list $1))))
  82. (append (cons (cons 'non-terminal sym) rhs)
  83. (list '(action #f #f
  84. (set-cdr! (last-pair $1) (list $2))
  85. $1)))))
  86. ;; @deffn {Procedure} reserved? grammar-symbol
  87. ;; Determine whether the syntax argument is a reserved symbol, that is.
  88. ;; So instead of writing @code{'$fixed} for syntax one can write
  89. ;; @code{$fixed}. We may want to change this to
  90. ;; @example
  91. ;; (reserved-terminal? grammar-symbol)
  92. ;; (reserved-non-term? grammar-symbol)
  93. ;; @end example
  94. ;; @end deffn
  95. (define (reserved? grammar-symbol)
  96. ;; If the first character `$' then it's reserved.
  97. (eqv? #\$ (string-ref (symbol->string (syntax->datum grammar-symbol)) 0)))
  98. ;; @deffn {Syntax} lalr-spec grammar => spec
  99. ;; This routine reads a grammar in a scheme-like syntax and returns an a-list.
  100. ;; This spec' can be an input for @item{make-parser-generator} or
  101. ;; @item{pp-spec}.
  102. ;;.This will return the specification. Notably the grammar will have rhs
  103. ;; arguments decorated with type (e.g., @code{(terminal . #\,)}).
  104. ;; Each production rule in the grammar will be of the form
  105. ;; @code{(lhs rhs1 rhs2 ...)} where each element of the RHS is one of
  106. ;; @itemize
  107. ;; @item @code{('terminal . atom)}
  108. ;; @item @code{('non-terminal . symbol)}
  109. ;; @item @code{('action . (ref narg guts)}
  110. ;; @end itemize
  111. ;; Currently, the number of arguments for items is computed in the routine
  112. ;; @code{process-grammar}.
  113. ;; @end deffn
  114. (define-syntax parse-rhs
  115. (lambda (x)
  116. ;; The following is syntax-case because we use a fender.
  117. (syntax-case x (quote $$ $$/ref $$-ref $prec $empty $? $* $+)
  118. ;; action specifications
  119. ((_ ($$ <guts> ...) <e2> ...)
  120. (syntax (cons '(action #f #f <guts> ...) (parse-rhs <e2> ...))))
  121. ((_ ($$-ref <ref>) <e2> ...)
  122. (syntax (cons `(action #f ,<ref> . #f) (parse-rhs <e2> ...))))
  123. ((_ ($$/ref <ref> <guts> ...) <e2> ...)
  124. (syntax (cons `(action #f ,<ref> <guts> ...) (parse-rhs <e2> ...))))
  125. ;; other internal $-syntax
  126. ((_ ($prec <tok>) <e2> ...)
  127. (syntax (cons (cons 'prec <tok>) (parse-rhs <e2> ...))))
  128. ((_ $empty <e2> ...) ; TODO: propagate to processor
  129. (syntax (parse-rhs <e2> ...)))
  130. ;; (experimental) proxies
  131. ((_ ($? <s1> <s2> ...) <e2> ...)
  132. (syntax (cons (cons* 'proxy proxy-? (parse-rhs <s1> <s2> ...))
  133. (parse-rhs <e2> ...))))
  134. ((_ ($+ <s1> <s2> ...) <e2> ...)
  135. (syntax (cons (cons* 'proxy proxy-+ (parse-rhs <s1> <s2> ...))
  136. (parse-rhs <e2> ...))))
  137. ((_ ($* <s1> <s2> ...) <e2> ...)
  138. (syntax (cons (cons* 'proxy proxy-* (parse-rhs <s1> <s2> ...))
  139. (parse-rhs <e2> ...))))
  140. ;; terminals and non-terminals
  141. ((_ (quote <e1>) <e2> ...)
  142. (syntax (cons '(terminal . <e1>) (parse-rhs <e2> ...))))
  143. ((_ (<f> ...) <e2> ...)
  144. (syntax (cons (<f> ...) (parse-rhs <e2> ...))))
  145. ((_ <e1> <e2> ...)
  146. (identifier? (syntax <e1>)) ; fender to trap non-term's
  147. (if (reserved? (syntax <e1>))
  148. (syntax (cons '(terminal . <e1>) (parse-rhs <e2> ...)))
  149. (syntax (cons '(non-terminal . <e1>) (parse-rhs <e2> ...)))))
  150. ((_ <e1> <e2> ...)
  151. (syntax (cons '(terminal . <e1>) (parse-rhs <e2> ...))))
  152. ((_) (syntax (list))))))
  153. (define-syntax parse-rhs-list
  154. (syntax-rules ()
  155. ((_ (<ex> ...) <rhs> ...)
  156. (cons (parse-rhs <ex> ...)
  157. (parse-rhs-list <rhs> ...)))
  158. ((_) '())))
  159. (define-syntax parse-grammar
  160. (syntax-rules ()
  161. ((_ (<lhs> <rhs> ...) <prod> ...)
  162. (cons (cons '<lhs> (parse-rhs-list <rhs> ...))
  163. (parse-grammar <prod> ...)))
  164. ((_) '())))
  165. (define-syntax parse-precedence
  166. (syntax-rules (left right nonassoc)
  167. ((_ (left <tk> ...) <ex> ...)
  168. (cons (cons 'left (list <tk> ...))
  169. (parse-precedence <ex> ...)))
  170. ((_ (right <tk> ...) <ex> ...)
  171. (cons (cons 'right (list <tk> ...))
  172. (parse-precedence <ex> ...)))
  173. ((_ (nonassoc <tk> ...) <ex> ...)
  174. (cons (cons 'nonassoc (list <tk> ...))
  175. (parse-precedence <ex> ...)))
  176. ((_ <tk> <ex> ...)
  177. (cons (list 'undecl <tk>)
  178. (parse-precedence <ex> ...)))
  179. ((_) '())))
  180. (define-syntax lalr-spec-1
  181. (syntax-rules (start alt-start expect notice reserve prec< prec> grammar)
  182. ((_ (start <symb>) <e> ...)
  183. (cons (cons 'start '<symb>) (lalr-spec-1 <e> ...)))
  184. ((_ (alt-start <sym1> <sym2> ...) <e> ...)
  185. (cons (cons 'alt-start '(<sym1> <sym2> ...)) (lalr-spec-1 <e> ...)))
  186. ((_ (expect <n>) <e> ...)
  187. (cons (cons 'expect <n>) (lalr-spec-1 <e> ...)))
  188. ((_ (notice <str>) <e> ...)
  189. (cons (cons 'notice <str>) (lalr-spec-1 <e> ...)))
  190. ((_ (reserve <t1> ...) <e> ...)
  191. (cons (list 'reserve <t1> ...) (lalr-spec-1 <e> ...)))
  192. ((_ (prec< <ex> ...) <e> ...)
  193. (cons (cons 'precedence (parse-precedence <ex> ...))
  194. (lalr-spec-1 <e> ...)))
  195. ((_ (prec> <ex> ...) <e> ...)
  196. (cons (cons 'precedence (reverse (parse-precedence <ex> ...)))
  197. (lalr-spec-1 <e> ...)))
  198. ((_ (grammar <prod> ...) <e> ...)
  199. (cons (cons 'grammar (parse-grammar <prod> ...))
  200. (lalr-spec-1 <e> ...)))
  201. ((_) '())))
  202. (define-syntax lalr-spec
  203. (syntax-rules ()
  204. ((_ <expr> ...)
  205. (process-spec (lalr-spec-1 <expr> ...)))))
  206. ;; @deffn {Procedure} atomize terminal => object
  207. ;; Generate an atomic object for a terminal. Expected terminals are strings,
  208. ;; characters and symbols. This will convert the strings @code{s} to symbols
  209. ;; of the form @code{'$:s}.
  210. ;; @end deffn
  211. (define (atomize terminal)
  212. (if (string? terminal)
  213. (string->symbol (string-append "$:" terminal))
  214. terminal))
  215. ;; @deffn {Procedure} normize terminal => char|symbol
  216. ;; Normalize a token. This routine will normalize tokens in order to check
  217. ;; for similarities. For example, @code{"+"} and @code{#\+} are similar,
  218. ;; @code{'foo} and @code{"foo"} are similar.
  219. ;; @end deffn
  220. (define (normize terminal)
  221. (if (not (string? terminal)) terminal
  222. (if (= 1 (string-length terminal))
  223. (string-ref terminal 0)
  224. (string->symbol terminal))))
  225. ;; @deffn {Procedure} eqv-terminal? a b
  226. ;; This is a predicate to determine if the terminals @code{a} and @code{b}
  227. ;; are equivalent.
  228. ;; @end deffn
  229. (define (eqv-terminal? a b)
  230. (eqv? (atomize a) (atomize b)))
  231. ;; @deffn {Procedure} find-terminal symb term-l => term-symb
  232. ;; Find the terminal in @code{term-l} that is equivalent to @code{symb}.
  233. ;; @end deffn
  234. (define (find-terminal symb term-l)
  235. (let iter ((tl term-l))
  236. (if (null? tl) #f
  237. (if (eqv-terminal? symb (car tl)) (car tl)
  238. (iter (cdr tl))))))
  239. ;; @deffn {Procedure} process-spec tree => specification (as a-list)
  240. ;; Here we sweep through the production rules. We flatten and order the rules
  241. ;; and place all p-rules with like LHSs together. There is a non-trivial
  242. ;; amount of extra code to deal with mid-rule actions (MRAs).
  243. ;; @end deffn
  244. (define (process-spec tree)
  245. ;; Generate a new symbol. This is a helper for proxies and mid-rule-actions.
  246. ;; The counter here is the only @code{set!} in @code{process-spec}.
  247. ;; Otherwise, I believe @code{process-spec} is referentially transparent.
  248. (define gensy
  249. (let ((cntr 1))
  250. (lambda ()
  251. (let ((c cntr))
  252. (set! cntr (1+ cntr))
  253. (string->symbol (string-append "$P" (number->string c)))))))
  254. ;; Canonicalize precedence and associativity. Precedence will appear
  255. ;; as sets of equivalent items in increasing order of precedence
  256. ;; (e.g., @code{((+ -) (* /)}). The input tree has nodes that look like
  257. ;; @example
  258. ;; '(precedence (left "+" "-") (left "*" "/"))
  259. ;; '(precedence ('then "else")
  260. ;; @end example
  261. ;; @noindent
  262. ;; =>
  263. ;; @example
  264. ;; (prec ((+ -) (* /)) ((then) (else)))
  265. ;; @end example
  266. (define (prec-n-assc tree)
  267. ;; prec-l; lt-assc-l rt-assc-l non-assc-l pspec
  268. (let iter ((pll '()) (pl '()) (la '()) (ra '()) (na '())
  269. (spec '()) (tree tree))
  270. (cond
  271. ((pair? spec)
  272. ;; item ~ ('left "+" "-") => a ~ 'left, tl ~ (#\+ #\-)
  273. (let* ((item (car spec)) (as (car item)) (tl (map atomize (cdr item))))
  274. (case as
  275. ((left)
  276. (iter pll (cons tl pl) (append tl la) ra na (cdr spec) tree))
  277. ((right)
  278. (iter pll (cons tl pl) la (append tl ra) na (cdr spec) tree))
  279. ((nonassoc)
  280. (iter pll (cons tl pl) la ra (append tl na) (cdr spec) tree))
  281. ((undecl)
  282. (iter pll (cons tl pl) la ra na (cdr spec) tree)))))
  283. ((pair? pl)
  284. (iter (cons (reverse pl) pll) '() la ra na spec tree))
  285. ((pair? tree)
  286. (iter pll pl la ra na
  287. (if (eqv? 'precedence (caar tree)) (cdar tree) '()) (cdr tree)))
  288. (else
  289. (list
  290. `(prec . ,(reverse pll))
  291. `(assc (left ,@la) (right ,@ra) (nonassoc ,@na)))))))
  292. ;;.@deffn {Procedure} make-mra-proxy sy pel act => ???
  293. ;; Generate a mid-rule-action proxy.
  294. ;; @end deffn
  295. (define (make-mra-proxy sy pel act)
  296. (list sy (list (cons* 'action (length pel) (cdr act)))))
  297. ;; @deffn {Procedure} gram-check-2 tl nl err-l
  298. ;; Check for fatal: symbol used as terminal and non-terminal.
  299. ;; @end deffn
  300. (define (gram-check-2 tl nl err-l)
  301. (let ((cf (lset-intersection eqv? (map atomize tl) nl)))
  302. (if (pair? cf)
  303. (cons (fmtstr "*** symbol is terminal and non-terminal: ~S" cf)
  304. err-l) err-l)))
  305. ;; @deffn gram-check-3 ll nl err-l
  306. ;; Check for fatal: non-terminal's w/o production rule.
  307. ;; @end deffn
  308. (define (gram-check-3 ll nl err-l)
  309. (fold
  310. (lambda (n l)
  311. (if (not (memq n ll))
  312. (cons (fmtstr "*** non-terminal with no production rule: ~A" n) l)
  313. l))
  314. err-l nl))
  315. ;; @deffn {Procedure} gram-check-4 ll nl err-l
  316. ;; Check for warning: unused LHS.
  317. ;; TODO: which don't appear in OTHER RHS, e.g., (foo (foo))
  318. ;; @end deffn
  319. (define (gram-check-4 ll nl err-l)
  320. (let ((alt-start (or (assq-ref tree 'alt-start) '())))
  321. (fold
  322. (lambda (s l) (cons (fmtstr "+++ LHS not used in any RHS: ~A" s) l))
  323. err-l
  324. (let iter ((ull '()) (all ll)) ; unused LHSs, all LHS's
  325. (if (null? all) ull
  326. (iter (if (or (memq (car all) nl)
  327. (memq (car all) ull)
  328. (memq (car all) alt-start) ; new 02Sep18
  329. (eq? (car all) '$start))
  330. ull (cons (car all) ull))
  331. (cdr all)))))))
  332. ;; TODO: check for repeated tokens in precedence spec's: prec<, prec>
  333. ;; IN PROGRESS: add zero-rule $accept : start-symbol $end
  334. (let* ((gram (assq-ref tree 'grammar))
  335. (start-symbol (and=> (assq-ref tree 'start) atomize))
  336. (start-rule (lambda () (list start-symbol)))
  337. (add-el (lambda (e l) (if (member e l) l (cons e l))))
  338. (pna (prec-n-assc tree)))
  339. ;; We sweep through the grammar to generate a canonical specification.
  340. ;; Note: the local rhs is used to hold RHS terms, but a
  341. ;; value of @code{'()} is used to signal "add rule", and a value of
  342. ;; @code{#f} is used to signal ``done, proceed to next rule.''
  343. ;; We use @code{tail} below to go through all remaining rules so that any
  344. ;; like LHS get absorbed before proceeding: This keeps LHS in sequence.
  345. ;; Note: code-comm and lone-comm are added to terminals so that they end
  346. ;; up in the match-table. The parser will skip these if the automoton has
  347. ;; no associated transitions for these. This allows users to parse for
  348. ;; comments in some rules but skip the rest.
  349. (let iter ((ll '($start)) ; LHS list
  350. (@l (list ; attributes per prod' rule
  351. `((rhs . ,(vector start-symbol))
  352. (ref . all) (act 1 $1))))
  353. (tl (cons* '$error '$end ; terminals
  354. (or (assq-ref tree 'reserve) '())))
  355. (nl (list start-symbol)) ; set of non-terminals
  356. ;;
  357. (head gram) ; head of unprocessed productions
  358. (prox '()) ; proxy productions for MRA
  359. (lhs #f) ; current LHS (symbol)
  360. (tail '()) ; tail of grammar productions
  361. (rhs-l '()) ; list of RHSs being processed
  362. (attr '()) ; per-rule attributes (action, prec)
  363. (pel '()) ; processed RHS terms: '$:if ...
  364. (rhs #f)) ; elts to process: (terminal . '$:if) ...
  365. (cond
  366. ((pair? rhs)
  367. ;; Capture info on RHS term.
  368. (case (caar rhs)
  369. ((terminal)
  370. (iter ll @l (add-el (cdar rhs) tl) nl head prox lhs tail
  371. rhs-l attr (cons (atomize (cdar rhs)) pel) (cdr rhs)))
  372. ((non-terminal)
  373. (iter ll @l tl (add-el (cdar rhs) nl) head prox lhs tail
  374. rhs-l attr (cons (cdar rhs) pel) (cdr rhs)))
  375. ((action)
  376. (if (pair? (cdr rhs))
  377. ;; mid-rule action: generate a proxy (car act is # args)
  378. (let* ((sy (gensy))
  379. (pr (make-mra-proxy sy pel (cdar rhs))))
  380. (iter ll @l tl (cons sy nl) head (cons pr prox)
  381. lhs tail rhs-l attr (cons sy pel) (cdr rhs)))
  382. ;; end-rule action
  383. (iter ll @l tl nl head prox lhs tail
  384. rhs-l (acons 'action (cdar rhs) attr) pel (cdr rhs))))
  385. ((proxy)
  386. (let* ((sy (gensy))
  387. (pf (cadar rhs)) ; proxy function
  388. (p1 (pf sy (cddar rhs))))
  389. (iter ll @l tl (cons sy nl) head (cons p1 prox) lhs
  390. tail rhs-l attr (cons sy pel) (cdr rhs))))
  391. ((prec)
  392. (iter ll @l (add-el (cdar rhs) tl) nl head prox lhs tail rhs-l
  393. (acons 'prec (atomize (cdar rhs)) attr) pel (cdr rhs)))
  394. (else
  395. (error (fmtstr "bug=~S" (caar rhs))))))
  396. ((null? rhs)
  397. ;; End of RHS items for current rule.
  398. ;; Add the p-rules items to the lists ll, rl, xl, and @@l.
  399. ;; @code{act} is now:
  400. ;; @itemize
  401. ;; @item for mid-rule-action: (narg ref code)
  402. ;; @item for end-rule-action: (#f ref code)
  403. ;; @end itemize
  404. (let* ((ln (length pel))
  405. (action (assq-ref attr 'action))
  406. (nrg (if action (or (car action) ln) ln)) ; number of args
  407. (ref (if action (cadr action) #f))
  408. (act (cond
  409. ((and action (cddr action)) (cddr action))
  410. ;; if error rule then default action is print err msg:
  411. ((memq '$error pel) '((display "syntax error\n")))
  412. ((zero? nrg) '((list)))
  413. (else '($1)))))
  414. (iter (cons lhs ll)
  415. (cons (cons* (cons 'rhs (list->vector (reverse pel)))
  416. (cons* 'act nrg act) (cons 'ref ref) attr) @l)
  417. tl nl head prox lhs tail rhs-l attr pel #f)))
  418. ((pair? rhs-l)
  419. ;; Work through next RHS.
  420. (iter ll @l tl nl head prox lhs tail
  421. (cdr rhs-l) '() '() (car rhs-l)))
  422. ((pair? tail)
  423. ;; Check the next CAR of the tail. If it matches
  424. ;; the current LHS process it, else skip it.
  425. (iter ll @l tl nl head prox lhs (cdr tail)
  426. (if (eqv? (caar tail) lhs) (cdar tail) '())
  427. attr pel #f))
  428. ((pair? prox)
  429. ;; If a proxy then we have ((lhs RHS) (lhs RHS))
  430. (iter ll @l tl nl (cons (car prox) head) (cdr prox)
  431. lhs tail rhs-l attr pel rhs))
  432. ((pair? head)
  433. ;; Check the next rule-set. If the lhs has aready
  434. ;; been processed, then skip. Otherwise, copy copy
  435. ;; to tail and process.
  436. (let ((lhs (caar head)) (rhs-l (cdar head))
  437. (rest (cdr head)))
  438. (if (memq lhs ll)
  439. (iter ll @l tl nl rest prox #f '() '() attr pel #f)
  440. (iter ll @l tl nl rest prox lhs rest rhs-l attr pel rhs))))
  441. (else
  442. (let* ((al (reverse @l)) ; attribute list
  443. (err-l '())
  444. ;; symbol used as terminal and non-terminal:
  445. (err-l (gram-check-2 tl nl err-l))
  446. ;; non-terminal's w/o production rule:
  447. (err-l (gram-check-3 ll nl err-l))
  448. ;; TODO: which don't appear in OTHER RHS, e.g., (foo (foo))
  449. (err-l (gram-check-4 ll nl err-l))
  450. )
  451. (for-each (lambda (e) (fmterr "~A\n" e)) err-l)
  452. (if (pair? (filter (lambda (s) (char=? #\* (string-ref s 0))) err-l))
  453. #f
  454. (list
  455. ;; Put most referenced items first, but keep start and rhs-v at
  456. ;; top so that if we want to restart (see restart-spec) we can
  457. ;; reuse the tail here.
  458. ;;(cons 'start start-symbol) ; use lalr-start, aka rhs-v[0][0]
  459. (cons 'rhs-v (map-attr->vector al 'rhs))
  460. ;;
  461. (cons 'restart-tail #t) ; see @code{restart-spec} below
  462. (cons 'lhs-v (list->vector (reverse ll)))
  463. (cons 'non-terms nl)
  464. (cons 'terminals tl)
  465. (cons 'attr (list
  466. (cons 'expect (or (assq-ref tree 'expect) 0))
  467. (cons 'notice (assq-ref tree 'notice))))
  468. (cons 'prec (assq-ref pna 'prec)) ; lowest-to-highest
  469. (cons 'assc (assq-ref pna 'assc))
  470. (cons 'prp-v (map-attr->vector al 'prec)) ; per-rule precedence
  471. (cons 'act-v (map-attr->vector al 'act))
  472. (cons 'ref-v (map-attr->vector al 'ref)) ; action references
  473. (cons 'err-l err-l)))))))))
  474. ;;; === Code for processing the specification. ================================
  475. ;; @subsubheading Note
  476. ;; The fluid @code{*lalr-core*} is used during the machine generation
  477. ;; cycles to access core parameters of the specification. This includes
  478. ;; the list of non-terminals, the vector of left-hand side symbols and the
  479. ;; vector of vector of right-hand side symbols.
  480. (define *lalr-core* (make-fluid))
  481. ;; @deffn {Procedure} lalr-start spec => symbol
  482. ;; Return the start symbol for the grammar.
  483. ;; @end deffn
  484. (define (lalr-start spec)
  485. (vector-ref (vector-ref (assq-ref spec 'rhs-v) 0) 0))
  486. ;; This record holds the minimum data from the grammar needed to build the
  487. ;; machine from the grammar specification.
  488. (define-record-type lalr-core-type
  489. (make-lalr-core non-terms terminals lhs-v rhs-v eps-l)
  490. lalr-core-type?
  491. (non-terms core-non-terms) ; list of non-terminals
  492. (terminals core-terminals) ; list of non-terminals
  493. (lhs-v core-lhs-v) ; vec of left hand sides
  494. (rhs-v core-rhs-v) ; vec of right hand sides
  495. (eps-l core-eps-l)) ; non-terms w/ eps prod's
  496. ;; @deffn {Procedure} make-core spec => lalr-core-type
  497. ;; @end deffn
  498. (define (make-core spec)
  499. (make-lalr-core (assq-ref spec 'non-terms)
  500. (assq-ref spec 'terminals)
  501. (assq-ref spec 'lhs-v)
  502. (assq-ref spec 'rhs-v)
  503. '()))
  504. ;; @deffn {Procedure} make-core/extras spec => lalr-core-type
  505. ;; Add list of symbols with epsilon productions.
  506. ;; @end deffn
  507. (define (make-core/extras spec)
  508. (let ((non-terms (assq-ref spec 'non-terms))
  509. (terminals (assq-ref spec 'terminals))
  510. (lhs-v (assq-ref spec 'lhs-v))
  511. (rhs-v (assq-ref spec 'rhs-v)))
  512. (make-lalr-core non-terms terminals lhs-v rhs-v
  513. (find-eps non-terms lhs-v rhs-v))))
  514. ;; @section Routines
  515. ;; @deffn {Procedure} <? a b po => #t | #f
  516. ;; Given tokens @code{a} and @code{b} and partial ordering @code{po} report
  517. ;; if precedence of @code{b} is greater than @code{a}?
  518. ;; @end deffn
  519. (define (<? a b po)
  520. (if (member (cons a b) po) #t
  521. (let iter ((po po))
  522. (if (null? po) #f
  523. (if (and (eqv? (caar po) a)
  524. (<? (cdar po) b po))
  525. #t
  526. (iter (cdr po)))))))
  527. ;; @deffn {Procedure} prece a b po
  528. ;; Return precedence for @code{a,b} given the partial order @code{po} as
  529. ;; @code{'lt}, @code{'gt}, @code{'eq} or @code{#f}.
  530. ;; This is not a true partial order as we can have a<b and b<a => a=b.
  531. ;; @example
  532. ;; @code{(prece a a po)} => @code{'eq}.
  533. ;; @end example
  534. ;; @end deffn
  535. (define (prece a b po)
  536. (cond
  537. ((eqv? a b) 'eq)
  538. ((eqv? a '$error) 'lt)
  539. ((eqv? b '$error) 'gt)
  540. ((<? a b po) (if (<? b a po) 'eq 'lt))
  541. (else (if (<? b a po) 'gt #f))))
  542. ;; @deffn {Procedure} non-terminal? symb
  543. ;; @end deffn
  544. (define (non-terminal? symb)
  545. (cond
  546. ((eqv? symb '$epsilon) #t)
  547. ((eqv? symb '$end) #f)
  548. ((eqv? symb '$@) #f)
  549. ((string? symb) #f)
  550. (else
  551. (memq symb (core-non-terms (fluid-ref *lalr-core*))))))
  552. ;; @deffn {Procedure} terminal? symb
  553. ;; @end deffn
  554. (define (terminal? symb)
  555. (not (non-terminal? symb)))
  556. ;; @deffn {Procedure} prule-range lhs => (start-ix . (1+ end-ix))
  557. ;; Find the range of productiion rules for the lhs.
  558. ;; If not found raise error.
  559. ;; @end deffn
  560. (define (prule-range lhs)
  561. ;; If this needs to be really fast then we move to where lhs is an integer
  562. ;; and that used to index into a table that provides the ranges.
  563. (let* ((core (fluid-ref *lalr-core*))
  564. (lhs-v (core-lhs-v core))
  565. (n (vector-length lhs-v))
  566. (match? (lambda (ix symb) (eqv? (vector-ref lhs-v ix) symb))))
  567. (cond
  568. ((terminal? lhs) '())
  569. ((eq? lhs '$epsilon) '())
  570. (else
  571. (let iter-st ((st 0))
  572. ;; Iterate to find the start index.
  573. (if (= st n) '() ; not found
  574. (if (match? st lhs)
  575. ;; Start found, now iteratate to find end index.
  576. (let iter-nd ((nd st))
  577. (if (= nd n) (cons st nd)
  578. (if (not (match? nd lhs)) (cons st nd)
  579. (iter-nd (1+ nd)))))
  580. (iter-st (1+ st)))))))))
  581. ;; @deffn {Procedure} range-next rng -> rng
  582. ;; Given a range in the form of @code{(cons start (1+ end))} return the next
  583. ;; value or '() if at end. That is @code{(3 . 4)} => @code{'()}.
  584. ;; @end deffn
  585. (define (range-next rng)
  586. (if (null? rng) '()
  587. (let ((nxt (cons (1+ (car rng)) (cdr rng))))
  588. (if (= (car nxt) (cdr nxt)) '() nxt))))
  589. ;; @deffn {Procedure} range-last? rng
  590. ;; Predicate to indicate last p-rule in range.
  591. ;; If off end (i.e., null rng) then #f.
  592. ;; @end deffn
  593. (define (range-last? rng)
  594. (and (pair? rng) (= (1+ (car rng)) (cdr rng))))
  595. ;; @deffn {Procedure} lhs-symb prod-ix
  596. ;; Return the LHS symbol for the production at index @code{prod-id}.
  597. ;; @end deffn
  598. (define (lhs-symb gx)
  599. (vector-ref (core-lhs-v (fluid-ref *lalr-core*)) gx))
  600. ;; @deffn {Procedure} looking-at (p-rule-ix . rhs-ix)
  601. ;; Return symbol we are looking at for this item state.
  602. ;; If at the end (position = -1) (or rule is zero-length) then return
  603. ;; @code{'$epsilon}.
  604. ;; @end deffn
  605. (define (looking-at item)
  606. (let* ((core (fluid-ref *lalr-core*))
  607. (rhs-v (core-rhs-v core))
  608. (rule (vector-ref rhs-v (car item))))
  609. (if (last-item? item)
  610. '$epsilon
  611. (vector-ref rule (cdr item)))))
  612. ;; @deffn {Procedure} first-item gx
  613. ;; Given grammar rule index return the first item.
  614. ;; This will return @code{(gx . 0)}, or @code{(gx . -1)} if the rule has
  615. ;; no RHS elements.
  616. ;; @end deffn
  617. (define (first-item gx)
  618. (let* ((core (fluid-ref *lalr-core*))
  619. (rlen (vector-length (vector-ref (core-rhs-v core) gx))))
  620. (cons gx (if (zero? rlen) -1 0))))
  621. ;; @deffn {Procedure} last-item? item
  622. ;; Predictate to indicate last item in (or end of) production rule.
  623. ;; @end deffn
  624. (define (last-item? item)
  625. (negative? (cdr item)))
  626. ;; @deffn {Procedure} next-item item
  627. ;; Return the next item in the production rule.
  628. ;; A position of @code{-1} means the end. If at end, then @code{'()}
  629. ;; @end deffn
  630. (define (next-item item)
  631. (let* ((core (fluid-ref *lalr-core*))
  632. (gx (car item)) (rx (cdr item)) (rxp1 (1+ rx))
  633. (rlen (vector-length (vector-ref (core-rhs-v core) gx))))
  634. (cond
  635. ((negative? rx) '())
  636. ((eqv? rxp1 rlen) (cons gx -1))
  637. (else (cons gx rxp1)))))
  638. ;; @deffn {Procedure} prev-item item
  639. ;; Return the previous item in the grammar.
  640. ;; prev (0 . 0) is currently (0 . 0)
  641. ;; @end deffn
  642. (define (prev-item item)
  643. (let* ((core (fluid-ref *lalr-core*))
  644. (rhs-v (core-rhs-v core))
  645. (p-ix (car item))
  646. (p-ixm1 (1- p-ix))
  647. (r-ix (cdr item))
  648. (r-ixm1 (if (negative? r-ix)
  649. (1- (vector-length (vector-ref rhs-v p-ix)))
  650. (1- r-ix))))
  651. (if (zero? r-ix)
  652. (if (zero? p-ix) item ; start, i.e., (0 . 0)
  653. (cons p-ixm1 -1)) ; prev p-rule
  654. (cons p-ix r-ixm1))))
  655. ;; @deffn {Procedure} error-rule? gx => #t|#f
  656. ;; Predicate to indicate if gx rule has @code{$error} as rhs member.
  657. ;; @end deffn
  658. (define (error-rule? gx)
  659. (let* ((core (fluid-ref *lalr-core*))
  660. (rhs-v (core-rhs-v core)))
  661. (vector-any (lambda (e) (eqv? e '$error)) (vector-ref rhs-v gx))))
  662. ;; @deffn {Procedure} non-kernels symb => list of prule indices
  663. ;; Compute the set of non-kernel rules for symbol @code{symb}. If grammar
  664. ;; looks like
  665. ;; @example
  666. ;; 1: A => Bcd
  667. ;; ...
  668. ;; 5: B => Cde
  669. ;; ...
  670. ;; 7: B => Abe
  671. ;; @end example
  672. ;; @noindent
  673. ;; then @code{non-kernels 'A} results in @code{(1 5 7)}.
  674. ;; Note: To support pruning this routine will need to be rewritten.
  675. ;; @end deffn
  676. (define (non-kernels symb)
  677. (let* ((core (fluid-ref *lalr-core*))
  678. (lhs-v (core-lhs-v core))
  679. (rhs-v (core-rhs-v core))
  680. (glen (vector-length lhs-v))
  681. (lhs-symb (lambda (gx) (vector-ref lhs-v gx))))
  682. (let iter ((rslt '()) ; result is set of p-rule indices
  683. (done '()) ; symbols completed or queued
  684. (next '()) ; next round of symbols to process
  685. (curr (list symb)) ; this round of symbols to process
  686. (gx 0)) ; p-rule index
  687. (cond
  688. ((< gx glen)
  689. (cond
  690. ((memq (lhs-symb gx) curr)
  691. ;; Add rhs to next and rslt if not already done.
  692. (let* ((rhs1 (looking-at (first-item gx))) ; 1st-RHS-sym|$eps
  693. (rslt1 (if (memq gx rslt) rslt (cons gx rslt)))
  694. (done1 (if (memq rhs1 done) done (cons rhs1 done)))
  695. (next1 (cond ((memq rhs1 done) next)
  696. ((terminal? rhs1) next)
  697. (else (cons rhs1 next)))))
  698. (iter rslt1 done1 next1 curr (1+ gx))))
  699. (else
  700. ;; Nothing to check; process next rule.
  701. (iter rslt done next curr (1+ gx)))))
  702. ((pair? next)
  703. ;; Start another sweep throught the grammar.
  704. (iter rslt done '() next 0))
  705. (else
  706. ;; Done, so return.
  707. (reverse rslt))))))
  708. ;; @deffn {Procedure} expand-k-item => item-set
  709. ;; Expand a kernel-item into a list with the non-kernels.
  710. ;; @end deffn
  711. (define (expand-k-item k-item)
  712. (reverse
  713. (fold (lambda (gx items) (cons (first-item gx) items))
  714. (list k-item)
  715. (non-kernels (looking-at k-item)))))
  716. ;; @deffn {Procedure} its-equal?
  717. ;; Helper for step1
  718. ;; @end deffn
  719. (define (its-equal? its-1 its-2)
  720. (let iter ((its1 its-1) (its2 its-2)) ; cdr to strip off the ind
  721. (if (and (null? its1) (null? its2)) #t ; completed run through => #f
  722. (if (or (null? its1) (null? its2)) #f ; lists not equal length => #f
  723. (if (not (member (car its1) its-2)) #f ; mismatch => #f
  724. (iter (cdr its1) (cdr its2)))))))
  725. ;; @deffn {Procedure} its-member its its-l
  726. ;; Helper for step1
  727. ;; If itemset @code{its} is a member of itemset list @code{its-l} return the
  728. ;; index, else return #f.
  729. ;; @end deffn
  730. (define (its-member its its-l)
  731. (let iter ((itsl its-l))
  732. (if (null? itsl) #f
  733. (if (its-equal? its (cdar itsl)) (caar itsl)
  734. (iter (cdr itsl))))))
  735. ;; @deffn {Procedure} its-trans itemset => alist of (symb . itemset)
  736. ;; Compute transitions from an itemset. Thatis, map a list of kernel
  737. ;; items to a list of (symbol post-shift items).
  738. ;; @example
  739. ;; ((0 . 1) (2 . 3) => ((A (0 . 2) (2 . 4)) (B (2 . 4) ...))
  740. ;; @end example
  741. ;; @end deffn
  742. (define (its-trans items)
  743. (let iter ((rslt '()) ; result
  744. (k-items items) ; items
  745. (itl '())) ; one k-item w/ added non-kernels
  746. (cond
  747. ((pair? itl)
  748. (let* ((it (car itl)) ; item
  749. (sy (looking-at it)) ; symbol
  750. (nx (next-item it))
  751. (sq (assq sy rslt))) ; if we have seen it
  752. (cond
  753. ((eq? sy '$epsilon)
  754. ;; don't transition end-of-rule items
  755. (iter rslt k-items (cdr itl)))
  756. ((not sq)
  757. ;; haven't seen this symbol yet
  758. (iter (acons sy (list nx) rslt) k-items (cdr itl)))
  759. ((member nx (cdr sq))
  760. ;; repeat
  761. (iter rslt k-items (cdr itl)))
  762. (else
  763. ;; SY is in RSLT and item not yet in: add it.
  764. (set-cdr! sq (cons nx (cdr sq)))
  765. (iter rslt k-items (cdr itl))))))
  766. ((pair? k-items)
  767. (iter rslt (cdr k-items) (expand-k-item (car k-items))))
  768. (else
  769. rslt))))
  770. ;; @deffn {Procedure} step1 [input-a-list] => p-mach-1
  771. ;; Compute the sets of LR(0) kernel items and the transitions associated with
  772. ;; spec. These are returned as vectors in the alist with keys @code{'kis-v}
  773. ;; and @code{'kix-v}, repspectively. Each entry in @code{kis-v} is a list of
  774. ;; items in the form @code{(px . rx)} where @code{px} is the production rule
  775. ;; index and @code{rx} is the index of the RHS symbol. Each entry in the
  776. ;; vector @code{kix-v} is an a-list with entries @code{(sy . kx)} where
  777. ;; @code{sy} is a (terminal or non-terminal) symbol and @code{kx} is the
  778. ;; index of the kernel itemset. The basic algorithm is discussed on
  779. ;; pp. 228-229 of the DB except that we compute non-kernel items on the fly
  780. ;; using @code{expand-k-item}. See Example 4.46 on p. 241 of the DB.
  781. ;; @end deffn
  782. (define (step1 . rest)
  783. (let* ((al-in (if (pair? rest) (car rest) '()))
  784. (add-kset (lambda (upd kstz) ; give upd a ks-ix and add to kstz
  785. (acons (1+ (caar kstz)) upd kstz)))
  786. (init '(0 (0 . 0))))
  787. (let iter ((ksets (list init)) ; w/ index
  788. (ktrnz '()) ; ((symb src dst) (symb src dst) ...)
  789. (next '()) ; w/ index
  790. (todo (list init)) ; w/ index
  791. (curr #f) ; current state ix
  792. (trans '())) ; ((symb it1 it2 ...) (symb ...))
  793. (cond
  794. ((pair? trans)
  795. ;; Check next symbol for transitions (symb . (item1 item2 ...)).
  796. (let* ((dst (cdar trans)) ; destination item
  797. (dst-ix (its-member dst ksets)) ; return ix else #f
  798. (upd (if dst-ix '() (cons (1+ (caar ksets)) dst)))
  799. (ksets1 (if dst-ix ksets (cons upd ksets)))
  800. (next1 (if dst-ix next (cons upd next)))
  801. (dsx (if dst-ix dst-ix (car upd))) ; dest state index
  802. (ktrnz1 (cons (list (caar trans) curr dsx) ktrnz)))
  803. (iter ksets1 ktrnz1 next1 todo curr (cdr trans))))
  804. ((pair? todo)
  805. ;; Process the next state (aka itemset).
  806. (iter ksets ktrnz next (cdr todo) (caar todo) (its-trans (cdar todo))))
  807. ((pair? next)
  808. ;; Sweep throught the grammar again.
  809. (iter ksets ktrnz '() next curr '()))
  810. (else
  811. (let* ((nkis (length ksets)) ; also (caar ksets)
  812. (kisv (make-vector nkis #f))
  813. (kitv (make-vector nkis '())))
  814. ;; Vectorize kernel sets
  815. (for-each
  816. (lambda (kis) (vector-set! kisv (car kis) (cdr kis)))
  817. ksets)
  818. ;; Vectorize transitions (by src kx).
  819. (for-each
  820. (lambda (kit)
  821. (vector-set! kitv (cadr kit)
  822. (acons (car kit) (caddr kit)
  823. (vector-ref kitv (cadr kit)))))
  824. ktrnz)
  825. ;; Return kis-v, kernel itemsets, and kix-v transitions.
  826. (cons* (cons 'kis-v kisv) (cons 'kix-v kitv) al-in)))))))
  827. ;; @deffn {Procedure} find-eps non-terms lhs-v rhs-v => eps-l
  828. ;; Generate a list of non-terminals which have epsilon productions.
  829. ;; @end deffn
  830. (define (find-eps nterms lhs-v rhs-v)
  831. (let* ((nprod (vector-length lhs-v))
  832. (find-new
  833. (lambda (e l)
  834. (let iter ((ll l) (gx 0) (lhs #f) (rhs #()) (rx 0))
  835. (cond
  836. ((< rx (vector-length rhs))
  837. (if (and (memq (vector-ref rhs rx) nterms) ; non-term
  838. (memq (vector-ref rhs rx) ll)) ; w/ eps prod
  839. (iter ll gx lhs rhs (1+ rx)) ; yes: check next
  840. (iter ll (1+ gx) #f #() 0))) ; no: next p-rule
  841. ((and lhs (= rx (vector-length rhs))) ; we have eps-prod
  842. (iter (if (memq lhs ll) ll (cons lhs ll)) (1+ gx) #f #() 0))
  843. ((< gx nprod) ; check next p-rule if not on list
  844. (if (memq (vector-ref lhs-v gx) ll)
  845. (iter ll (1+ gx) #f #() 0)
  846. (iter ll gx (vector-ref lhs-v gx) (vector-ref rhs-v gx) 0)))
  847. (else ll))))))
  848. (fixpoint find-new (find-new #f '()))))
  849. ;; @deffn {Procedure} merge1 v l
  850. ;; add v to l if not in l
  851. ;; @end deffn
  852. (define (merge1 v l)
  853. (if (memq v l) l (cons v l)))
  854. ;; @deffn {Procedure} merge2 v l al
  855. ;; add v to l if not in l or al
  856. ;; @end deffn
  857. (define (merge2 v l al)
  858. (if (memq v l) l (if (memq v al) l (cons v l))))
  859. ;; @deffn {Procedure} first symbol-list end-token-list
  860. ;; Return list of terminals starting the string @code{symbol-list}
  861. ;; (see DB, p. 188). If the symbol-list can generate epsilon then the
  862. ;; result will include @code{end-token-list}.
  863. ;; @end deffn
  864. (define (first symbol-list end-token-list)
  865. (let* ((core (fluid-ref *lalr-core*))
  866. (eps-l (core-eps-l core)))
  867. ;; This loop strips off the leading symbol from stng and then adds to
  868. ;; todo list, which results in range of p-rules getting checked for
  869. ;; terminals.
  870. (let iter ((rslt '()) ; terminals collected
  871. (stng symbol-list) ; what's left of input string
  872. (hzeps #t) ; if eps-prod so far
  873. (done '()) ; non-terminals checked
  874. (todo '()) ; non-terminals to assess
  875. (p-range '()) ; range of p-rules to check
  876. (item '())) ; item in production
  877. (cond
  878. ((pair? item)
  879. (let ((sym (looking-at item)))
  880. (cond
  881. ((eq? sym '$epsilon) ; at end of rule, go next
  882. (iter rslt stng hzeps done todo p-range '()))
  883. ((terminal? sym) ; terminal, log it
  884. (iter (merge1 sym rslt) stng hzeps done todo p-range '()))
  885. ((memq sym eps-l) ; symbol has eps prod
  886. (iter rslt stng hzeps (merge1 sym done) (merge2 sym todo done)
  887. p-range (next-item item)))
  888. (else ;; non-terminal, add to todo/done, goto next
  889. (iter rslt stng hzeps
  890. (merge1 sym done) (merge2 sym todo done) p-range '())))))
  891. ((pair? p-range) ; next one to do
  892. ;; run through next rule
  893. (iter rslt stng hzeps done todo
  894. (range-next p-range) (first-item (car p-range))))
  895. ((pair? todo)
  896. (iter rslt stng hzeps done (cdr todo) (prule-range (car todo)) '()))
  897. ((and hzeps (pair? stng))
  898. ;; Last pass saw an $epsilon so check the next input symbol,
  899. ;; with saweps reset to #f.
  900. (let* ((symb (car stng)) (stng1 (cdr stng)) (symbl (list symb)))
  901. (if (terminal? symb)
  902. (iter (cons symb rslt) stng1
  903. (and hzeps (memq symb eps-l))
  904. done todo p-range '())
  905. (iter rslt stng1
  906. (or (eq? symb '$epsilon) (memq symb eps-l))
  907. symbl symbl '() '()))))
  908. (hzeps
  909. ;; $epsilon passes all the way through.
  910. ;; If end-token-list provided use that.
  911. (if (pair? end-token-list)
  912. (lset-union eqv? rslt end-token-list)
  913. (cons '$epsilon rslt)))
  914. (else
  915. rslt)))))
  916. ;; @deffn {Procedure} item->stng item => list-of-symbols
  917. ;; Convert item (e.g., @code{(1 . 2)}) to list of symbols to the end of the
  918. ;; production(?). If item is at the end of the rule then return
  919. ;; @code{'$epsilon}. The term "stng" is used to avoid confusion about the
  920. ;; term string.
  921. ;; @end deffn
  922. (define (item->stng item)
  923. (if (eqv? (cdr item) -1)
  924. (list '$epsilon)
  925. (let* ((core (fluid-ref *lalr-core*))
  926. (rhs-v (core-rhs-v core))
  927. (rhs (vector-ref rhs-v (car item))))
  928. (let iter ((res '()) (ix (1- (vector-length rhs))))
  929. (if (< ix (cdr item)) res
  930. (iter (cons (vector-ref rhs ix) res) (1- ix)))))))
  931. ;; add (item . toks) to (front of) la-item-l
  932. ;; i.e., la-item-l is unmodified
  933. (define (merge-la-item la-item-l item toks)
  934. (let* ((pair (assoc item la-item-l))
  935. (tokl (if (pair? pair) (cdr pair) '()))
  936. (allt ;; union of toks and la-item-l toks
  937. (let iter ((tl tokl) (ts toks))
  938. (if (null? ts) tl
  939. (iter (if (memq (car ts) tl) tl (cons (car ts) tl))
  940. (cdr ts))))))
  941. (if (not pair) (acons item allt la-item-l)
  942. (if (eqv? tokl allt) la-item-l
  943. (acons item allt la-item-l)))))
  944. ;; @deffn {Procedure} first-following item toks => token-list
  945. ;; For la-item A => x.By,z (where @code{item}, @code{toks}), this
  946. ;; procedure computes @code{FIRST(yz)}.
  947. ;; @end deffn
  948. (define (first-following item toks)
  949. (first (item->stng (next-item item)) toks))
  950. ;; @deffn {Procedure} closure la-item-l => la-item-l
  951. ;; Compute the closure of a list of la-items.
  952. ;; Ref: DB, Fig 4.38, Sec. 4.7, p. 232
  953. ;; @end deffn
  954. (define (closure la-item-l)
  955. ;; Compute the fixed point of I, aka @code{la-item-l}, with procedure
  956. ;; for each item [A => x.By, a] in I
  957. ;; each production B => z in G
  958. ;; and each terminal b in FIRST(ya)
  959. ;; such that [B => .z, b] is not in I do
  960. ;; add [B => .z, b] to I
  961. ;; The routine @code{fixpoint} operates on one element of the input set.
  962. (prune-assoc
  963. (fixpoint
  964. (lambda (la-item seed)
  965. (let* ((item (car la-item)) (toks (cdr la-item)) (symb (looking-at item)))
  966. (cond
  967. ((last-item? (car la-item)) seed)
  968. ((terminal? (looking-at (car la-item))) seed)
  969. (else
  970. (let iter ((seed seed) (pr (prule-range symb)))
  971. (cond
  972. ((null? pr) seed)
  973. (else
  974. (iter (merge-la-item seed (first-item (car pr))
  975. (first-following item toks))
  976. (range-next pr)))))))))
  977. la-item-l)))
  978. ;; @deffn {Procedure} kit-add kit-v tokens sx item
  979. ;; Add @code{tokens} to the list of lookaheads for the (kernel) @code{item}
  980. ;; in state @code{sx}. This is a helper for @code{step2}.
  981. ;; @end deffn
  982. (define (kit-add kit-v tokens kx item)
  983. (let* ((al (vector-ref kit-v kx)) ; a-list for k-set kx
  984. (ar (assoc item al)) ; tokens for item
  985. (sd (if (pair? ar) ; set difference
  986. (lset-difference eqv? tokens (cdr ar))
  987. tokens)))
  988. (cond ;; no entry, update entry, no update
  989. ((null? tokens) #f)
  990. ((not ar) (vector-set! kit-v kx (acons item tokens al)) #t)
  991. ((pair? sd) (set-cdr! ar (append sd (cdr ar))) #t)
  992. (else #f))))
  993. ;; @deffn {Procedure} kip-add kip-v sx0 it0 sx1 it1
  994. ;; This is a helper for step2. It updates kip-v with a propagation from
  995. ;; state @code{sx0}, item @code{it0} to state @code{sx1}, item @code{it1}.
  996. ;; [kip-v sx0] -> (it0 . ((sx1 . it1)
  997. ;; @end deffn
  998. (define (kip-add kip-v sx0 it0 sx1 it1)
  999. (let* ((al (vector-ref kip-v sx0)) (ar (assoc it0 al)))
  1000. (cond
  1001. ((not ar)
  1002. (vector-set! kip-v sx0 (acons it0 (list (cons sx1 it1)) al)) #t)
  1003. ((member it1 (cdr ar)) #f)
  1004. (else
  1005. (set-cdr! ar (acons sx1 it1 (cdr ar))) #t))))
  1006. ;; @deffn {Procedure} step2 p-mach-1 => p-mach-2
  1007. ;; This implements steps 2 and 3 of Algorithm 4.13 on p. 242 of the DB.
  1008. ;; The a-list @code{p-mach-1} includes the kernel itemsets and transitions
  1009. ;; from @code{step1}. This routine adds two entries to the a-list:
  1010. ;; the initial set of lookahead tokens in a vector associated with key
  1011. ;; @code{'kit-v} and a vector of spontaneous propagations associated with
  1012. ;; key @code{'kip-v}.
  1013. ;; @example
  1014. ;; for-each item I in some itemset
  1015. ;; for-each la-item J in closure(I,#)
  1016. ;; for-each token T in lookaheads(J)
  1017. ;; if LA is #, then add to J propagate-to list
  1018. ;; otherwise add T to spontaneously-generated list
  1019. ;; @end example
  1020. ;; @end deffn
  1021. (define (step2 p-mach)
  1022. (let* ((kis-v (assq-ref p-mach 'kis-v))
  1023. (kix-v (assq-ref p-mach 'kix-v)) ; transitions?
  1024. (nkset (vector-length kis-v)) ; number of k-item-sets
  1025. ;; kernel-itemset tokens
  1026. (kit-v (make-vector nkset '())) ; sx => alist: (item latoks)
  1027. ;; kernel-itemset propagations
  1028. (kip-v (make-vector nkset '()))) ; sx0 => ((ita (sx1a . it1a) (sx2a
  1029. (vector-set! kit-v 0 (closure (list (list '(0 . 0) '$end))))
  1030. (let iter ((kx -1) (kset '()))
  1031. (cond
  1032. ((pair? kset)
  1033. (for-each
  1034. (lambda (la-item)
  1035. (let* ((item (car la-item)) ; closure item
  1036. (tokl (cdr la-item)) ; tokens
  1037. (sym (looking-at item)) ; transition symbol
  1038. (item1 (next-item item)) ; next item after sym
  1039. (sx1 (assq-ref (vector-ref kix-v kx) sym)) ; goto(I,sym)
  1040. (item0 (car kset))) ; kernel item
  1041. (kit-add kit-v (delq '$@ tokl) sx1 item1) ; spontaneous
  1042. (if (memq '$@ tokl) ; propagates
  1043. (kip-add kip-v kx item0 sx1 item1))))
  1044. (remove ;; todo: check this remove
  1045. (lambda (li) (last-item? (car li)))
  1046. (closure (list (cons (car kset) '($@))))))
  1047. (iter kx (cdr kset)))
  1048. ((< (1+ kx) nkset)
  1049. (iter (1+ kx)
  1050. ;; End-items don't shift, so don't propagate.
  1051. (remove last-item? (vector-ref kis-v (1+ kx)))))))
  1052. (cons* (cons 'kit-v kit-v) (cons 'kip-v kip-v) p-mach)))
  1053. ;; debug for step2
  1054. (define (pp-kit ix kset)
  1055. (fmtout "~S:\n" ix)
  1056. (for-each
  1057. (lambda (item) (fmtout " ~A, ~S\n" (pp-item (car item)) (cdr item)))
  1058. kset))
  1059. (define (pp-kit-v kit-v)
  1060. (fmtout "spontaneous:\n")
  1061. (vector-for-each pp-kit kit-v))
  1062. (define (pp-kip ix kset)
  1063. (for-each
  1064. (lambda (x)
  1065. (fmtout "~S: ~A\n" ix (pp-item (car x)))
  1066. (for-each
  1067. (lambda (y) (fmtout " => ~S: ~A\n" (car y) (pp-item (cdr y))))
  1068. (cdr x)))
  1069. kset))
  1070. (define (pp-kip-v kip-v)
  1071. (fmtout "propagate:\n")
  1072. (vector-for-each pp-kip kip-v))
  1073. ;; @deffn {Procedure} step3 p-mach-2 => p-mach-3
  1074. ;; Execute nyacc step 3, where p-mach means ``partial machine''.
  1075. ;; This implements step 4 of Algorithm 4.13 from the DB.
  1076. ;; @end deffn
  1077. (define (step3 p-mach)
  1078. (let* ((kit-v (assq-ref p-mach 'kit-v))
  1079. (kip-v (assq-ref p-mach 'kip-v))
  1080. (nkset (vector-length kit-v)))
  1081. (let iter ((upd #t) ; token propagated?
  1082. (kx -1) ; current index
  1083. (ktal '()) ; (item . LA) list for kx
  1084. (toks '()) ; LA tokens being propagated
  1085. (item '()) ; from item
  1086. (prop '())) ; to items
  1087. (cond
  1088. ((pair? prop)
  1089. ;; Propagate lookaheads.
  1090. (let* ((sx1 (caar prop)) (it1 (cdar prop)))
  1091. (iter (or (kit-add kit-v toks sx1 it1) upd)
  1092. kx ktal toks item (cdr prop))))
  1093. ((pair? ktal)
  1094. ;; Process the next (item . tokl) in the alist ktal.
  1095. (iter upd kx (cdr ktal) (cdar ktal) (caar ktal)
  1096. (assoc-ref (vector-ref kip-v kx) (caar ktal))))
  1097. ((< (1+ kx) nkset)
  1098. ;; Process the next itemset.
  1099. (iter upd (1+ kx) (vector-ref kit-v (1+ kx)) '() '() '()))
  1100. (upd
  1101. ;; Have updates, rerun.
  1102. (iter #f 0 '() '() '() '()))))
  1103. p-mach))
  1104. ;; @deffn {Procedure} reductions kit-v sx => ((tokA gxA1 ...) ...)
  1105. ;; This is a helper for @code{step4}.
  1106. ;; Return an a-list of reductions for state @code{sx}.
  1107. ;; The a-list pairs are make of a token and a list of prule indicies.
  1108. ;; CHECK the following. We are brute-force using @code{closure} here.
  1109. ;; It works, but there should be a better algorithm.
  1110. ;; Note on reductions: We reduce if the kernel-item is an end-item or a
  1111. ;; non-kernel item with an epsilon-production. That is, if we have a
  1112. ;; kernel item of the form
  1113. ;; @example
  1114. ;; A => abc.
  1115. ;; @end example
  1116. ;; or if we have the non-kernel item of the form
  1117. ;; @example
  1118. ;; B => .de
  1119. ;; @end example
  1120. ;; where FIRST(de,#) includes #. See the second paragraph under ``Efficient
  1121. ;; Construction of LALR Parsing Tables'' in DB Sec 4.7.
  1122. ;; @end deffn
  1123. (define (old-reductions kit-v sx)
  1124. (let iter ((ral '()) ; result: reduction a-list
  1125. (lais (vector-ref kit-v sx)) ; la-item list
  1126. (toks '()) ; kernel la-item LA tokens
  1127. (itms '()) ; all items
  1128. (gx #f) ; rule reduced by tl
  1129. (tl '())) ; LA-token list
  1130. (cond
  1131. ((pair? tl) ;; add (token . p-rule) to reduction list
  1132. (let* ((tk (car tl)) (rp (assq tk ral)))
  1133. (cond
  1134. ;; already have this, skip to next token
  1135. ((and rp (memq gx (cdr rp)))
  1136. (iter ral lais toks itms gx (cdr tl)))
  1137. (rp
  1138. ;; have token, add prule
  1139. (set-cdr! rp (cons gx (cdr rp)))
  1140. (iter ral lais toks itms gx (cdr tl)))
  1141. (else
  1142. ;; add token w/ prule
  1143. (iter (cons (list tk gx) ral) lais toks itms gx (cdr tl))))))
  1144. ((pair? itms)
  1145. (if (last-item? (car itms))
  1146. ;; last item, add it
  1147. (iter ral lais toks (cdr itms) (caar itms) toks)
  1148. ;; skip to next
  1149. (iter ral lais toks (cdr itms) 0 '())))
  1150. ((pair? lais) ;; process next la-item
  1151. (iter ral (cdr lais) (cdar lais) (expand-k-item (caar lais)) 0 '()))
  1152. (else ral))))
  1153. ;; I think the above is broken because I'm not including the proper tail
  1154. ;; string. The following just uses closure to do the job. It works but
  1155. ;; may not be very efficient: seems a bit brute force.
  1156. (define (new-reductions kit-v sx)
  1157. (let iter ((ral '()) ; result: reduction a-list
  1158. (klais (vector-ref kit-v sx)) ; kernel la-item list
  1159. (laits '()) ; all la-items
  1160. (gx #f) ; rule reduced by tl
  1161. (tl '())) ; LA-token list
  1162. (cond
  1163. ((pair? tl) ;; add (token . p-rule) to reduction list
  1164. (let* ((tk (car tl)) (rp (assq tk ral)))
  1165. (cond
  1166. ((and rp (memq gx (cdr rp)))
  1167. ;; already have this, skip to next token
  1168. (iter ral klais laits gx (cdr tl)))
  1169. (rp
  1170. ;; have token, add prule
  1171. (set-cdr! rp (cons gx (cdr rp)))
  1172. (iter ral klais laits gx (cdr tl)))
  1173. (else
  1174. ;; add token w/ prule
  1175. (iter (cons (list tk gx) ral) klais laits gx (cdr tl))))))
  1176. ((pair? laits) ;; process a la-itemset
  1177. (if (last-item? (caar laits))
  1178. ;; last item, add it
  1179. (iter ral klais (cdr laits) (caaar laits) (cdar laits))
  1180. ;; else skip to next
  1181. (iter ral klais (cdr laits) 0 '())))
  1182. ((pair? klais) ;; expand next kernel la-item
  1183. ;; There is a cheaper way than closure to do this but for now ...
  1184. (iter ral (cdr klais) (closure (list (car klais))) 0 '()))
  1185. (else
  1186. ral))))
  1187. (define reductions new-reductions)
  1188. ;; Generate parse-action-table from the shift a-list and reduce a-list.
  1189. ;; This is a helper for @code{step4}. It converts a list of state transitions
  1190. ;; and a list of reductions into a parse-action table of shift, reduce,
  1191. ;; accept, shift-reduce conflict or reduce-reduce conflict.
  1192. ;; The actions take the form:
  1193. ;; @example
  1194. ;; (shift . <dst-state>)
  1195. ;; (reduce . <rule-index>)
  1196. ;; (accept . 0)
  1197. ;; (srconf . (<dst-state> . <p-rule>))
  1198. ;; (rrconf . <list of p-rules indices>)
  1199. ;; @end example
  1200. ;; If a shift has multiple reduce conflicts we report only one reduction.
  1201. (define (gen-pat sft-al red-al)
  1202. (let iter ((res '()) (sal sft-al) (ral red-al))
  1203. (cond
  1204. ((pair? sal)
  1205. (let* ((term (caar sal)) ; terminal
  1206. (goto (cdar sal)) ; target state
  1207. (redp (assq term ral)) ; a-list entry, may be removed
  1208. ;;(redl (if redp (cdr redp) #f))) ; reductions on terminal
  1209. (redl (and=> redp cdr))) ; reductions on terminal
  1210. (cond
  1211. ((and redl (pair? (cdr redl)))
  1212. ;; This means we have a shift-reduce and reduce-reduce conflicts.
  1213. ;; We record only one shift-reduce and keep the reduce-reduce.
  1214. (iter (cons (cons* term 'srconf goto (car redl)) res)
  1215. (cdr sal) ral))
  1216. (redl
  1217. ;; The terminal (aka token) signals a single reduction. This means
  1218. ;; we have one shift-reduce conflict. We have a chance to repair
  1219. ;; the parser using precedence/associativity rules so we remove the
  1220. ;; reduction from the reduction-list.
  1221. (iter (cons (cons* term 'srconf goto (car redl)) res)
  1222. (cdr sal) (delete redp ral)))
  1223. (else
  1224. ;; The terminal (aka token) signals a shift only.
  1225. (iter (cons (cons* term 'shift goto) res)
  1226. (cdr sal) ral)))))
  1227. ((pair? ral)
  1228. (let ((term (caar ral)) (rest (cdar ral)))
  1229. ;; We keep 'accept as explict action. Another option is to reduce and
  1230. ;; have 0-th p-rule action generate return from parser (via prompt?).
  1231. (iter
  1232. (cons (cons term
  1233. (cond ;; => action and arg(s)
  1234. ((zero? (car rest)) (cons 'accept 0))
  1235. ((zero? (car rest)) (cons 'reduce (car rest)))
  1236. ((> (length rest) 1) (cons 'rrconf rest))
  1237. (else (cons 'reduce (car rest)))))
  1238. res) sal (cdr ral))))
  1239. (else res))))
  1240. ;; @deffn {Procedure} step4 p-mach-0 => p-mach-1
  1241. ;; This generates the parse action table from the itemsets and then applies
  1242. ;; precedence and associativity rules to eliminate shift-reduce conflicts
  1243. ;; where possible. The output includes the parse action table (entry
  1244. ;; @code{'pat-v} and TBD (list of errors in @code{'err-l}).
  1245. ;;.per-state: alist by symbol:
  1246. ;; (symb <id>) if <id> > 0 SHIFT to <id>, else REDUCE by <id> else
  1247. ;; so ('$end . 0) means ACCEPT!
  1248. ;; but 0 for SHIFT and REDUCE, but reduce 0 is really ACCEPT
  1249. ;; if reduce by zero we are done. so never hit state zero accept on ACCEPT?
  1250. ;; For each state, the element of pat-v looks like
  1251. ;; ((tokenA . (reduce . 79)) (tokenB . (reduce . 91)) ... )
  1252. ;; @end deffn
  1253. (define (step4 p-mach)
  1254. (define (setup-assc assc)
  1255. (fold (lambda (al seed)
  1256. (append (x-flip al) seed)) '() assc))
  1257. (define (setup-prec prec)
  1258. (let iter ((res '()) (rl '()) (hd '()) (pl '()) (pll prec))
  1259. (cond
  1260. ((pair? pl)
  1261. (let* ((p (car pl)) (hdp (x-comb hd p))
  1262. (pp (remove (lambda (p) (eqv? (car p) (cdr p))) (x-comb p p))))
  1263. (iter res (append rl hdp pp) (car pl) (cdr pl) pll)))
  1264. ((pair? rl) (iter (append res rl) '() hd pl pll))
  1265. ((pair? pll) (iter res rl '() (car pll) (cdr pll)))
  1266. (else res))))
  1267. (define (prev-sym act its)
  1268. (let* ((a act)
  1269. (tok (car a)) (sft (caddr a)) (red (cdddr a))
  1270. ;; @code{pit} is the end-item in the p-rule to be reduced.
  1271. (pit (prev-item (prev-item (cons red -1))))
  1272. ;; @code{psy} is the last symbol in the p-rule to be reduced.
  1273. (psy (looking-at pit)))
  1274. psy))
  1275. (define (uniqmax-prec prl rpl prec)
  1276. (let iter ((tie #f)
  1277. (gx (car prl)) (mx (car rpl))
  1278. (prl (cdr prl)) (rpl (cdr rpl)))
  1279. (if (null? prl) (and (not tie) gx)
  1280. (let ((cmp (prece mx (car rpl) prec)))
  1281. (case cmp
  1282. ((lt) (iter #f (car prl) (car rpl) (cdr prl) (cdr rpl)))
  1283. ((gt) (iter tie gx mx (cdr prl) (cdr rpl)))
  1284. ((eq) (iter #t gx mx (cdr prl) (cdr rpl)))
  1285. ((#f) #f))))))
  1286. (let* ((kis-v (assq-ref p-mach 'kis-v)) ; states
  1287. (kit-v (assq-ref p-mach 'kit-v)) ; la-toks
  1288. (kix-v (assq-ref p-mach 'kix-v)) ; transitions
  1289. (assc (assq-ref p-mach 'assc)) ; associativity rules
  1290. (assc (setup-assc assc)) ; trying it
  1291. (prec (assq-ref p-mach 'prec)) ; precedence rules
  1292. (prec (setup-prec prec)) ; trying it
  1293. (nst (vector-length kis-v)) ; number of states
  1294. (pat-v (make-vector nst '())) ; parse-act tab /state
  1295. (rat-v (make-vector nst '())) ; removed-act tab /state
  1296. (gen-pat-ix (lambda (ix) ; pat from shifts & reduc's
  1297. (gen-pat (vector-ref kix-v ix) (reductions kit-v ix))))
  1298. (prp-v (assq-ref p-mach 'prp-v)) ; per-rule precedence
  1299. (tl (assq-ref p-mach 'terminals))) ; for error msgs
  1300. ;; We run through each itemset.
  1301. ;; @enumerate
  1302. ;; @item We have a-list of symbols to shift state (i.e., @code{kix-v}).
  1303. ;; @item We generate a list of tokens to reduction from @code{kit-v}.
  1304. ;; @end enumerate
  1305. ;; Q: should '$end be edited out of shifts?
  1306. ;; kit-v is vec of a-lists of form ((item tok1 tok2 ...) ...)
  1307. ;; turn to (tok1 item1 item2 ...)
  1308. (let iter ((ix 0) ; state index
  1309. (pat '()) ; parse-action table
  1310. (rat '()) ; removed-action table
  1311. (wrn '()) ; warnings on unsolicited removals
  1312. (ftl '()) ; fatal conflicts
  1313. (actl (gen-pat-ix 0))) ; action list
  1314. (cond
  1315. ((pair? actl)
  1316. (case (cadar actl)
  1317. ((shift reduce accept)
  1318. (iter ix (cons (car actl) pat) rat wrn ftl (cdr actl)))
  1319. ((srconf)
  1320. (let* ((act (car actl))
  1321. (tok (car act)) (sft (caddr act)) (red (cdddr act))
  1322. (prp (vector-ref prp-v red))
  1323. (psy (prev-sym act (vector-ref kis-v ix)))
  1324. (preced (or (and prp (prece prp tok prec)) ; rule-based
  1325. (prece psy tok prec))) ; oper-based
  1326. (sft-a (cons* tok 'shift sft))
  1327. (red-a (cons* tok 'reduce red)))
  1328. (call-with-values
  1329. (lambda ()
  1330. ;; Use precedence or, if =, associativity.
  1331. (case preced
  1332. ((gt)
  1333. (values red-a (cons sft-a 'pre) #f #f))
  1334. ((lt)
  1335. (values sft-a (cons red-a 'pre) #f #f))
  1336. ((eq) ;; Now use associativity
  1337. (case (assq-ref assc tok)
  1338. ((left)
  1339. (values red-a (cons sft-a 'ass) #f #f))
  1340. ((right)
  1341. (values sft-a (cons red-a 'ass) #f #f))
  1342. ((nonassoc)
  1343. (values (cons* tok 'error red) #f #f (cons ix act)))
  1344. (else
  1345. (values sft-a (cons red-a 'def) (cons ix act) #f))))
  1346. (else ;; Or default, which is shift.
  1347. (values sft-a (cons red-a 'def) (cons ix act) #f))))
  1348. (lambda (a r w f)
  1349. (iter ix
  1350. (if a (cons a pat) pat)
  1351. (if r (cons r rat) rat)
  1352. (if w (cons w wrn) wrn)
  1353. (if f (cons f ftl) ftl)
  1354. (cdr actl))))))
  1355. ((rrconf)
  1356. (let* ((act (car actl))
  1357. (tok (car act)) ;;(sft (caddr act)) (red (cdddr act))
  1358. (prl (cddr act)) ; p-rule rrconf list
  1359. (rpl (map (lambda (pr) (vector-ref prp-v pr)) prl)) ; prec's
  1360. (uniq (uniqmax-prec prl rpl prec))) ; unique rule to use
  1361. (if uniq
  1362. (iter ix (cons (cons* (caar actl) 'reduce uniq) pat)
  1363. rat ;; need to update by filtering out uniq
  1364. wrn ftl (cdr actl))
  1365. (iter ix (cons (car actl) pat) rat wrn
  1366. (cons (cons ix (car actl)) ftl) (cdr actl)))))
  1367. (else
  1368. (error "PROBLEM"))))
  1369. ((null? actl)
  1370. (vector-set! pat-v ix pat)
  1371. (vector-set! rat-v ix rat)
  1372. (iter ix pat rat wrn ftl #f))
  1373. ((< (1+ ix) nst)
  1374. (iter (1+ ix) '() '() wrn ftl (gen-pat-ix (1+ ix))))
  1375. (else
  1376. (let* ((attr (assq-ref p-mach 'attr))
  1377. (expect (assq-ref attr 'expect))) ; expected # srconf
  1378. (if (not (= (length wrn) expect))
  1379. (for-each (lambda (m) (fmterr "+++ warning: ~A\n" (conf->str m)))
  1380. (reverse wrn)))
  1381. (for-each
  1382. (lambda (m) (fmterr "*** fatal: ~A\n" (conf->str m)))
  1383. (reverse ftl))))))
  1384. ;; Return mach with parse-action and removed-action tables.
  1385. (cons* (cons 'pat-v pat-v) (cons 'rat-v rat-v) p-mach)))
  1386. ;; @deffn {Procedure} conf->str cfl => string
  1387. ;; map conflict (e.g., @code{('rrconf 1 . 2}) to string.
  1388. ;; @end deffn
  1389. (define (conf->str cfl)
  1390. (let* ((st (list-ref cfl 0)) (tok (list-ref cfl 1)) (typ (list-ref cfl 2))
  1391. (core (fluid-ref *lalr-core*)) (terms (core-terminals core)))
  1392. (fmtstr "in state ~A, ~A conflict on ~A"
  1393. st
  1394. (case typ
  1395. ((srconf) "shift-reduce")
  1396. ((rrconf) "reduce-reduce")
  1397. (else "unknown"))
  1398. (obj->str (find-terminal tok terms)))))
  1399. ;; @deffn {Procedure} gen-match-table mach => mach
  1400. ;; Generate the match-table for a machine. The match table is a list of
  1401. ;; pairs: the car is the token used in the grammar specification, the cdr
  1402. ;; is the symbol that should be returned by the lexical analyzer.
  1403. ;;
  1404. ;; The match-table may be passed to
  1405. ;; the lexical analyzer builder to identify strings or string-types as tokens.
  1406. ;; The associated key in the machine is @code{mtab}.
  1407. ;; @enumerate
  1408. ;; @item
  1409. ;; @sc{nyacc}-reserved symbols are provided as symbols
  1410. ;; @example
  1411. ;; $ident -> ($ident . $ident)
  1412. ;; @end example
  1413. ;; @item
  1414. ;; Terminals used as symbols (@code{'comment} versus @code{"comment"}) are
  1415. ;; provided as symbols. The spec parser will provide a warning if symbols
  1416. ;; are used in both ways.
  1417. ;; @item
  1418. ;; Others are provided as strings.
  1419. ;; @end enumerate
  1420. ;; The procedure @code{hashify-machine} will convert the cdrs to integers.
  1421. ;; Test: "$abc" => ("$abc" '$abc) '$abc => ('$abc . '$abc) @*
  1422. ;; Note: Adding in $start sym for interactive parser helper.
  1423. ;; @end deffn
  1424. (define (gen-match-table mach)
  1425. (acons 'mtab
  1426. (cons (cons '$start (lalr-start mach))
  1427. (map (lambda (term) (cons term (atomize term)))
  1428. (assq-ref mach 'terminals)))
  1429. mach))
  1430. ;; @deffn {Procedure} add-recovery-logic! mach => mach
  1431. ;; Target of transition from @code{'$error} should have a default rule that
  1432. ;; loops back.
  1433. ;; @end deffn
  1434. (define (add-recovery-logic-1 mach)
  1435. (let* ((kis-v (assq-ref mach 'kis-v))
  1436. (rhs-v (assq-ref mach 'rhs-v))
  1437. (pat-v (assq-ref mach 'pat-v))
  1438. (n (vector-length pat-v)))
  1439. (vector-for-each
  1440. (lambda (kx kis)
  1441. ;;(fmtout "kis=~S\n " kis)
  1442. (for-each
  1443. (lambda (ki)
  1444. (let* ((pi (prev-item ki))
  1445. (rhs (vector-ref rhs-v (car pi))))
  1446. (when (and (not (negative? (cdr pi)))
  1447. (eqv? '$error (looking-at pi)))
  1448. (vector-set! pat-v kx
  1449. (append
  1450. (vector-ref pat-v kx)
  1451. `(($default shift . ,kx)))))))
  1452. kis)
  1453. #f)
  1454. kis-v)
  1455. mach))
  1456. (define (add-recovery-logic! mach)
  1457. (let ((prev-core (fluid-ref *lalr-core*)))
  1458. (dynamic-wind
  1459. (lambda () (fluid-set! *lalr-core* (make-core/extras mach)))
  1460. (lambda () (add-recovery-logic-1 mach))
  1461. (lambda () (fluid-set! *lalr-core* prev-core)))))
  1462. ;; to build parser, need:
  1463. ;; pat-v - parse action table
  1464. ;; ref-v - references
  1465. ;; len-v - rule lengths
  1466. ;; rto-v - hashed lhs symbols (rto = reduce to)
  1467. ;; to print itemsets need:
  1468. ;; lhs-v - left hand sides
  1469. ;; rhs-v - right hand sides
  1470. ;; kis-v - itemsets
  1471. ;; pat-v - action table
  1472. ;; @deffn {Procedure} restart-spec [spec|mach] start => spec
  1473. ;; This generates a new spec with a different start.
  1474. ;; @example
  1475. ;; (restart-spec clang-spec 'expression) => cexpr-spec
  1476. ;; @end example
  1477. ;; @end deffn
  1478. (define (restart-spec spec start)
  1479. (let* ((rhs-v (vector-copy (assq-ref spec 'rhs-v))))
  1480. (vector-set! rhs-v 0 (vector start))
  1481. (cons* (cons 'rhs-v rhs-v)
  1482. (member '(restart-tail . #t) spec))))
  1483. ;; @deffn {Procedure} make-lalr-machine spec => pgen
  1484. ;; Generate a-list of items used for building/debugging parsers.
  1485. ;; It might be useful to add hashify and compact with keyword arguments.
  1486. ;; @end deffn
  1487. (define (make-lalr-machine spec)
  1488. "- Procedure: make-lalr-machine spec => pgen
  1489. Generate a-list of items used for building/debugging parsers. It
  1490. might be useful to add hashify and compact with keyword arguments."
  1491. (if (not spec) (error "make-lalr-machine: expecting valid specification"))
  1492. (let ((prev-core (fluid-ref *lalr-core*)))
  1493. (dynamic-wind
  1494. (lambda () (fluid-set! *lalr-core* (make-core/extras spec)))
  1495. (lambda ()
  1496. (let* ((sm1 (step1 spec))
  1497. (sm2 (step2 sm1))
  1498. (sm3 (step3 sm2))
  1499. (sm4 (step4 sm3))
  1500. (sm5 (gen-match-table sm4)))
  1501. (cons*
  1502. (cons 'len-v (vector-map (lambda (i v) (vector-length v))
  1503. (assq-ref sm5 'rhs-v)))
  1504. (cons 'rto-v (vector-copy (assq-ref sm5 'lhs-v))) ; "reduce to"
  1505. sm5)))
  1506. (lambda () (fluid-set! *lalr-core* prev-core)))))
  1507. ;; for debugging
  1508. (define (make-LR0-machine spec)
  1509. (if (not spec) (error "make-LR0-machine: expecting valid specification"))
  1510. (let ((prev-core (fluid-ref *lalr-core*)))
  1511. (dynamic-wind
  1512. (lambda () (fluid-set! *lalr-core* (make-core/extras spec)))
  1513. (lambda () (step1 spec))
  1514. (lambda () (fluid-set! *lalr-core* prev-core)))))
  1515. ;; @deffn {Procedure} with-spec spec proc arg ...
  1516. ;; Execute with spec or mach.
  1517. ;; @end deffn
  1518. (define (with-spec spec proc . args)
  1519. (if (not spec) (error "with-spec: expecting valid specification"))
  1520. (let ((prev-core (fluid-ref *lalr-core*)))
  1521. (dynamic-wind
  1522. (lambda () (fluid-set! *lalr-core* (make-core/extras spec)))
  1523. (lambda () (apply proc args))
  1524. (lambda () (fluid-set! *lalr-core* prev-core)))))
  1525. ;; @deffn {Procedure} lalr-match-table mach => match-table
  1526. ;; Get the match-table
  1527. ;; @end deffn
  1528. (define (lalr-match-table mach)
  1529. (assq-ref mach 'mtab))
  1530. ;; @deffn {Procedure} machine-compacted? mach => #t|#f
  1531. ;; Indicate if the machine has been compacted.
  1532. ;; TODO: needs update to deal with error recovery hooks.
  1533. ;; @end deffn
  1534. (define (machine-compacted? mach)
  1535. ;; Works by searching for $default phony-token.
  1536. (call-with-prompt 'got-it
  1537. ;; Search for '$default. If not found return #f.
  1538. (lambda ()
  1539. (vector-for-each
  1540. (lambda (ix pat)
  1541. (for-each
  1542. (lambda (a) (if (or (eqv? (car a) '$default) (eqv? (car a) 1))
  1543. (abort-to-prompt 'got-it)))
  1544. pat))
  1545. (assq-ref mach 'pat-v))
  1546. #f)
  1547. ;; otherwise, return #t.
  1548. (lambda () #t)))
  1549. ;; @deffn {Procedure} compact-machine mach [#:keep 0] [#:keepers '()] => mach
  1550. ;; A "filter" to compact the parse table. For each state this will replace
  1551. ;; the most populus set of reductions of the same production rule with a
  1552. ;; default production. However, reductions triggered by @var{keepers} and
  1553. ;; the required keeper -- @code{'$error} -- are not counted. The keepers
  1554. ;; can then be trapped by the parser (e.g., to skip un-accounted comments).
  1555. ;; @end deffn
  1556. (define* (compact-machine mach #:key (keep 0) (keepers '()))
  1557. (if (< keep 0) (error "expecting keep > 0"))
  1558. (let* ((pat-v (assq-ref mach 'pat-v))
  1559. (nst (vector-length pat-v))
  1560. (hashed (number? (caar (vector-ref pat-v 0)))) ; hashified?
  1561. (reduce? (if hashed
  1562. (lambda (a) (and (number? a) (negative? a)))
  1563. (lambda (a) (eq? 'reduce (car a)))))
  1564. (reduce-pr (if hashed abs cdr))
  1565. (reduce-to? (if hashed
  1566. (lambda (a r) (eqv? (- r) a))
  1567. (lambda (a r) (and (eq? 'reduce (car a))
  1568. (eqv? r (cdr a))))))
  1569. (mk-default (if hashed
  1570. (lambda (r) (cons $default (- r)))
  1571. (lambda (r) `($default reduce . ,r))))
  1572. (mtab (assq-ref mach 'mtab))
  1573. (keepers (map (lambda (k) (assq-ref mtab k))
  1574. (cons '$error keepers))))
  1575. ;; Keep an a-list mapping reduction prod-rule => count.
  1576. (let iter ((sx nst) (trn-l #f) (cnt-al '()) (p-max '(0 . 0)))
  1577. (cond
  1578. ((pair? trn-l)
  1579. (cond
  1580. ((not (reduce? (cdar trn-l)))
  1581. ;; A shift, so not a candidate for default reduction.
  1582. (iter sx (cdr trn-l) cnt-al p-max))
  1583. ((memq (caar trn-l) keepers)
  1584. ;; Don't consider keepers because these will not be included.
  1585. (iter sx (cdr trn-l) cnt-al p-max))
  1586. (else
  1587. ;; A reduction, so update the count for reducing this prod-rule.
  1588. (let* ((ix (reduce-pr (cdar trn-l)))
  1589. (cnt (1+ (or (assq-ref cnt-al ix) 0)))
  1590. (cnt-p (cons ix cnt)))
  1591. (iter sx (cdr trn-l) (cons cnt-p cnt-al)
  1592. (if (> cnt (cdr p-max)) cnt-p p-max))))))
  1593. ((null? trn-l)
  1594. ;; We have processed all transitions. If more than @code{keep} common
  1595. ;; reductions then generate default rule to replace those.
  1596. (if (> (cdr p-max) keep)
  1597. (vector-set!
  1598. pat-v sx
  1599. (fold-right
  1600. (lambda (trn pat) ;; transition action
  1601. ;; If not a comment and reduces to the most-popular prod-rule
  1602. ;; then transfer to the default transition.
  1603. (if (and (not (memq (car trn) keepers))
  1604. (reduce-to? (cdr trn) (car p-max)))
  1605. pat
  1606. (cons trn pat)))
  1607. (list (mk-default (car p-max))) ;; default is last
  1608. (vector-ref pat-v sx))))
  1609. (iter sx #f #f #f))
  1610. ((positive? sx) ;; next state
  1611. (iter (1- sx) (vector-ref pat-v (1- sx)) '() '(0 . 0)))))
  1612. mach))
  1613. ;;.@section Using hash tables
  1614. ;; The lexical analyzer will generate tokens. The parser generates state
  1615. ;; transitions based on these tokens. When we build a lexical analyzer
  1616. ;; (via @code{make-lexer}) we provide a list of strings to detect along with
  1617. ;; associated tokens to return to the parser. By default the tokens returned
  1618. ;; are symbols or characters. But these could as well be integers. Also,
  1619. ;; the parser uses symbols to represent non-terminals, which are also used
  1620. ;; to trigger state transitions. We could use integers instead of symbols
  1621. ;; and characters by mapping via a hash table. We will bla bla bla.
  1622. ;; There are also standard tokens we need to worry about. These are
  1623. ;; @enumerate
  1624. ;; @item the @code{$end} marker
  1625. ;; @item identifiers (using the symbolic token @code{$ident}
  1626. ;; @item non-negative integers (using the symbolic token @code{$fixed})
  1627. ;; @item non-negative floats (using the symbolic token @code{$float})
  1628. ;; @item @code{$default} => 0
  1629. ;; @end enumerate
  1630. ;; And action
  1631. ;; @enumerate
  1632. ;; @item positive => shift
  1633. ;; @item negative => reduce
  1634. ;; @item zero => accept
  1635. ;; @end enumerate
  1636. ;; However, if these are used they should appear in the spec's terminal list.
  1637. ;; For the hash table we use positive integers for terminals and negative
  1638. ;; integers for non-terminals. To apply such a hash table we need to:
  1639. ;; @enumerate
  1640. ;; @item from the spec's list of terminals (aka tokens), generate a list of
  1641. ;; terminal to integer pairs (and vice versa)
  1642. ;; @item from the spec's list of non-terminals generate a list of symbols
  1643. ;; to integers and vice versa.
  1644. ;; @item Go through the parser-action table and convert symbols and characters
  1645. ;; to integers
  1646. ;; @item Go through the XXX list passed to the lexical analyizer and replace
  1647. ;; symbols and characters with integers.
  1648. ;; @end enumerate
  1649. ;; One issue we need to deal with is separating out the identifier-like
  1650. ;; terminals (aka keywords) from those that are not identifier-like. I guess
  1651. ;; this should be done as part of @code{make-lexer}, by filtering the token
  1652. ;; list through the ident-reader.
  1653. ;; NOTE: The parser is hardcoded to assume that the phony token for the
  1654. ;; default (reduce) action is @code{'$default} for unhashed machine or
  1655. ;; @code{-1} for a hashed machine. In addition, we use @code{-2} for
  1656. ;; @code{$end}.
  1657. ;; NEW: need to add reduction of ERROR
  1658. ;; @deffn {Procedure} machine-hashed? mach => #t|#f
  1659. ;; Indicate if the machine has been hashed.
  1660. ;; @end deffn
  1661. (define (machine-hashed? mach)
  1662. ;; If hashed, the parse action for rule 0 will always be a number.
  1663. (number? (caar (vector-ref (assq-ref mach 'pat-v) 0))))
  1664. ;; @deffn {Procedure} hashify-machine mach => mach
  1665. ;; @end deffn
  1666. (define (hashify-machine mach)
  1667. (if (machine-hashed? mach) mach
  1668. (let* ((terminals (assq-ref mach 'terminals))
  1669. (non-terms (assq-ref mach 'non-terms))
  1670. (lhs-v (assq-ref mach 'lhs-v))
  1671. (sm ;; = (cons sym->int int->sym)
  1672. (let iter ((si (list (cons '$default $default)
  1673. (cons '$error $error)))
  1674. (is (list (cons $default '$default)
  1675. (cons '$error $error)))
  1676. (ix (1+ (max $default $error 0)))
  1677. (tl terminals) (nl non-terms))
  1678. (if (null? nl) (cons (reverse si) (reverse is))
  1679. (let* ((s (atomize (if (pair? tl) (car tl) (car nl))))
  1680. (tl1 (if (pair? tl) (cdr tl) tl))
  1681. (nl1 (if (pair? tl) nl (cdr nl))))
  1682. (iter (acons s ix si) (acons ix s is) (1+ ix) tl1 nl1)))))
  1683. (sym->int (lambda (s) (assq-ref (car sm) s)))
  1684. ;;
  1685. (pat-v0 (assq-ref mach 'pat-v))
  1686. (npat (vector-length pat-v0))
  1687. (pat-v1 (make-vector npat '())))
  1688. ;; replace symbol/chars with integers
  1689. (let iter1 ((ix 0))
  1690. (unless (= ix npat)
  1691. (let iter2 ((al1 '()) (al0 (vector-ref pat-v0 ix)))
  1692. (if (null? al0) (vector-set! pat-v1 ix (reverse al1))
  1693. (let* ((a0 (car al0))
  1694. ;; tk: token; ac: action; ds: destination
  1695. (tk (car a0)) (ac (cadr a0)) (ds (cddr a0))
  1696. ;; t: encoded token; d: encoded destination
  1697. (t (sym->int tk))
  1698. (d (case ac
  1699. ((shift) ds) ((reduce) (- ds))
  1700. ((accept) 0) (else #f))))
  1701. ;; If a rule is not used then ??? and then what? 180901
  1702. ;;(cond
  1703. ;; (t (iter2 (acons t d al1) (cdr al0)))
  1704. ;; (else (iter2 (acons 0 0 al1) (cdr al0)))))))
  1705. (unless t
  1706. (fmterr "~S ~S ~S\n" tk ac ds)
  1707. (error "expect something"))
  1708. (iter2 (acons t d al1) (cdr al0)))))
  1709. (iter1 (1+ ix))))
  1710. ;;
  1711. (cons*
  1712. (cons 'pat-v pat-v1)
  1713. (cons 'siis sm) ;; sm = (cons sym->int int->sym)
  1714. (cons 'mtab
  1715. (let iter ((mt1 '()) (mt0 (assq-ref mach 'mtab)))
  1716. (if (null? mt0) (reverse mt1)
  1717. (iter (cons (cons (caar mt0) (sym->int (cdar mt0))) mt1)
  1718. (cdr mt0)))))
  1719. ;; reduction symbols = lhs:
  1720. (cons 'rto-v (vector-map (lambda (i v) (sym->int v)) lhs-v))
  1721. mach))))
  1722. ;; === grammar/machine printing ======
  1723. ;; @deffn {Procedure} elt->str elt terms => string
  1724. ;; @end deffn
  1725. (define (elt->str elt terms)
  1726. (or (and=> (find-terminal elt terms) obj->str)
  1727. (symbol->string elt)))
  1728. ;; @deffn {Procedure} pp-rule indent gx [port]
  1729. ;; Pretty-print a production rule.
  1730. ;; @end deffn
  1731. (define (pp-rule il gx . rest)
  1732. (let* ((port (if (pair? rest) (car rest) (current-output-port)))
  1733. (core (fluid-ref *lalr-core*))
  1734. (lhs (vector-ref (core-lhs-v core) gx))
  1735. (rhs (vector-ref (core-rhs-v core) gx))
  1736. (tl (core-terminals core)))
  1737. (display (substring " " 0 (min il 20)) port)
  1738. (fmt port "~A =>" lhs)
  1739. (vector-for-each (lambda (ix e) (fmt port " ~A" (elt->str e tl))) rhs)
  1740. (newline port)))
  1741. ;; @deffn {Procedure} pp-item item => string
  1742. ;; This could be called item->string.
  1743. ;; This needs terminals to work correctly, like pp-lalr-grammar.
  1744. ;; @end deffn
  1745. (define (pp-item item)
  1746. (let* ((core (fluid-ref *lalr-core*))
  1747. (tl (core-terminals core))
  1748. (gx (car item))
  1749. (lhs (vector-ref (core-lhs-v core) gx))
  1750. (rhs (vector-ref (core-rhs-v core) gx))
  1751. (rhs-len (vector-length rhs)))
  1752. (apply
  1753. string-append
  1754. (let iter ((rx 0) (sl (list (fmtstr "~S =>" lhs))))
  1755. (if (= rx rhs-len)
  1756. (append sl (if (= -1 (cdr item)) '(" .") '()))
  1757. (iter (1+ rx)
  1758. (append
  1759. sl (if (= rx (cdr item)) '(" .") '())
  1760. (let ((e (vector-ref rhs rx)))
  1761. (list (string-append " " (elt->str e tl)))))))))))
  1762. ;; @deffn {Procedure} pp-lalr-notice spec [port]
  1763. ;; This prints the text in the @code{notice} construct of a language spec.
  1764. ;; @end deffn
  1765. (define (pp-lalr-notice spec . rest)
  1766. "- Procedure: pp-lalr-notice spec [port]
  1767. This prints the text in the 'notice' construct of a language spec."
  1768. (let* ((port (if (pair? rest) (car rest) (current-output-port)))
  1769. (notice (assq-ref (assq-ref spec 'attr) 'notice))
  1770. (lines (if notice (string-split notice #\newline) '())))
  1771. (for-each (lambda (l) (simple-format port " ~A\n" l)) lines)
  1772. (newline)))
  1773. ;; @deffn {Procedure} pp-lalr-grammar spec [port]
  1774. ;; Pretty-print the grammar to the specified port, or current output.
  1775. ;; @end deffn
  1776. (define (pp-lalr-grammar spec . rest)
  1777. "- Procedure: pp-lalr-grammar spec [port]
  1778. Pretty-print the grammar to the specified port, or current output."
  1779. (let* ((port (if (pair? rest) (car rest) (current-output-port)))
  1780. (lhs-v (assq-ref spec 'lhs-v))
  1781. (rhs-v (assq-ref spec 'rhs-v))
  1782. (nrule (vector-length lhs-v))
  1783. (act-v (assq-ref spec 'act-v))
  1784. ;;(prp-v (assq-ref mach 'prp-v)) ; per-rule precedence
  1785. (terms (assq-ref spec 'terminals))
  1786. (prev-core (fluid-ref *lalr-core*)))
  1787. (fluid-set! *lalr-core* (make-core spec)) ; OR dynamic-wind ???
  1788. ;; Print out the grammar.
  1789. (do ((i 0 (1+ i))) ((= i nrule))
  1790. (let* ((lhs (vector-ref lhs-v i)) (rhs (vector-ref rhs-v i)))
  1791. (if #f
  1792. (pp-rule 0 i)
  1793. (begin
  1794. (fmt port "~A ~A =>" i lhs)
  1795. (vector-for-each
  1796. (lambda (ix e) (fmt port " ~A" (elt->str e terms)))
  1797. rhs)
  1798. ;;(fmt port "\t~S" (vector-ref act-v i))
  1799. (newline port)))))
  1800. (newline port)
  1801. (fluid-set! *lalr-core* prev-core)))
  1802. ;; @deffn {Procedure} pp-lalr-machine mach [port]
  1803. ;; Print the states of the parser with items and shift/reduce actions.
  1804. ;; @end deffn
  1805. (define (pp-lalr-machine mach . rest)
  1806. "- Procedure: pp-lalr-machine mach [port]
  1807. Print the states of the parser with items and shift/reduce actions."
  1808. (let* ((port (if (pair? rest) (car rest) (current-output-port)))
  1809. (lhs-v (assq-ref mach 'lhs-v))
  1810. (rhs-v (assq-ref mach 'rhs-v))
  1811. (nrule (vector-length lhs-v))
  1812. (pat-v (assq-ref mach 'pat-v))
  1813. (rat-v (assq-ref mach 'rat-v))
  1814. (kis-v (assq-ref mach 'kis-v))
  1815. (kit-v (assq-ref mach 'kit-v))
  1816. (nst (vector-length kis-v)) ; number of states
  1817. (i->s (or (and=> (assq-ref mach 'siis) cdr) '()))
  1818. (terms (assq-ref mach 'terminals))
  1819. (prev-core (fluid-ref *lalr-core*)))
  1820. (fluid-set! *lalr-core* (make-core mach))
  1821. ;; Print out the itemsets and shift reduce actions.
  1822. (do ((i 0 (1+ i))) ((= i nst))
  1823. (let* ((state (vector-ref kis-v i))
  1824. (pat (vector-ref pat-v i))
  1825. (rat (if rat-v (vector-ref rat-v i) '())))
  1826. (fmt port "~A:" i) ; itemset index (aka state index)
  1827. (for-each
  1828. (lambda (k-item)
  1829. (for-each ; item, print it
  1830. (lambda (item)
  1831. (fmt port "\t~A" (pp-item item))
  1832. ;; show lookaheads:
  1833. (if (and #f (negative? (cdr item)) kit-v (equal? item k-item))
  1834. (fmt port " ~A"
  1835. (map (lambda (tok) (elt->str tok terms))
  1836. (assoc-ref (vector-ref kit-v i) k-item))))
  1837. (fmt port "\n"))
  1838. (expand-k-item k-item)))
  1839. state)
  1840. (for-each ; action, print it
  1841. (lambda (act)
  1842. (if (pair? (cdr act))
  1843. (let ((sy (car act)) (pa (cadr act)) (gt (cddr act)))
  1844. (case pa
  1845. ((srconf)
  1846. (fmt port "\t\t~A => CONFLICT: shift ~A, reduce ~A\n"
  1847. (elt->str sy terms) (car gt) (cdr gt)))
  1848. ((rrconf)
  1849. (fmt port "\t\t~A => CONFLICT: reduce ~A\n"
  1850. (elt->str sy terms)
  1851. (string-join (map number->string gt) ", reduce ")))
  1852. (else
  1853. (fmt port "\t\t~A => ~A ~A\n" (elt->str sy terms) pa gt))))
  1854. (let* ((sy (car act)) (p (cdr act))
  1855. (pa (cond ((eq? #f p) 'CONFLICT)
  1856. ((positive? p) 'shift)
  1857. ((negative? p) 'reduce)
  1858. (else 'accept)))
  1859. (gt (if p (abs p) "")))
  1860. (fmt port "\t\t~A => ~A ~A\n"
  1861. (elt->str (assq-ref i->s sy) terms)
  1862. pa gt))))
  1863. pat)
  1864. (for-each ; action, print it
  1865. (lambda (ra)
  1866. ;; FIX: indicate if precedence removed by user rule or default
  1867. (fmt port "\t\t[~A => ~A ~A] REMOVED by ~A\n"
  1868. (elt->str (caar ra) terms) (cadar ra) (cddar ra)
  1869. (case (cdr ra)
  1870. ((pre) "precedence")
  1871. ((ass) "associativity")
  1872. ((def) "default shift")
  1873. (else (cdr ra)))))
  1874. rat)
  1875. (newline)))
  1876. (fluid-set! *lalr-core* prev-core)
  1877. (values)))
  1878. ;; === output routines ===============
  1879. (define (write-notice mach port)
  1880. (let* ((comm-leader ";; ")
  1881. (notice (assq-ref (assq-ref mach 'attr) 'notice))
  1882. (lines (if notice (string-split notice #\newline) '())))
  1883. (for-each
  1884. (lambda (l) (fmt port "~A~A\n" comm-leader l))
  1885. lines)
  1886. (if (pair? lines) (newline port))))
  1887. (define (drop-dot-new filename)
  1888. (if (string-suffix? ".new" filename)
  1889. (string-drop-right filename 4)
  1890. filename))
  1891. ;; @deffn {Procedure} write-lalr-tables mach filename [optons]
  1892. ;; Options are
  1893. ;; @table code
  1894. ;; @item #:prefix prefix-string
  1895. ;; The prefix for generating table names. The default is @code{""}.
  1896. ;; @item #:lang output-lang-symbol
  1897. ;; This specifies the output language. Currently only the default
  1898. ;; @code{'scheme} is supported.
  1899. ;; @end table
  1900. ;; @noindent
  1901. ;; For example,
  1902. ;; @example
  1903. ;; write-lalr-tables mach "tables.scm"
  1904. ;; write-lalr-tables mach "tables.tcl" #:lang 'tcl
  1905. ;; @end example
  1906. ;; @end deffn
  1907. (define* (write-lalr-tables mach filename #:key (lang 'scheme) (prefix ""))
  1908. (define (write-table mach name port)
  1909. (let ((sexp (assq-ref mach name)))
  1910. (if (pair? sexp)
  1911. (fmt port "(define ~A~A\n '" prefix name)
  1912. (fmt port "(define ~A~A\n " prefix name))
  1913. (ugly-print (assq-ref mach name) port
  1914. #:per-line-prefix " " #:trim-ends #t)
  1915. ;;(ugly-print (assq-ref mach name) port)
  1916. (fmt port ")\n\n")))
  1917. (call-with-output-file filename
  1918. (lambda (port)
  1919. (fmt port ";; ~A\n\n" (drop-dot-new filename))
  1920. (write-notice mach port)
  1921. (write-table mach 'len-v port)
  1922. (write-table mach 'pat-v port)
  1923. (write-table mach 'rto-v port)
  1924. (write-table mach 'mtab port)
  1925. ;; generate alist
  1926. (fmt port "(define ~Atables\n (list\n" prefix)
  1927. (fmt port " (cons 'len-v ~Alen-v)\n" prefix)
  1928. (fmt port " (cons 'pat-v ~Apat-v)\n" prefix)
  1929. (fmt port " (cons 'rto-v ~Arto-v)\n" prefix)
  1930. (fmt port " (cons 'mtab ~Amtab)))\n\n" prefix)
  1931. (display ";;; end tables" port)
  1932. (newline port))))
  1933. ;; @deffn {Procedure} write-lalr-actions mach filename [#:lang output-lang]
  1934. ;; For example,
  1935. ;; @example
  1936. ;; write-lalr-actions mach "actions.scm"
  1937. ;; write-lalr-actions mach "actions.tcl" #:lang 'tcl
  1938. ;; @end example
  1939. ;; @end deffn
  1940. (define* (write-lalr-actions mach filename #:key (lang 'scheme) (prefix ""))
  1941. (define (pp-rule/ts gx)
  1942. (let* ((core (fluid-ref *lalr-core*))
  1943. (lhs (vector-ref (core-lhs-v core) gx))
  1944. (rhs (vector-ref (core-rhs-v core) gx))
  1945. (tl (core-terminals core))
  1946. (line (string-append
  1947. (symbol->string lhs) " => "
  1948. (string-join
  1949. (map (lambda (elt) (elt->str elt tl))
  1950. (vector->list rhs))
  1951. " "))))
  1952. (if (> (string-length line) 72)
  1953. (string-append (substring/shared line 0 69) "...")
  1954. line)))
  1955. (define (NEW-pp-rule/ts gx)
  1956. ;; TBD: use start for zeroth rule
  1957. (let* ((core (fluid-ref *lalr-core*))
  1958. (lhs (vector-ref (core-lhs-v core) gx))
  1959. (rhs (vector-ref (core-rhs-v core) gx))
  1960. (tl (core-terminals core))
  1961. (line (string-append
  1962. (symbol->string lhs) " => "
  1963. (string-join
  1964. (map (lambda (elt) (elt->str elt tl))
  1965. (vector->list rhs))
  1966. " "))))
  1967. (if (> (string-length line) 72)
  1968. (string-append (substring/shared line 0 69) "...")
  1969. line)))
  1970. (define (write-actions mach port)
  1971. (with-fluid*
  1972. *lalr-core* (make-core mach)
  1973. (lambda ()
  1974. (fmt port "(define ~Aact-v\n (vector\n" prefix)
  1975. (vector-for-each
  1976. (lambda (gx actn)
  1977. (fmt port " ;; ~A\n" (pp-rule/ts gx))
  1978. (pretty-print (wrap-action actn) port #:per-line-prefix " "))
  1979. (assq-ref mach 'act-v))
  1980. (fmt port " ))\n\n"))))
  1981. (call-with-output-file filename
  1982. (lambda (port)
  1983. (fmt port ";; ~A\n\n" (drop-dot-new filename))
  1984. (write-notice mach port)
  1985. (write-actions mach port)
  1986. (display ";;; end tables" port)
  1987. (newline port))))
  1988. ;;; --- last line ---