123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463 |
- \input texinfo @c -*-texinfo-*-
- @comment %**start of header
- @setfilename bison.info
- @settitle Bison A2.2
- @setchapternewpage odd
- @c SMALL BOOK version
- @c This edition has been formatted so that you can format and print it in
- @c the smallbook format.
- @c @smallbook
- @c next time, consider using @set for edition number, etc...
- @c Set following if you have the new `shorttitlepage' command
- @c @clear shorttitlepage-enabled
- @c @set shorttitlepage-enabled
- @c ISPELL CHECK: done, 14 Jan 1993 --bob
- @c Check COPYRIGHT dates. should be updated in the titlepage, ifinfo
- @c titlepage; should NOT be changed in the GPL. --mew
- @iftex
- @syncodeindex fn cp
- @syncodeindex vr cp
- @syncodeindex tp cp
- @end iftex
- @ifinfo
- @synindex fn cp
- @synindex vr cp
- @synindex tp cp
- @end ifinfo
- @comment %**end of header
- @ifinfo
- This file documents the Bison parser generator.
- Copyright (C) 1988, 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Modified (1993) from bison-1.22 by
- Wilfred J. Hansen (wjh+@@cmu.edu),
- Andrew Consortium, Carnegie Mellon University
- CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD
- TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
- AND FITNESS. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE FOR
- ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- Permission is granted to make and distribute verbatim copies of
- this manual provided the copyright notice and this permission notice
- are preserved on all copies.
- @ignore
- Permission is granted to process this file through Tex and print the
- results, provided the printed document carries copying permission
- notice identical to this one except for the removal of this paragraph
- (this paragraph not being relevant to the printed manual).
- @end ignore
- Permission is granted to copy and distribute modified versions of this
- manual under the conditions for verbatim copying, provided also that the
- sections entitled ``GNU General Public License'' and ``Conditions for
- Using Bison'' are included exactly as in the original, and provided that
- the entire resulting derived work is distributed under the terms of a
- permission notice identical to this one.
- Permission is granted to copy and distribute translations of this manual
- into another language, under the above conditions for modified versions,
- except that the sections entitled ``GNU General Public License'',
- ``Conditions for Using Bison'' and this permission notice may be
- included in translations approved by the Free Software Foundation
- instead of in the original English.
- @end ifinfo
- @ifset shorttitlepage-enabled
- @shorttitlepage Bison
- @end ifset
- @titlepage
- @title Bison
- @subtitle The YACC-compatible Parser Generator
- @subtitle April 1993, Bison Version A2.2
- @author by Charles Donnelly and Richard Stallman
- Modified (1993) from bison-1.22 by
- Wilfred J. Hansen (wjh+@@cmu.edu),
- Andrew Consortium, Carnegie Mellon University
- @page
- @vskip 0pt plus 1filll
- Copyright @copyright{} 1988, 1989, 1990, 1991, 1992 Free Software
- Foundation
- @sp 2
- The version corresponding to Bison 1.22 is @*
- Published by the Free Software Foundation @*
- 675 Massachusetts Avenue @*
- Cambridge, MA 02139 USA @*
- Printed copies are available for $15 each.@*
- ISBN-1-882114-30-2
- Permission is granted to make and distribute verbatim copies of
- this manual provided the copyright notice and this permission notice
- are preserved on all copies.
- @ignore
- Permission is granted to process this file through TeX and print the
- results, provided the printed document carries copying permission
- notice identical to this one except for the removal of this paragraph
- (this paragraph not being relevant to the printed manual).
- @end ignore
- Permission is granted to copy and distribute modified versions of this
- manual under the conditions for verbatim copying, provided also that the
- sections entitled ``GNU General Public License'' and ``Conditions for
- Using Bison'' are included exactly as in the original, and provided that
- the entire resulting derived work is distributed under the terms of a
- permission notice identical to this one.
- Permission is granted to copy and distribute translations of this manual
- into another language, under the above conditions for modified versions,
- except that the sections entitled ``GNU General Public License'',
- ``Conditions for Using Bison'' and this permission notice may be
- included in translations approved by the Free Software Foundation
- instead of in the original English.
- @sp 2
- Cover art by Etienne Suvasa.
- @end titlepage
- @page
- @node Top, Introduction, (dir), (dir)
- @ifinfo
- This manual documents version A2.2 of Bison
- @end ifinfo
- @menu
- * Introduction::
- * Conditions::
- * Copying:: The GNU General Public License says
- how you can copy and share Bison
- Tutorial sections:
- * Concepts:: Basic concepts for understanding Bison.
- * Examples:: Three simple explained examples of using Bison.
- Reference sections:
- * Grammar File:: Writing Bison declarations and rules.
- * Interface:: C-language interface to the parser function @code{yyparse}.
- * Algorithm:: How the Bison parser works at run-time.
- * Error Recovery:: Writing rules for error recovery.
- * Context Dependency:: What to do if your language syntax is too
- messy for Bison to handle straightforwardly.
- * Debugging:: Debugging Bison parsers that parse wrong.
- * Invocation:: How to run Bison (to produce the parser source file).
- * Table of Symbols:: All the keywords of the Bison language are explained.
- * Parser Symbols:: Symbols used by the parser.
- * Glossary:: Basic concepts are explained.
- * Index:: Cross-references to the text.
- --- The Detailed Node Listing ---
- The Concepts of Bison
- * Language and Grammar:: Languages and context-free grammars,
- as mathematical ideas.
- * Grammar in Bison:: How we represent grammars for Bison's sake.
- * Semantic Values:: Each token or syntactic grouping can have
- a semantic value (the value of an integer,
- the name of an identifier, etc.).
- * Semantic Actions:: Each rule can have an action containing C code.
- * Bison Parser:: What are Bison's input and output,
- how is the output used?
- * Stages:: Stages in writing and running Bison grammars.
- * Grammar Layout:: Overall structure of a Bison grammar file.
- Examples
- * RPN Calc:: Reverse polish notation calculator;
- a first example with no operator precedence.
- * Infix Calc:: Infix (algebraic) notation calculator.
- Operator precedence is introduced.
- * Simple Error Recovery:: Continuing after syntax errors.
- * Multi-function Calc:: Calculator with memory and trig functions.
- It uses multiple data-types for semantic values.
- * Exercises:: Ideas for improving the multi-function calculator.
- Reverse Polish Notation Calculator
- * Decls: Rpcalc Decls. Bison and C declarations for rpcalc.
- * Rules: Rpcalc Rules. Grammar Rules for rpcalc, with explanation.
- * Lexer: Rpcalc Lexer. The lexical analyzer.
- * Main: Rpcalc Main. The controlling function.
- * Error: Rpcalc Error. The error reporting function.
- * Gen: Rpcalc Gen. Running Bison on the grammar file.
- * Comp: Rpcalc Compile. Run the C compiler on the output code.
- Grammar Rules for @code{rpcalc}
- * Rpcalc Input::
- * Rpcalc Line::
- * Rpcalc Expr::
- Multi-Function Calculator: @code{mfcalc}
- * Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
- * Rules: Mfcalc Rules. Grammar rules for the calculator.
- * Symtab: Mfcalc Symtab. Symbol table management subroutines.
- Bison Grammar Files
- * Grammar Outline:: Overall layout of the grammar file.
- * Symbols:: Terminal and nonterminal symbols.
- * Rules:: How to write grammar rules.
- * Recursion:: Writing recursive rules.
- * Semantics:: Semantic values and actions.
- * Declarations:: All kinds of Bison declarations are described here.
- * Multiple Parsers:: Putting more than one Bison parser in one program.
- Outline of a Bison Grammar
- * C Declarations:: Syntax and usage of the C declarations section.
- * Bison Declarations:: Syntax and usage of the Bison declarations section.
- * Grammar Rules:: Syntax and usage of the grammar rules section.
- * C Code:: Syntax and usage of the additional C code section.
- Defining Language Semantics
- * Value Type:: Specifying one data type for all semantic values.
- * Multiple Types:: Specifying several alternative data types.
- * Actions:: An action is the semantic definition of a grammar rule.
- * Action Types:: Specifying data types for actions to operate on.
- * Mid-Rule Actions:: Most actions go at the end of a rule.
- This says when, why and how to use the exceptional
- action in the middle of a rule.
- Bison Declarations
- * Token Decl:: Declaring terminal symbols.
- * Precedence Decl:: Declaring terminals with precedence and associativity.
- * Union Decl:: Declaring the set of all semantic value types.
- * Type Decl:: Declaring the choice of type for a nonterminal symbol.
- * Expect Decl:: Suppressing warnings about shift/reduce conflicts.
- * Start Decl:: Specifying the start symbol.
- * Pure Decl:: Requesting a reentrant parser.
- * Decl Summary:: Table of all Bison declarations.
- Parser C-Language Interface
- * Parser Function:: How to call @code{yyparse} and what it returns.
- * Lexical:: You must supply a function @code{yylex}
- which reads tokens.
- * Error Reporting:: You must supply a function @code{yyerror}.
- * Action Features:: Special features for use in actions.
- The Lexical Analyzer Function @code{yylex}
- * Calling Convention:: How @code{yyparse} calls @code{yylex}.
- * Token Values:: How @code{yylex} must return the semantic value
- of the token it has read.
- * Token Positions:: How @code{yylex} must return the text position
- (line number, etc.) of the token, if the
- actions want that.
- * Pure Calling:: How the calling convention differs
- in a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}).
- The Bison Parser Algorithm
- * Look-Ahead:: Parser looks one token ahead when deciding what to do.
- * Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
- * Precedence:: Operator precedence works by resolving conflicts.
- * Contextual Precedence:: When an operator's precedence depends on context.
- * Parser States:: The parser is a finite-state-machine with stack.
- * Reduce/Reduce:: When two rules are applicable in the same situation.
- * Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
- * Stack Overflow:: What happens when stack gets full. How to avoid it.
- Operator Precedence
- * Why Precedence:: An example showing why precedence is needed.
- * Using Precedence:: How to specify precedence in Bison grammars.
- * Precedence Examples:: How these features are used in the previous example.
- * How Precedence:: How they work.
- Handling Context Dependencies
- * Semantic Tokens:: Token parsing can depend on the semantic context.
- * Lexical Tie-ins:: Token parsing can depend on the syntactic context.
- * Tie-in Recovery:: Lexical tie-ins have implications for how
- error recovery rules must be written.
- Invoking Bison
- * Bison Options:: All the options described in detail,
- in alphabetical order by short options.
- * Option Cross Key:: Alphabetical list of long options.
- * VMS Invocation:: Bison command syntax on VMS.
- @end menu
- @node Introduction, Conditions, Top, Top
- @unnumbered Introduction
- @cindex introduction
- @dfn{Bison} is a general-purpose parser generator that converts a
- grammar description for an LALR(1) context-free grammar into a C
- program to parse that grammar. Once you are proficient with Bison,
- you may use it to develop a wide range of language parsers, from those
- used in simple desk calculators to complex programming languages.
- Bison is upward compatible with Yacc: all properly-written Yacc grammars
- ought to work with Bison with no change. Anyone familiar with Yacc
- should be able to use Bison with little trouble. You need to be fluent in
- C programming in order to use Bison or to understand this manual.
- We begin with tutorial chapters that explain the basic concepts of using
- Bison and show three explained examples, each building on the last. If you
- don't know Bison or Yacc, start by reading these chapters. Reference
- chapters follow which describe specific aspects of Bison in detail.
- Bison was written primarily by Robert Corbett; Richard Stallman made
- it Yacc-compatible.
- This edition corresponds to Bison version A2.2 as adapted from Bison-1.22
- by Wilfred J. Hansen, Andrew Consortium, Carnegie Mellon University. It
- differs from earlier versions in three major ways:
- @itemize @bullet
- @item
- Errors in the input grammar are not fatal; all errors are
- found in one pass. A complete example and test case is
- the file mess.y in the distribution.
-
- @item
- Tokens may now be specified as multiple-character strings:
- "<=" can appear where formerly would have to be LESSEQ.
- See the section @pxref{Symbols, ,Symbols - Terminal and Nonterminal}.
- @item
- The -n switch produces the parser tables without including
- the parser; a project can now use its own parser
- and avoid the GNU license for the resulting application.
- See the @samp{--no-parser} switch.
- @end itemize
- @node Conditions, Copying, Introduction, Top
- @unnumbered Conditions for Using Bison
- Unless you execute Bison with the @samp{-n} or @samp{--no-parser} switch,
- Bison grammars can be used only in programs that are free software. This
- is in contrast to what happens with the GNU C compiler and the other
- GNU programming tools.
- The reason Bison is special is that the output of the Bison utility---the
- Bison parser file---contains a verbatim copy of a sizable piece of Bison,
- which is the code for the @code{yyparse} function. (The actions from your
- grammar are inserted into this function at one point, but the rest of the
- function is not changed.)
- As a result, the Bison parser file is covered by the same copying
- conditions that cover Bison itself and the rest of the GNU system: any
- program containing it has to be distributed under the standard GNU copying
- conditions.
- Occasionally people who would like to use Bison to develop proprietary
- programs complain about this.
- We don't particularly sympathize with their complaints. The purpose of the
- GNU project is to promote the right to share software and the practice of
- sharing software; it is a means of changing society. The people who
- complain are planning to be uncooperative toward the rest of the world; why
- should they deserve our help in doing so?
- However, it's possible that a change in these conditions might encourage
- computer companies to use and distribute the GNU system. If so, then we
- might decide to change the terms on @code{yyparse} as a matter of the
- strategy of promoting the right to share. Such a change would be
- irrevocable. Since we stand by the copying permissions we have announced,
- we cannot withdraw them once given.
- We mustn't make an irrevocable change hastily. We have to wait until there
- is a complete GNU system and there has been time to learn how this issue
- affects its reception.
- In the meantime, those wishing to utilize bison without utilizing the bison
- parser can write their own parser and utilize only the tables generated
- from the @samp{-n} or @samp{--no-parser} switch.
- @node Copying, Concepts, Conditions, Top
- @unnumbered GNU GENERAL PUBLIC LICENSE
- @center Version 2, June 1991
- @display
- Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
- @end display
- @unnumberedsec Preamble
- The licenses for most software are designed to take away your
- freedom to share and change it. By contrast, the GNU General Public
- License is intended to guarantee your freedom to share and change free
- software---to make sure the software is free for all its users. This
- General Public License applies to most of the Free Software
- Foundation's software and to any other program whose authors commit to
- using it. (Some other Free Software Foundation software is covered by
- the GNU Library General Public License instead.) You can apply it to
- your programs, too.
- When we speak of free software, we are referring to freedom, not
- price. Our General Public Licenses are designed to make sure that you
- have the freedom to distribute copies of free software (and charge for
- this service if you wish), that you receive source code or can get it
- if you want it, that you can change the software or use pieces of it
- in new free programs; and that you know you can do these things.
- To protect your rights, we need to make restrictions that forbid
- anyone to deny you these rights or to ask you to surrender the rights.
- These restrictions translate to certain responsibilities for you if you
- distribute copies of the software, or if you modify it.
- For example, if you distribute copies of such a program, whether
- gratis or for a fee, you must give the recipients all the rights that
- you have. You must make sure that they, too, receive or can get the
- source code. And you must show them these terms so they know their
- rights.
- We protect your rights with two steps: (1) copyright the software, and
- (2) offer you this license which gives you legal permission to copy,
- distribute and/or modify the software.
- Also, for each author's protection and ours, we want to make certain
- that everyone understands that there is no warranty for this free
- software. If the software is modified by someone else and passed on, we
- want its recipients to know that what they have is not the original, so
- that any problems introduced by others will not reflect on the original
- authors' reputations.
- Finally, any free program is threatened constantly by software
- patents. We wish to avoid the danger that redistributors of a free
- program will individually obtain patent licenses, in effect making the
- program proprietary. To prevent this, we have made it clear that any
- patent must be licensed for everyone's free use or not licensed at all.
- The precise terms and conditions for copying, distribution and
- modification follow.
- @iftex
- @unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
- @end iftex
- @ifinfo
- @center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
- @end ifinfo
- @enumerate 0
- @item
- This License applies to any program or other work which contains
- a notice placed by the copyright holder saying it may be distributed
- under the terms of this General Public License. The ``Program'', below,
- refers to any such program or work, and a ``work based on the Program''
- means either the Program or any derivative work under copyright law:
- that is to say, a work containing the Program or a portion of it,
- either verbatim or with modifications and/or translated into another
- language. (Hereinafter, translation is included without limitation in
- the term ``modification''.) Each licensee is addressed as ``you''.
- Activities other than copying, distribution and modification are not
- covered by this License; they are outside its scope. The act of
- running the Program is not restricted, and the output from the Program
- is covered only if its contents constitute a work based on the
- Program (independent of having been made by running the Program).
- Whether that is true depends on what the Program does.
- @item
- You may copy and distribute verbatim copies of the Program's
- source code as you receive it, in any medium, provided that you
- conspicuously and appropriately publish on each copy an appropriate
- copyright notice and disclaimer of warranty; keep intact all the
- notices that refer to this License and to the absence of any warranty;
- and give any other recipients of the Program a copy of this License
- along with the Program.
- You may charge a fee for the physical act of transferring a copy, and
- you may at your option offer warranty protection in exchange for a fee.
- @item
- You may modify your copy or copies of the Program or any portion
- of it, thus forming a work based on the Program, and copy and
- distribute such modifications or work under the terms of Section 1
- above, provided that you also meet all of these conditions:
- @enumerate a
- @item
- You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
- @item
- You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
- @item
- If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
- @end enumerate
- These requirements apply to the modified work as a whole. If
- identifiable sections of that work are not derived from the Program,
- and can be reasonably considered independent and separate works in
- themselves, then this License, and its terms, do not apply to those
- sections when you distribute them as separate works. But when you
- distribute the same sections as part of a whole which is a work based
- on the Program, the distribution of the whole must be on the terms of
- this License, whose permissions for other licensees extend to the
- entire whole, and thus to each and every part regardless of who wrote it.
- Thus, it is not the intent of this section to claim rights or contest
- your rights to work written entirely by you; rather, the intent is to
- exercise the right to control the distribution of derivative or
- collective works based on the Program.
- In addition, mere aggregation of another work not based on the Program
- with the Program (or with a work based on the Program) on a volume of
- a storage or distribution medium does not bring the other work under
- the scope of this License.
- @item
- You may copy and distribute the Program (or a work based on it,
- under Section 2) in object code or executable form under the terms of
- Sections 1 and 2 above provided that you also do one of the following:
- @enumerate a
- @item
- Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
- @item
- Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
- @item
- Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
- @end enumerate
- The source code for a work means the preferred form of the work for
- making modifications to it. For an executable work, complete source
- code means all the source code for all modules it contains, plus any
- associated interface definition files, plus the scripts used to
- control compilation and installation of the executable. However, as a
- special exception, the source code distributed need not include
- anything that is normally distributed (in either source or binary
- form) with the major components (compiler, kernel, and so on) of the
- operating system on which the executable runs, unless that component
- itself accompanies the executable.
- If distribution of executable or object code is made by offering
- access to copy from a designated place, then offering equivalent
- access to copy the source code from the same place counts as
- distribution of the source code, even though third parties are not
- compelled to copy the source along with the object code.
- @item
- You may not copy, modify, sublicense, or distribute the Program
- except as expressly provided under this License. Any attempt
- otherwise to copy, modify, sublicense or distribute the Program is
- void, and will automatically terminate your rights under this License.
- However, parties who have received copies, or rights, from you under
- this License will not have their licenses terminated so long as such
- parties remain in full compliance.
- @item
- You are not required to accept this License, since you have not
- signed it. However, nothing else grants you permission to modify or
- distribute the Program or its derivative works. These actions are
- prohibited by law if you do not accept this License. Therefore, by
- modifying or distributing the Program (or any work based on the
- Program), you indicate your acceptance of this License to do so, and
- all its terms and conditions for copying, distributing or modifying
- the Program or works based on it.
- @item
- Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from the
- original licensor to copy, distribute or modify the Program subject to
- these terms and conditions. You may not impose any further
- restrictions on the recipients' exercise of the rights granted herein.
- You are not responsible for enforcing compliance by third parties to
- this License.
- @item
- If, as a consequence of a court judgment or allegation of patent
- infringement or for any other reason (not limited to patent issues),
- conditions are imposed on you (whether by court order, agreement or
- otherwise) that contradict the conditions of this License, they do not
- excuse you from the conditions of this License. If you cannot
- distribute so as to satisfy simultaneously your obligations under this
- License and any other pertinent obligations, then as a consequence you
- may not distribute the Program at all. For example, if a patent
- license would not permit royalty-free redistribution of the Program by
- all those who receive copies directly or indirectly through you, then
- the only way you could satisfy both it and this License would be to
- refrain entirely from distribution of the Program.
- If any portion of this section is held invalid or unenforceable under
- any particular circumstance, the balance of the section is intended to
- apply and the section as a whole is intended to apply in other
- circumstances.
- It is not the purpose of this section to induce you to infringe any
- patents or other property right claims or to contest validity of any
- such claims; this section has the sole purpose of protecting the
- integrity of the free software distribution system, which is
- implemented by public license practices. Many people have made
- generous contributions to the wide range of software distributed
- through that system in reliance on consistent application of that
- system; it is up to the author/donor to decide if he or she is willing
- to distribute software through any other system and a licensee cannot
- impose that choice.
- This section is intended to make thoroughly clear what is believed to
- be a consequence of the rest of this License.
- @item
- If the distribution and/or use of the Program is restricted in
- certain countries either by patents or by copyrighted interfaces, the
- original copyright holder who places the Program under this License
- may add an explicit geographical distribution limitation excluding
- those countries, so that distribution is permitted only in or among
- countries not thus excluded. In such case, this License incorporates
- the limitation as if written in the body of this License.
- @item
- The Free Software Foundation may publish revised and/or new versions
- of the General Public License from time to time. Such new versions will
- be similar in spirit to the present version, but may differ in detail to
- address new problems or concerns.
- Each version is given a distinguishing version number. If the Program
- specifies a version number of this License which applies to it and ``any
- later version'', you have the option of following the terms and conditions
- either of that version or of any later version published by the Free
- Software Foundation. If the Program does not specify a version number of
- this License, you may choose any version ever published by the Free Software
- Foundation.
- @item
- If you wish to incorporate parts of the Program into other free
- programs whose distribution conditions are different, write to the author
- to ask for permission. For software which is copyrighted by the Free
- Software Foundation, write to the Free Software Foundation; we sometimes
- make exceptions for this. Our decision will be guided by the two goals
- of preserving the free status of all derivatives of our free software and
- of promoting the sharing and reuse of software generally.
- @iftex
- @heading NO WARRANTY
- @end iftex
- @ifinfo
- @center NO WARRANTY
- @end ifinfo
- @item
- BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
- FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
- OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
- PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
- OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
- TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
- PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
- REPAIR OR CORRECTION.
- @item
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
- REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
- INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
- OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
- TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
- YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
- PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGES.
- @end enumerate
- @iftex
- @heading END OF TERMS AND CONDITIONS
- @end iftex
- @ifinfo
- @center END OF TERMS AND CONDITIONS
- @end ifinfo
- @page
- @unnumberedsec How to Apply These Terms to Your New Programs
- If you develop a new program, and you want it to be of the greatest
- possible use to the public, the best way to achieve this is to make it
- free software which everyone can redistribute and change under these terms.
- To do so, attach the following notices to the program. It is safest
- to attach them to the start of each source file to most effectively
- convey the exclusion of warranty; and each file should have at least
- the ``copyright'' line and a pointer to where the full notice is found.
- @smallexample
- @var{one line to give the program's name and a brief idea of what it does.}
- Copyright (C) 19@var{yy} @var{name of author}
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- @end smallexample
- Also add information on how to contact you by electronic and paper mail.
- If the program is interactive, make it output a short notice like this
- when it starts in an interactive mode:
- @smallexample
- Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
- type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
- @end smallexample
- The hypothetical commands @samp{show w} and @samp{show c} should show
- the appropriate parts of the General Public License. Of course, the
- commands you use may be called something other than @samp{show w} and
- @samp{show c}; they could even be mouse-clicks or menu items---whatever
- suits your program.
- You should also get your employer (if you work as a programmer) or your
- school, if any, to sign a ``copyright disclaimer'' for the program, if
- necessary. Here is a sample; alter the names:
- @smallexample
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
- @var{signature of Ty Coon}, 1 April 1989
- Ty Coon, President of Vice
- @end smallexample
- This General Public License does not permit incorporating your program into
- proprietary programs. If your program is a subroutine library, you may
- consider it more useful to permit linking proprietary applications with the
- library. If this is what you want to do, use the GNU Library General
- Public License instead of this License.
- @node Concepts, Examples, Copying, Top
- @chapter The Concepts of Bison
- This chapter introduces many of the basic concepts without which the
- details of Bison will not make sense. If you do not already know how to
- use Bison or Yacc, we suggest you start by reading this chapter carefully.
- @menu
- * Language and Grammar:: Languages and context-free grammars,
- as mathematical ideas.
- * Grammar in Bison:: How we represent grammars for Bison's sake.
- * Semantic Values:: Each token or syntactic grouping can have
- a semantic value (the value of an integer,
- the name of an identifier, etc.).
- * Semantic Actions:: Each rule can have an action containing C code.
- * Bison Parser:: What are Bison's input and output,
- how is the output used?
- * Stages:: Stages in writing and running Bison grammars.
- * Grammar Layout:: Overall structure of a Bison grammar file.
- @end menu
- @node Language and Grammar, Grammar in Bison, , Concepts
- @section Languages and Context-Free Grammars
- @c !!! ``An expression can be an integer'' is not a valid Bison
- @c expression---Bison cannot read English! --rjc 6 Feb 1992
- @cindex context-free grammar
- @cindex grammar, context-free
- In order for Bison to parse a language, it must be described by a
- @dfn{context-free grammar}. This means that you specify one or more
- @dfn{syntactic groupings} and give rules for constructing them from their
- parts. For example, in the C language, one kind of grouping is called an
- `expression'. One rule for making an expression might be, ``An expression
- can be made of a minus sign and another expression''. Another would be,
- ``An expression can be an integer''. As you can see, rules are often
- recursive, but there must be at least one rule which leads out of the
- recursion.
- @cindex BNF
- @cindex Backus-Naur form
- The most common formal system for presenting such rules for humans to read
- is @dfn{Backus-Naur Form} or ``BNF'', which was developed in order to
- specify the language Algol 60. Any grammar expressed in BNF is a
- context-free grammar. The input to Bison is essentially machine-readable
- BNF.
- Not all context-free languages can be handled by Bison, only those
- that are LALR(1). In brief, this means that it must be possible to
- tell how to parse any portion of an input string with just a single
- token of look-ahead. Strictly speaking, that is a description of an
- LR(1) grammar, and LALR(1) involves additional restrictions that are
- hard to explain simply; but it is rare in actual practice to find an
- LR(1) grammar that fails to be LALR(1). @xref{Mystery Conflicts, ,
- Mysterious Reduce/Reduce Conflicts}, for more information on this.
- @cindex symbols (abstract)
- @cindex token
- @cindex syntactic grouping
- @cindex grouping, syntactic
- In the formal grammatical rules for a language, each kind of syntactic unit
- or grouping is named by a @dfn{symbol}. Those which are built by grouping
- smaller constructs according to grammatical rules are called
- @dfn{nonterminal symbols}; those which can't be subdivided are called
- @dfn{terminal symbols} or @dfn{token types}. We call a piece of input
- corresponding to a single terminal symbol a @dfn{token}, and a piece
- corresponding to a single nonterminal symbol a @dfn{grouping}.@refill
- We can use the C language as an example of what symbols, terminal and
- nonterminal, mean. The tokens of C are identifiers, constants (numeric and
- string), and the various keywords, arithmetic operators and punctuation
- marks. So the terminal symbols of a grammar for C include `identifier',
- `number', `string', plus one symbol for each keyword, operator or
- punctuation mark: `if', `return', `const', `static', `int', `char',
- `plus-sign', `open-brace', `close-brace', `comma' and many more. (These
- tokens can be subdivided into characters, but that is a matter of
- lexicography, not grammar.)
- Here is a simple C function subdivided into tokens:
- @example
- int /* @r{keyword `int'} */
- square (x) /* @r{identifier, open-paren,} */
- /* @r{identifier, close-paren} */
- int x; /* @r{keyword `int', identifier, semicolon} */
- @{ /* @r{open-brace} */
- return x * x; /* @r{keyword `return', identifier,} */
- /* @r{asterisk, identifier, semicolon} */
- @} /* @r{close-brace} */
- @end example
- The syntactic groupings of C include the expression, the statement, the
- declaration, and the function definition. These are represented in the
- grammar of C by nonterminal symbols `expression', `statement',
- `declaration' and `function definition'. The full grammar uses dozens of
- additional language constructs, each with its own nonterminal symbol, in
- order to express the meanings of these four. The example above is a
- function definition; it contains one declaration, and one statement. In
- the statement, each @samp{x} is an expression and so is @samp{x * x}.
- Each nonterminal symbol must have grammatical rules showing how it is made
- out of simpler constructs. For example, one kind of C statement is the
- @code{return} statement; this would be described with a grammar rule which
- reads informally as follows:
- @quotation
- A `statement' can be made of a `return' keyword, an `expression' and a
- `semicolon'.
- @end quotation
- @noindent
- There would be many other rules for `statement', one for each kind of
- statement in C.
- @cindex start symbol
- One nonterminal symbol must be distinguished as the special one which
- defines a complete utterance in the language. It is called the @dfn{start
- symbol}. In a compiler, this means a complete input program. In the C
- language, the nonterminal symbol `sequence of definitions and declarations'
- plays this role.
- For example, @samp{1 + 2} is a valid C expression---a valid part of a C
- program---but it is not valid as an @emph{entire} C program. In the
- context-free grammar of C, this follows from the fact that `expression' is
- not the start symbol.
- The Bison parser reads a sequence of tokens as its input, and groups the
- tokens using the grammar rules. If the input is valid, the end result is
- that the entire token sequence reduces to a single grouping whose symbol is
- the grammar's start symbol. If we use a grammar for C, the entire input
- must be a `sequence of definitions and declarations'. If not, the parser
- reports a syntax error.
- @node Grammar in Bison, Semantic Values, Language and Grammar, Concepts
- @section From Formal Rules to Bison Input
- @cindex Bison grammar
- @cindex grammar, Bison
- @cindex formal grammar
- A formal grammar is a mathematical construct. To define the language
- for Bison, you must write a file expressing the grammar in Bison syntax:
- a @dfn{Bison grammar} file. @xref{Grammar File, ,Bison Grammar Files}.
- A nonterminal symbol in the formal grammar is represented in Bison input
- as an identifier, like an identifier in C. By convention, it should be
- in lower case, such as @code{expr}, @code{stmt} or @code{declaration}.
- The Bison representation for a terminal symbol is also called a @dfn{token
- type}. Token types as well can be represented as C-like identifiers. By
- convention, these identifiers should be upper case to distinguish them from
- nonterminals: for example, @code{INTEGER}, @code{IDENTIFIER}, @code{IF} or
- @code{RETURN}. A terminal symbol that stands for a particular keyword in
- the language should be named after that keyword converted to upper case.
- The terminal symbol @code{error} is reserved for error recovery.
- @xref{Symbols, ,Symbols - Terminal and Nonterminal}.@refill
- A terminal symbol can also be represented as a character literal, just like
- a C character constant. You should do this whenever a token is just a
- single character (parenthesis, plus-sign, etc.): use that same character in
- a literal as the terminal symbol for that token.
- (See @pxref{Symbols, ,Symbols - Terminal and Nonterminal} for
- multiple character literal symbols, also called string tokens.)
- The grammar rules also have an expression in Bison syntax. For example,
- here is the Bison rule for a C @code{return} statement. The semicolon in
- quotes is a literal character token, representing part of the C syntax for
- the statement; the naked semicolon, and the colon, are Bison punctuation
- used in every rule.
- @example
- stmt: RETURN expr ';'
- ;
- @end example
- @noindent
- @xref{Rules, ,Syntax of Grammar Rules}.
- @node Semantic Values, Semantic Actions, Grammar in Bison, Concepts
- @section Semantic Values
- @cindex semantic value
- @cindex value, semantic
- A formal grammar selects tokens only by their classifications: for example,
- if a rule mentions the terminal symbol `integer constant', it means that
- @emph{any} integer constant is grammatically valid in that position. The
- precise value of the constant is irrelevant to how to parse the input: if
- @samp{x+4} is grammatical then @samp{x+1} or @samp{x+3989} is equally
- grammatical.@refill
- But the precise value is very important for what the input means once it is
- parsed. A compiler is useless if it fails to distinguish between 4, 1 and
- 3989 as constants in the program! Therefore, each token in a Bison grammar
- has both a token type and a @dfn{semantic value}. @xref{Semantics, ,Defining Language Semantics},
- for details.
- The token type is a terminal symbol defined in the grammar, such as
- @code{INTEGER}, @code{IDENTIFIER} or @code{','}. It tells everything
- you need to know to decide where the token may validly appear and how to
- group it with other tokens. The grammar rules know nothing about tokens
- except their types.@refill
- The semantic value has all the rest of the information about the
- meaning of the token, such as the value of an integer, or the name of an
- identifier. (A token such as @code{','} which is just punctuation doesn't
- need to have any semantic value.)
- For example, an input token might be classified as token type
- @code{INTEGER} and have the semantic value 4. Another input token might
- have the same token type @code{INTEGER} but value 3989. When a grammar
- rule says that @code{INTEGER} is allowed, either of these tokens is
- acceptable because each is an @code{INTEGER}. When the parser accepts the
- token, it keeps track of the token's semantic value.
- Each grouping can also have a semantic value as well as its nonterminal
- symbol. For example, in a calculator, an expression typically has a
- semantic value that is a number. In a compiler for a programming
- language, an expression typically has a semantic value that is a tree
- structure describing the meaning of the expression.
- @node Semantic Actions, Bison Parser, Semantic Values, Concepts
- @section Semantic Actions
- @cindex semantic actions
- @cindex actions, semantic
- In order to be useful, a program must do more than parse input; it must
- also produce some output based on the input. In a Bison grammar, a grammar
- rule can have an @dfn{action} made up of C statements. Each time the
- parser recognizes a match for that rule, the action is executed.
- @xref{Actions}.
-
- Most of the time, the purpose of an action is to compute the semantic value
- of the whole construct from the semantic values of its parts. For example,
- suppose we have a rule which says an expression can be the sum of two
- expressions. When the parser recognizes such a sum, each of the
- subexpressions has a semantic value which describes how it was built up.
- The action for this rule should create a similar sort of value for the
- newly recognized larger expression.
- For example, here is a rule that says an expression can be the sum of
- two subexpressions:
- @example
- expr: expr '+' expr @{ $$ = $1 + $3; @}
- ;
- @end example
- @noindent
- The action says how to produce the semantic value of the sum expression
- from the values of the two subexpressions.
- @node Bison Parser, Stages, Semantic Actions, Concepts
- @section Bison Output: the Parser File
- @cindex Bison parser
- @cindex Bison utility
- @cindex lexical analyzer, purpose
- @cindex parser
- When you run Bison, you give it a Bison grammar file as input. The output
- is a C source file that parses the language described by the grammar.
- This file is called a @dfn{Bison parser}. Keep in mind that the Bison
- utility and the Bison parser are two distinct programs: the Bison utility
- is a program whose output is the Bison parser that becomes part of your
- program.
- The job of the Bison parser is to group tokens into groupings according to
- the grammar rules---for example, to build identifiers and operators into
- expressions. As it does this, it runs the actions for the grammar rules it
- uses.
- The tokens come from a function called the @dfn{lexical analyzer} that you
- must supply in some fashion (such as by writing it in C). The Bison parser
- calls the lexical analyzer each time it wants a new token. It doesn't know
- what is ``inside'' the tokens (though their semantic values may reflect
- this). Typically the lexical analyzer makes the tokens by parsing
- characters of text, but Bison does not depend on this. @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- The Bison parser file is C code which defines a function named
- @code{yyparse} which implements that grammar. This function does not make
- a complete C program: you must supply some additional functions. One is
- the lexical analyzer. Another is an error-reporting function which the
- parser calls to report an error. In addition, a complete C program must
- start with a function called @code{main}; you have to provide this, and
- arrange for it to call @code{yyparse} or the parser will never run.
- @xref{Interface, ,Parser C-Language Interface}.
- Aside from the token type names and the symbols in the actions you
- write, all variable and function names used in the Bison parser file
- begin with @samp{yy} or @samp{YY}. This includes interface functions
- such as the lexical analyzer function @code{yylex}, the error reporting
- function @code{yyerror} and the parser function @code{yyparse} itself.
- This also includes numerous identifiers used for internal purposes.
- Therefore, you should avoid using C identifiers starting with @samp{yy}
- or @samp{YY} in the Bison grammar file except for the ones defined in
- this manual.
- @node Stages, Grammar Layout, Bison Parser, Concepts
- @section Stages in Using Bison
- @cindex stages in using Bison
- @cindex using Bison
- The actual language-design process using Bison, from grammar specification
- to a working compiler or interpreter, has these parts:
- @enumerate
- @item
- Formally specify the grammar in a form recognized by Bison
- (@pxref{Grammar File, ,Bison Grammar Files}). For each grammatical rule in the language,
- describe the action that is to be taken when an instance of that rule
- is recognized. The action is described by a sequence of C statements.
- @item
- Write a lexical analyzer to process input and pass tokens to the
- parser. The lexical analyzer may be written by hand in C
- (@pxref{Lexical, ,The Lexical Analyzer Function @code{yylex}}). It could also be produced using Lex, but the use
- of Lex is not discussed in this manual.
- @item
- Write a controlling function that calls the Bison-produced parser.
- @item
- Write error-reporting routines.
- @end enumerate
- To turn this source code as written into a runnable program, you
- must follow these steps:
- @enumerate
- @item
- Run Bison on the grammar to produce the parser.
- @item
- Compile the code output by Bison, as well as any other source files.
- @item
- Link the object files to produce the finished product.
- @end enumerate
- @node Grammar Layout, , Stages, Concepts
- @section The Overall Layout of a Bison Grammar
- @cindex grammar file
- @cindex file format
- @cindex format of grammar file
- @cindex layout of Bison grammar
- The input file for the Bison utility is a @dfn{Bison grammar file}. The
- general form of a Bison grammar file is as follows:
- @example
- %@{
- @var{C declarations}
- %@}
- @var{Bison declarations}
- %%
- @var{Grammar rules}
- %%
- @var{Additional C code}
- @end example
- @noindent
- The @samp{%%}, @samp{%@{} and @samp{%@}} are punctuation that appears
- in every Bison grammar file to separate the sections.
- The C declarations may define types and variables used in the actions.
- You can also use preprocessor commands to define macros used there, and use
- @code{#include} to include header files that do any of these things.
- The Bison declarations declare the names of the terminal and nonterminal
- symbols, and may also describe operator precedence and the data types of
- semantic values of various symbols.
- The grammar rules define how to construct each nonterminal symbol from its
- parts.
- The additional C code can contain any C code you want to use. Often the
- definition of the lexical analyzer @code{yylex} goes here, plus subroutines
- called by the actions in the grammar rules. In a simple program, all the
- rest of the program can go here.
- @node Examples, Grammar File, Concepts, Top
- @chapter Examples
- @cindex simple examples
- @cindex examples, simple
- Now we show and explain three sample programs written using Bison: a
- reverse polish notation calculator, an algebraic (infix) notation
- calculator, and a multi-function calculator. All three have been tested
- under BSD Unix 4.3; each produces a usable, though limited, interactive
- desk-top calculator.
- These examples are simple, but Bison grammars for real programming
- languages are written the same way.
- @ifinfo
- You can copy these examples out of the Info file and into a source file
- to try them.
- @end ifinfo
- @menu
- * RPN Calc:: Reverse polish notation calculator;
- a first example with no operator precedence.
- * Infix Calc:: Infix (algebraic) notation calculator.
- Operator precedence is introduced.
- * Simple Error Recovery:: Continuing after syntax errors.
- * Multi-function Calc:: Calculator with memory and trig functions.
- It uses multiple data-types for semantic values.
- * Exercises:: Ideas for improving the multi-function calculator.
- @end menu
- @node RPN Calc, Infix Calc, , Examples
- @section Reverse Polish Notation Calculator
- @cindex reverse polish notation
- @cindex polish notation calculator
- @cindex @code{rpcalc}
- @cindex calculator, simple
- The first example is that of a simple double-precision @dfn{reverse polish
- notation} calculator (a calculator using postfix operators). This example
- provides a good starting point, since operator precedence is not an issue.
- The second example will illustrate how operator precedence is handled.
- The source code for this calculator is named @file{rpcalc.y}. The
- @samp{.y} extension is a convention used for Bison input files.
- @menu
- * Decls: Rpcalc Decls. Bison and C declarations for rpcalc.
- * Rules: Rpcalc Rules. Grammar Rules for rpcalc, with explanation.
- * Lexer: Rpcalc Lexer. The lexical analyzer.
- * Main: Rpcalc Main. The controlling function.
- * Error: Rpcalc Error. The error reporting function.
- * Gen: Rpcalc Gen. Running Bison on the grammar file.
- * Comp: Rpcalc Compile. Run the C compiler on the output code.
- @end menu
- @node Rpcalc Decls, Rpcalc Rules, , RPN Calc
- @subsection Declarations for @code{rpcalc}
- Here are the C and Bison declarations for the reverse polish notation
- calculator. As in C, comments are placed between @samp{/*@dots{}*/}.
- @example
- /* Reverse polish notation calculator. */
- %@{
- #define YYSTYPE double
- #include <math.h>
- %@}
- %token NUM
- %% /* Grammar rules and actions follow */
- @end example
- The C declarations section (@pxref{C Declarations, ,The C Declarations Section}) contains two
- preprocessor directives.
- The @code{#define} directive defines the macro @code{YYSTYPE}, thus
- specifying the C data type for semantic values of both tokens and groupings
- (@pxref{Value Type, ,Data Types of Semantic Values}). The Bison parser will use whatever type
- @code{YYSTYPE} is defined as; if you don't define it, @code{int} is the
- default. Because we specify @code{double}, each token and each expression
- has an associated value, which is a floating point number.
- The @code{#include} directive is used to declare the exponentiation
- function @code{pow}.
- The second section, Bison declarations, provides information to Bison about
- the token types (@pxref{Bison Declarations, ,The Bison Declarations Section}). Each terminal symbol that is
- not a single-character literal must be declared here. (Single-character
- literals normally don't need to be declared.) In this example, all the
- arithmetic operators are designated by single-character literals, so the
- only terminal symbol that needs to be declared is @code{NUM}, the token
- type for numeric constants.
- @node Rpcalc Rules, Rpcalc Lexer, Rpcalc Decls, RPN Calc
- @subsection Grammar Rules for @code{rpcalc}
- Here are the grammar rules for the reverse polish notation calculator.
- @example
- input: /* empty */
- | input line
- ;
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- ;
- exp: NUM @{ $$ = $1; @}
- | exp exp '+' @{ $$ = $1 + $2; @}
- | exp exp '-' @{ $$ = $1 - $2; @}
- | exp exp '*' @{ $$ = $1 * $2; @}
- | exp exp '/' @{ $$ = $1 / $2; @}
- /* Exponentiation */
- | exp exp '^' @{ $$ = pow ($1, $2); @}
- /* Unary minus */
- | exp 'n' @{ $$ = -$1; @}
- ;
- %%
- @end example
- The groupings of the rpcalc ``language'' defined here are the expression
- (given the name @code{exp}), the line of input (@code{line}), and the
- complete input transcript (@code{input}). Each of these nonterminal
- symbols has several alternate rules, joined by the @samp{|} punctuator
- which is read as ``or''. The following sections explain what these rules
- mean.
- The semantics of the language is determined by the actions taken when a
- grouping is recognized. The actions are the C code that appears inside
- braces. @xref{Actions}.
- You must specify these actions in C, but Bison provides the means for
- passing semantic values between the rules. In each action, the
- pseudo-variable @code{$$} stands for the semantic value for the grouping
- that the rule is going to construct. Assigning a value to @code{$$} is the
- main job of most actions. The semantic values of the components of the
- rule are referred to as @code{$1}, @code{$2}, and so on.
- @menu
- * Rpcalc Input::
- * Rpcalc Line::
- * Rpcalc Expr::
- @end menu
- @node Rpcalc Input, Rpcalc Line, , Rpcalc Rules
- @subsubsection Explanation of @code{input}
- Consider the definition of @code{input}:
- @example
- input: /* empty */
- | input line
- ;
- @end example
- This definition reads as follows: ``A complete input is either an empty
- string, or a complete input followed by an input line''. Notice that
- ``complete input'' is defined in terms of itself. This definition is said
- to be @dfn{left recursive} since @code{input} appears always as the
- leftmost symbol in the sequence. @xref{Recursion, ,Recursive Rules}.
- The first alternative is empty because there are no symbols between the
- colon and the first @samp{|}; this means that @code{input} can match an
- empty string of input (no tokens). We write the rules this way because it
- is legitimate to type @kbd{Ctrl-d} right after you start the calculator.
- It's conventional to put an empty alternative first and write the comment
- @samp{/* empty */} in it.
- The second alternate rule (@code{input line}) handles all nontrivial input.
- It means, ``After reading any number of lines, read one more line if
- possible.'' The left recursion makes this rule into a loop. Since the
- first alternative matches empty input, the loop can be executed zero or
- more times.
- The parser function @code{yyparse} continues to process input until a
- grammatical error is seen or the lexical analyzer says there are no more
- input tokens; we will arrange for the latter to happen at end of file.
- @node Rpcalc Line, Rpcalc Expr, Rpcalc Input, Rpcalc Rules
- @subsubsection Explanation of @code{line}
- Now consider the definition of @code{line}:
- @example
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- ;
- @end example
- The first alternative is a token which is a newline character; this means
- that rpcalc accepts a blank line (and ignores it, since there is no
- action). The second alternative is an expression followed by a newline.
- This is the alternative that makes rpcalc useful. The semantic value of
- the @code{exp} grouping is the value of @code{$1} because the @code{exp} in
- question is the first symbol in the alternative. The action prints this
- value, which is the result of the computation the user asked for.
- This action is unusual because it does not assign a value to @code{$$}. As
- a consequence, the semantic value associated with the @code{line} is
- uninitialized (its value will be unpredictable). This would be a bug if
- that value were ever used, but we don't use it: once rpcalc has printed the
- value of the user's input line, that value is no longer needed.
- @node Rpcalc Expr, , Rpcalc Line, Rpcalc Rules
- @subsubsection Explanation of @code{expr}
- The @code{exp} grouping has several rules, one for each kind of expression.
- The first rule handles the simplest expressions: those that are just numbers.
- The second handles an addition-expression, which looks like two expressions
- followed by a plus-sign. The third handles subtraction, and so on.
- @example
- exp: NUM
- | exp exp '+' @{ $$ = $1 + $2; @}
- | exp exp '-' @{ $$ = $1 - $2; @}
- @dots{}
- ;
- @end example
- We have used @samp{|} to join all the rules for @code{exp}, but we could
- equally well have written them separately:
- @example
- exp: NUM ;
- exp: exp exp '+' @{ $$ = $1 + $2; @} ;
- exp: exp exp '-' @{ $$ = $1 - $2; @} ;
- @dots{}
- @end example
- Most of the rules have actions that compute the value of the expression in
- terms of the value of its parts. For example, in the rule for addition,
- @code{$1} refers to the first component @code{exp} and @code{$2} refers to
- the second one. The third component, @code{'+'}, has no meaningful
- associated semantic value, but if it had one you could refer to it as
- @code{$3}. When @code{yyparse} recognizes a sum expression using this
- rule, the sum of the two subexpressions' values is produced as the value of
- the entire expression. @xref{Actions}.
- You don't have to give an action for every rule. When a rule has no
- action, Bison by default copies the value of @code{$1} into @code{$$}.
- This is what happens in the first rule (the one that uses @code{NUM}).
- The formatting shown here is the recommended convention, but Bison does
- not require it. You can add or change whitespace as much as you wish.
- For example, this:
- @example
- exp : NUM | exp exp '+' @{$$ = $1 + $2; @} | @dots{}
- @end example
- @noindent
- means the same thing as this:
- @example
- exp: NUM
- | exp exp '+' @{ $$ = $1 + $2; @}
- | @dots{}
- @end example
- @noindent
- The latter, however, is much more readable.
- @node Rpcalc Lexer, Rpcalc Main, Rpcalc Rules, RPN Calc
- @subsection The @code{rpcalc} Lexical Analyzer
- @cindex writing a lexical analyzer
- @cindex lexical analyzer, writing
- The lexical analyzer's job is low-level parsing: converting characters or
- sequences of characters into tokens. The Bison parser gets its tokens by
- calling the lexical analyzer. @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- Only a simple lexical analyzer is needed for the RPN calculator. This
- lexical analyzer skips blanks and tabs, then reads in numbers as
- @code{double} and returns them as @code{NUM} tokens. Any other character
- that isn't part of a number is a separate token. Note that the token-code
- for such a single-character token is the character itself.
- The return value of the lexical analyzer function is a numeric code which
- represents a token type. The same text used in Bison rules to stand for
- this token type is also a C expression for the numeric code for the type.
- This works in two ways. If the token type is a character literal, then its
- numeric code is the ASCII code for that character; you can use the same
- character literal in the lexical analyzer to express the number. If the
- token type is an identifier, that identifier is defined by Bison as a C
- macro whose definition is the appropriate number. In this example,
- therefore, @code{NUM} becomes a macro for @code{yylex} to use.
- The semantic value of the token (if it has one) is stored into the global
- variable @code{yylval}, which is where the Bison parser will look for it.
- (The C data type of @code{yylval} is @code{YYSTYPE}, which was defined
- at the beginning of the grammar; @pxref{Rpcalc Decls, ,Declarations for @code{rpcalc}}.)
- A token type code of zero is returned if the end-of-file is encountered.
- (Bison recognizes any nonpositive value as indicating the end of the
- input.)
- Here is the code for the lexical analyzer:
- @example
- @group
- /* Lexical analyzer returns a double floating point
- number on the stack and the token NUM, or the ASCII
- character read if not a number. Skips all blanks
- and tabs, returns 0 for EOF. */
- #include <ctype.h>
- @end group
- @group
- yylex ()
- @{
- int c;
- /* skip white space */
- while ((c = getchar ()) == ' ' || c == '\t')
- ;
- @end group
- @group
- /* process numbers */
- if (c == '.' || isdigit (c))
- @{
- ungetc (c, stdin);
- scanf ("%lf", &yylval);
- return NUM;
- @}
- @end group
- @group
- /* return end-of-file */
- if (c == EOF)
- return 0;
- /* return single chars */
- return c;
- @}
- @end group
- @end example
- @node Rpcalc Main, Rpcalc Error, Rpcalc Lexer, RPN Calc
- @subsection The Controlling Function
- @cindex controlling function
- @cindex main function in simple example
- In keeping with the spirit of this example, the controlling function is
- kept to the bare minimum. The only requirement is that it call
- @code{yyparse} to start the process of parsing.
- @example
- @group
- main ()
- @{
- yyparse ();
- @}
- @end group
- @end example
- @node Rpcalc Error, Rpcalc Gen, Rpcalc Main, RPN Calc
- @subsection The Error Reporting Routine
- @cindex error reporting routine
- When @code{yyparse} detects a syntax error, it calls the error reporting
- function @code{yyerror} to print an error message (usually but not always
- @code{"parse error"}). It is up to the programmer to supply @code{yyerror}
- (@pxref{Interface, ,Parser C-Language Interface}), so here is the definition we will use:
- @example
- @group
- #include <stdio.h>
- yyerror (s) /* Called by yyparse on error */
- char *s;
- @{
- printf ("%s\n", s);
- @}
- @end group
- @end example
- After @code{yyerror} returns, the Bison parser may recover from the error
- and continue parsing if the grammar contains a suitable error rule
- (@pxref{Error Recovery}). Otherwise, @code{yyparse} returns nonzero. We
- have not written any error rules in this example, so any invalid input will
- cause the calculator program to exit. This is not clean behavior for a
- real calculator, but it is adequate in the first example.
- @node Rpcalc Gen, Rpcalc Compile, Rpcalc Error, RPN Calc
- @subsection Running Bison to Make the Parser
- @cindex running Bison (introduction)
- Before running Bison to produce a parser, we need to decide how to arrange
- all the source code in one or more source files. For such a simple example,
- the easiest thing is to put everything in one file. The definitions of
- @code{yylex}, @code{yyerror} and @code{main} go at the end, in the
- ``additional C code'' section of the file (@pxref{Grammar Layout, ,The Overall Layout of a Bison Grammar}).
- For a large project, you would probably have several source files, and use
- @code{make} to arrange to recompile them.
- With all the source in a single file, you use the following command to
- convert it into a parser file:
- @example
- bison @var{file_name}.y
- @end example
- @noindent
- In this example the file was called @file{rpcalc.y} (for ``Reverse Polish
- CALCulator''). Bison produces a file named @file{@var{file_name}.tab.c},
- removing the @samp{.y} from the original file name. The file output by
- Bison contains the source code for @code{yyparse}. The additional
- functions in the input file (@code{yylex}, @code{yyerror} and @code{main})
- are copied verbatim to the output.
- @node Rpcalc Compile, , Rpcalc Gen, RPN Calc
- @subsection Compiling the Parser File
- @cindex compiling the parser
- Here is how to compile and run the parser file:
- @example
- @group
- # @r{List files in current directory.}
- % ls
- rpcalc.tab.c rpcalc.y
- @end group
- @group
- # @r{Compile the Bison parser.}
- # @r{@samp{-lm} tells compiler to search math library for @code{pow}.}
- % cc rpcalc.tab.c -lm -o rpcalc
- @end group
- @group
- # @r{List files again.}
- % ls
- rpcalc rpcalc.tab.c rpcalc.y
- @end group
- @end example
- The file @file{rpcalc} now contains the executable code. Here is an
- example session using @code{rpcalc}.
- @example
- % rpcalc
- 4 9 +
- 13
- 3 7 + 3 4 5 *+-
- -13
- 3 7 + 3 4 5 * + - n @r{Note the unary minus, @samp{n}}
- 13
- 5 6 / 4 n +
- -3.166666667
- 3 4 ^ @r{Exponentiation}
- 81
- ^D @r{End-of-file indicator}
- %
- @end example
- @node Infix Calc, Simple Error Recovery, RPN Calc, Examples
- @section Infix Notation Calculator: @code{calc}
- @cindex infix notation calculator
- @cindex @code{calc}
- @cindex calculator, infix notation
- We now modify rpcalc to handle infix operators instead of postfix. Infix
- notation involves the concept of operator precedence and the need for
- parentheses nested to arbitrary depth. Here is the Bison code for
- @file{calc.y}, an infix desk-top calculator.
- @example
- /* Infix notation calculator--calc */
- %@{
- #define YYSTYPE double
- #include <math.h>
- %@}
- /* BISON Declarations */
- %token NUM
- %left '-' '+'
- %left '*' '/'
- %left NEG /* negation--unary minus */
- %right '^' /* exponentiation */
- /* Grammar follows */
- %%
- input: /* empty string */
- | input line
- ;
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- ;
- exp: NUM @{ $$ = $1; @}
- | exp '+' exp @{ $$ = $1 + $3; @}
- | exp '-' exp @{ $$ = $1 - $3; @}
- | exp '*' exp @{ $$ = $1 * $3; @}
- | exp '/' exp @{ $$ = $1 / $3; @}
- | '-' exp %prec NEG @{ $$ = -$2; @}
- | exp '^' exp @{ $$ = pow ($1, $3); @}
- | '(' exp ')' @{ $$ = $2; @}
- ;
- %%
- @end example
- @noindent
- The functions @code{yylex}, @code{yyerror} and @code{main} can be the same
- as before.
- There are two important new features shown in this code.
- In the second section (Bison declarations), @code{%left} declares token
- types and says they are left-associative operators. The declarations
- @code{%left} and @code{%right} (right associativity) take the place of
- @code{%token} which is used to declare a token type name without
- associativity. (These tokens are single-character literals, which
- ordinarily don't need to be declared. We declare them here to specify
- the associativity.)
- Operator precedence is determined by the line ordering of the
- declarations; the higher the line number of the declaration (lower on
- the page or screen), the higher the precedence. Hence, exponentiation
- has the highest precedence, unary minus (@code{NEG}) is next, followed
- by @samp{*} and @samp{/}, and so on. @xref{Precedence, ,Operator Precedence}.
- The other important new feature is the @code{%prec} in the grammar section
- for the unary minus operator. The @code{%prec} simply instructs Bison that
- the rule @samp{| '-' exp} has the same precedence as @code{NEG}---in this
- case the next-to-highest. @xref{Contextual Precedence, ,Context-Dependent Precedence}.
- Here is a sample run of @file{calc.y}:
- @need 500
- @example
- % calc
- 4 + 4.5 - (34/(8*3+-3))
- 6.880952381
- -56 + 2
- -54
- 3 ^ 2
- 9
- @end example
- @node Simple Error Recovery, Multi-function Calc, Infix Calc, Examples
- @section Simple Error Recovery
- @cindex error recovery, simple
- Up to this point, this manual has not addressed the issue of @dfn{error
- recovery}---how to continue parsing after the parser detects a syntax
- error. All we have handled is error reporting with @code{yyerror}. Recall
- that by default @code{yyparse} returns after calling @code{yyerror}. This
- means that an erroneous input line causes the calculator program to exit.
- Now we show how to rectify this deficiency.
- The Bison language itself includes the reserved word @code{error}, which
- may be included in the grammar rules. In the example below it has
- been added to one of the alternatives for @code{line}:
- @example
- @group
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- | error '\n' @{ yyerrok; @}
- ;
- @end group
- @end example
- This addition to the grammar allows for simple error recovery in the event
- of a parse error. If an expression that cannot be evaluated is read, the
- error will be recognized by the third rule for @code{line}, and parsing
- will continue. (The @code{yyerror} function is still called upon to print
- its message as well.) The action executes the statement @code{yyerrok}, a
- macro defined automatically by Bison; its meaning is that error recovery is
- complete (@pxref{Error Recovery}). Note the difference between
- @code{yyerrok} and @code{yyerror}; neither one is a misprint.@refill
- This form of error recovery deals with syntax errors. There are other
- kinds of errors; for example, division by zero, which raises an exception
- signal that is normally fatal. A real calculator program must handle this
- signal and use @code{longjmp} to return to @code{main} and resume parsing
- input lines; it would also have to discard the rest of the current line of
- input. We won't discuss this issue further because it is not specific to
- Bison programs.
- @node Multi-function Calc, Exercises, Simple Error Recovery, Examples
- @section Multi-Function Calculator: @code{mfcalc}
- @cindex multi-function calculator
- @cindex @code{mfcalc}
- @cindex calculator, multi-function
- Now that the basics of Bison have been discussed, it is time to move on to
- a more advanced problem. The above calculators provided only five
- functions, @samp{+}, @samp{-}, @samp{*}, @samp{/} and @samp{^}. It would
- be nice to have a calculator that provides other mathematical functions such
- as @code{sin}, @code{cos}, etc.
- It is easy to add new operators to the infix calculator as long as they are
- only single-character literals. The lexical analyzer @code{yylex} passes
- back all non-number characters as tokens, so new grammar rules suffice for
- adding a new operator. But we want something more flexible: built-in
- functions whose syntax has this form:
- @example
- @var{function_name} (@var{argument})
- @end example
- @noindent
- At the same time, we will add memory to the calculator, by allowing you
- to create named variables, store values in them, and use them later.
- Here is a sample session with the multi-function calculator:
- @example
- % acalc
- pi = 3.141592653589
- 3.1415926536
- sin(pi)
- 0.0000000000
- alpha = beta1 = 2.3
- 2.3000000000
- alpha
- 2.3000000000
- ln(alpha)
- 0.8329091229
- exp(ln(beta1))
- 2.3000000000
- %
- @end example
- Note that multiple assignment and nested function calls are permitted.
- @menu
- * Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
- * Rules: Mfcalc Rules. Grammar rules for the calculator.
- * Symtab: Mfcalc Symtab. Symbol table management subroutines.
- @end menu
- @node Mfcalc Decl, Mfcalc Rules, , Multi-function Calc
- @subsection Declarations for @code{mfcalc}
- Here are the C and Bison declarations for the multi-function calculator.
- @smallexample
- %@{
- #include <math.h> /* For math functions, cos(), sin(), etc. */
- #include "calc.h" /* Contains definition of `symrec' */
- %@}
- %union @{
- double val; /* For returning numbers. */
- symrec *tptr; /* For returning symbol-table pointers */
- @}
- %token <val> NUM /* Simple double precision number */
- %token <tptr> VAR FNCT /* Variable and Function */
- %type <val> exp
- %right '='
- %left '-' '+'
- %left '*' '/'
- %left NEG /* Negation--unary minus */
- %right '^' /* Exponentiation */
- /* Grammar follows */
- %%
- @end smallexample
- The above grammar introduces only two new features of the Bison language.
- These features allow semantic values to have various data types
- (@pxref{Multiple Types, ,More Than One Value Type}).
- The @code{%union} declaration specifies the entire list of possible types;
- this is instead of defining @code{YYSTYPE}. The allowable types are now
- double-floats (for @code{exp} and @code{NUM}) and pointers to entries in
- the symbol table. @xref{Union Decl, ,The Collection of Value Types}.
- Since values can now have various types, it is necessary to associate a
- type with each grammar symbol whose semantic value is used. These symbols
- are @code{NUM}, @code{VAR}, @code{FNCT}, and @code{exp}. Their
- declarations are augmented with information about their data type (placed
- between angle brackets).
- The Bison construct @code{%type} is used for declaring nonterminal symbols,
- just as @code{%token} is used for declaring token types. We have not used
- @code{%type} before because nonterminal symbols are normally declared
- implicitly by the rules that define them. But @code{exp} must be declared
- explicitly so we can specify its value type. @xref{Type Decl, ,Nonterminal Symbols}.
- @node Mfcalc Rules, Mfcalc Symtab, Mfcalc Decl, Multi-function Calc
- @subsection Grammar Rules for @code{mfcalc}
- Here are the grammar rules for the multi-function calculator.
- Most of them are copied directly from @code{calc}; three rules,
- those which mention @code{VAR} or @code{FNCT}, are new.
- @smallexample
- input: /* empty */
- | input line
- ;
- line:
- '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- | error '\n' @{ yyerrok; @}
- ;
- exp: NUM @{ $$ = $1; @}
- | VAR @{ $$ = $1->value.var; @}
- | VAR '=' exp @{ $$ = $3; $1->value.var = $3; @}
- | FNCT '(' exp ')' @{ $$ = (*($1->value.fnctptr))($3); @}
- | exp '+' exp @{ $$ = $1 + $3; @}
- | exp '-' exp @{ $$ = $1 - $3; @}
- | exp '*' exp @{ $$ = $1 * $3; @}
- | exp '/' exp @{ $$ = $1 / $3; @}
- | '-' exp %prec NEG @{ $$ = -$2; @}
- | exp '^' exp @{ $$ = pow ($1, $3); @}
- | '(' exp ')' @{ $$ = $2; @}
- ;
- /* End of grammar */
- %%
- @end smallexample
- @node Mfcalc Symtab, , Mfcalc Rules, Multi-function Calc
- @subsection The @code{mfcalc} Symbol Table
- @cindex symbol table example
- The multi-function calculator requires a symbol table to keep track of the
- names and meanings of variables and functions. This doesn't affect the
- grammar rules (except for the actions) or the Bison declarations, but it
- requires some additional C functions for support.
- The symbol table itself consists of a linked list of records. Its
- definition, which is kept in the header @file{calc.h}, is as follows. It
- provides for either functions or variables to be placed in the table.
- @smallexample
- @group
- /* Data type for links in the chain of symbols. */
- struct symrec
- @{
- char *name; /* name of symbol */
- int type; /* type of symbol: either VAR or FNCT */
- union @{
- double var; /* value of a VAR */
- double (*fnctptr)(); /* value of a FNCT */
- @} value;
- struct symrec *next; /* link field */
- @};
- @end group
- @group
- typedef struct symrec symrec;
- /* The symbol table: a chain of `struct symrec'. */
- extern symrec *sym_table;
- symrec *putsym ();
- symrec *getsym ();
- @end group
- @end smallexample
- The new version of @code{main} includes a call to @code{init_table}, a
- function that initializes the symbol table. Here it is, and
- @code{init_table} as well:
- @smallexample
- @group
- #include <stdio.h>
- main ()
- @{
- init_table ();
- yyparse ();
- @}
- @end group
- @group
- yyerror (s) /* Called by yyparse on error */
- char *s;
- @{
- printf ("%s\n", s);
- @}
- struct init
- @{
- char *fname;
- double (*fnct)();
- @};
- @end group
- @group
- struct init arith_fncts[]
- = @{
- "sin", sin,
- "cos", cos,
- "atan", atan,
- "ln", log,
- "exp", exp,
- "sqrt", sqrt,
- 0, 0
- @};
- /* The symbol table: a chain of `struct symrec'. */
- symrec *sym_table = (symrec *)0;
- @end group
- @group
- init_table () /* puts arithmetic functions in table. */
- @{
- int i;
- symrec *ptr;
- for (i = 0; arith_fncts[i].fname != 0; i++)
- @{
- ptr = putsym (arith_fncts[i].fname, FNCT);
- ptr->value.fnctptr = arith_fncts[i].fnct;
- @}
- @}
- @end group
- @end smallexample
- By simply editing the initialization list and adding the necessary include
- files, you can add additional functions to the calculator.
- Two important functions allow look-up and installation of symbols in the
- symbol table. The function @code{putsym} is passed a name and the type
- (@code{VAR} or @code{FNCT}) of the object to be installed. The object is
- linked to the front of the list, and a pointer to the object is returned.
- The function @code{getsym} is passed the name of the symbol to look up. If
- found, a pointer to that symbol is returned; otherwise zero is returned.
- @smallexample
- symrec *
- putsym (sym_name,sym_type)
- char *sym_name;
- int sym_type;
- @{
- symrec *ptr;
- ptr = (symrec *) malloc (sizeof (symrec));
- ptr->name = (char *) malloc (strlen (sym_name) + 1);
- strcpy (ptr->name,sym_name);
- ptr->type = sym_type;
- ptr->value.var = 0; /* set value to 0 even if fctn. */
- ptr->next = (struct symrec *)sym_table;
- sym_table = ptr;
- return ptr;
- @}
- symrec *
- getsym (sym_name)
- char *sym_name;
- @{
- symrec *ptr;
- for (ptr = sym_table; ptr != (symrec *) 0;
- ptr = (symrec *)ptr->next)
- if (strcmp (ptr->name,sym_name) == 0)
- return ptr;
- return 0;
- @}
- @end smallexample
- The function @code{yylex} must now recognize variables, numeric values, and
- the single-character arithmetic operators. Strings of alphanumeric
- characters with a leading nondigit are recognized as either variables or
- functions depending on what the symbol table says about them.
- The string is passed to @code{getsym} for look up in the symbol table. If
- the name appears in the table, a pointer to its location and its type
- (@code{VAR} or @code{FNCT}) is returned to @code{yyparse}. If it is not
- already in the table, then it is installed as a @code{VAR} using
- @code{putsym}. Again, a pointer and its type (which must be @code{VAR}) is
- returned to @code{yyparse}.@refill
- No change is needed in the handling of numeric values and arithmetic
- operators in @code{yylex}.
- @smallexample
- @group
- #include <ctype.h>
- yylex ()
- @{
- int c;
- /* Ignore whitespace, get first nonwhite character. */
- while ((c = getchar ()) == ' ' || c == '\t');
- if (c == EOF)
- return 0;
- @end group
- @group
- /* Char starts a number => parse the number. */
- if (c == '.' || isdigit (c))
- @{
- ungetc (c, stdin);
- scanf ("%lf", &yylval.val);
- return NUM;
- @}
- @end group
- @group
- /* Char starts an identifier => read the name. */
- if (isalpha (c))
- @{
- symrec *s;
- static char *symbuf = 0;
- static int length = 0;
- int i;
- @end group
- @group
- /* Initially make the buffer long enough
- for a 40-character symbol name. */
- if (length == 0)
- length = 40, symbuf = (char *)malloc (length + 1);
- i = 0;
- do
- @end group
- @group
- @{
- /* If buffer is full, make it bigger. */
- if (i == length)
- @{
- length *= 2;
- symbuf = (char *)realloc (symbuf, length + 1);
- @}
- /* Add this character to the buffer. */
- symbuf[i++] = c;
- /* Get another character. */
- c = getchar ();
- @}
- @end group
- @group
- while (c != EOF && isalnum (c));
- ungetc (c, stdin);
- symbuf[i] = '\0';
- @end group
- @group
- s = getsym (symbuf);
- if (s == 0)
- s = putsym (symbuf, VAR);
- yylval.tptr = s;
- return s->type;
- @}
- /* Any other character is a token by itself. */
- return c;
- @}
- @end group
- @end smallexample
- This program is both powerful and flexible. You may easily add new
- functions, and it is a simple job to modify this code to install predefined
- variables such as @code{pi} or @code{e} as well.
- @node Exercises, , Multi-function Calc, Examples
- @section Exercises
- @cindex exercises
- @enumerate
- @item
- Add some new functions from @file{math.h} to the initialization list.
- @item
- Add another array that contains constants and their values. Then
- modify @code{init_table} to add these constants to the symbol table.
- It will be easiest to give the constants type @code{VAR}.
- @item
- Make the program report an error if the user refers to an
- uninitialized variable in any way except to store a value in it.
- @end enumerate
- @node Grammar File, Interface, Examples, Top
- @chapter Bison Grammar Files
- Bison takes as input a context-free grammar specification and produces a
- C-language function that recognizes correct instances of the grammar.
- The Bison grammar input file conventionally has a name ending in @samp{.y}.
- @menu
- * Grammar Outline:: Overall layout of the grammar file.
- * Symbols:: Terminal and nonterminal symbols.
- * Rules:: How to write grammar rules.
- * Recursion:: Writing recursive rules.
- * Semantics:: Semantic values and actions.
- * Declarations:: All kinds of Bison declarations are described here.
- * Multiple Parsers:: Putting more than one Bison parser in one program.
- @end menu
- @node Grammar Outline, Symbols, , Grammar File
- @section Outline of a Bison Grammar
- A Bison grammar file has four main sections, shown here with the
- appropriate delimiters:
- @example
- %@{
- @var{C declarations}
- %@}
- @var{Bison declarations}
- %%
- @var{Grammar rules}
- %%
- @var{Additional C code}
- @end example
- Comments enclosed in @samp{/* @dots{} */} may appear in any of the sections.
- @menu
- * C Declarations:: Syntax and usage of the C declarations section.
- * Bison Declarations:: Syntax and usage of the Bison declarations section.
- * Grammar Rules:: Syntax and usage of the grammar rules section.
- * C Code:: Syntax and usage of the additional C code section.
- @end menu
- @node C Declarations, Bison Declarations, , Grammar Outline
- @subsection The C Declarations Section
- @cindex C declarations section
- @cindex declarations, C
- The @var{C declarations} section contains macro definitions and
- declarations of functions and variables that are used in the actions in the
- grammar rules. These are copied to the beginning of the parser file so
- that they precede the definition of @code{yyparse}. You can use
- @samp{#include} to get the declarations from a header file. If you don't
- need any C declarations, you may omit the @samp{%@{} and @samp{%@}}
- delimiters that bracket this section.
- @node Bison Declarations, Grammar Rules, C Declarations, Grammar Outline
- @subsection The Bison Declarations Section
- @cindex Bison declarations (introduction)
- @cindex declarations, Bison (introduction)
- The @var{Bison declarations} section contains declarations that define
- terminal and nonterminal symbols, specify precedence, and so on.
- In some simple grammars you may not need any declarations.
- @xref{Declarations, ,Bison Declarations}.
- @node Grammar Rules, C Code, Bison Declarations, Grammar Outline
- @subsection The Grammar Rules Section
- @cindex grammar rules section
- @cindex rules section for grammar
- The @dfn{grammar rules} section contains one or more Bison grammar
- rules, and nothing else. @xref{Rules, ,Syntax of Grammar Rules}.
- There must always be at least one grammar rule, and the first
- @samp{%%} (which precedes the grammar rules) may never be omitted even
- if it is the first thing in the file.
- @node C Code, , Grammar Rules, Grammar Outline
- @subsection The Additional C Code Section
- @cindex additional C code section
- @cindex C code, section for additional
- The @var{additional C code} section is copied verbatim to the end of
- the parser file, just as the @var{C declarations} section is copied to
- the beginning. This is the most convenient place to put anything
- that you want to have in the parser file but which need not come before
- the definition of @code{yyparse}. For example, the definitions of
- @code{yylex} and @code{yyerror} often go here. @xref{Interface, ,Parser C-Language Interface}.
- If the last section is empty, you may omit the @samp{%%} that separates it
- from the grammar rules.
- The Bison parser itself contains many static variables whose names start
- with @samp{yy} and many macros whose names start with @samp{YY}. It is a
- good idea to avoid using any such names (except those documented in this
- manual) in the additional C code section of the grammar file.
- @node Symbols, Rules, Grammar Outline, Grammar File
- @section Symbols, Terminal and Nonterminal
- @cindex nonterminal symbol
- @cindex terminal symbol
- @cindex token type
- @cindex symbol
- @dfn{Symbols} in Bison grammars represent the grammatical classifications
- of the language.
- A @dfn{terminal symbol} (also known as a @dfn{token type}) represents a
- class of syntactically equivalent tokens. You use the symbol in grammar
- rules to mean that a token in that class is allowed. The symbol is
- represented in the Bison parser by a numeric code, and the @code{yylex}
- function returns a token type code to indicate what kind of token has been
- read. You don't need to know what the code value is; you can use the
- symbol to stand for it.
- A @dfn{nonterminal symbol} stands for a class of syntactically equivalent
- groupings. The symbol name is used in writing grammar rules. By convention,
- it should be all lower case.
- Symbol names can contain letters, digits (not at the beginning),
- underscores and periods. Periods make sense only in nonterminals.
- There are three ways of writing terminal symbols in the grammar:
- @itemize @bullet
- @item
- A @dfn{named token type} is written with an identifier, like an
- identifier in C. By convention, it should be all upper case. Each
- such name must be defined with a Bison declaration such as
- @code{%token}. @xref{Token Decl, ,Token Type Names}.
- @item
- @cindex character token
- @cindex literal token
- @cindex single-character literal
- A @dfn{character token type} (or @dfn{literal token}) is written in
- the grammar using the same syntax used in C for character constants;
- for example, @code{'+'} is a character token type. A character token
- type doesn't need to be declared unless you need to specify its
- semantic value data type (@pxref{Value Type, ,Data Types of Semantic Values}), associativity, or
- precedence (@pxref{Precedence, ,Operator Precedence}).
- By convention, a character token type is used only to represent a
- token that consists of that particular character. Thus, the token
- type @code{'+'} is used to represent the character @samp{+} as a
- token. Nothing enforces this convention, but if you depart from it,
- your program will confuse other readers.
- All the usual escape sequences used in character literals in C can be
- used in Bison as well, but you must not use the null character as a
- character literal because its ASCII code, zero, is the code
- @code{yylex} returns for end-of-input (@pxref{Calling Convention, ,Calling Convention for @code{yylex}}).
- @item
- @cindex string token
- @cindex literal string token
- @cindex multiple-character literal
- WARNING: Use of string tokens such as @code{"<="} is incompatible with yacc.
- A @dfn{literal string token} is written in
- the grammar using the same syntax used in C for string constants;
- for example, @code{"<="} is a string literal. A string literal
- doesn't need to be declared unless you need to specify its
- semantic value data type (@pxref{Value Type}), associativity,
- precedence (@pxref{Precedence}). To associate a symbolic name with the
- string literal, use the @code{%thong} declaration (@pxref{Thong Decl, ,Thong Declarations});
- otherwise the lexical analyzer can retrieve the token number from the
- tables generated with the @samp{-k} or @samp{--token-table} switch.
- By convention, a string literal token is used only to represent a
- token that consists of that particular string. Thus, the token
- type @code{"<="} is used to represent the string @samp{<=} as a
- token. Nothing enforces this convention, but if you depart from it,
- your program will confuse other readers.
- All the usual escape sequences used in string literals in C can be
- used in Bison as well. The contents of the string must be two or more
- characters; use a character token for a single character and @code{/*EMPTY*/}
- for the empty token.
- @end itemize
- How you choose to write a terminal symbol has no effect on its
- grammatical meaning. That depends only on where it appears in rules and
- on when the parser function returns that symbol.
- The value returned by @code{yylex} is always one of the terminal symbols
- (or 0 for end-of-input). Whichever way you write the token type in the
- grammar rules, you write it the same way in the definition of @code{yylex}.
- The numeric code for a character token type is simply the ASCII code for
- the character, so @code{yylex} can use the identical character constant to
- generate the requisite code. Each named token type becomes a C macro in
- the parser file, so @code{yylex} can use the name to stand for the code.
- (This is why periods don't make sense in terminal symbols.)
- @xref{Calling Convention, ,Calling Convention for @code{yylex}}.
- If @code{yylex} is defined in a separate file, you need to arrange for the
- token-type macro definitions to be available there. Use the @samp{-d}
- option when you run Bison, so that it will write these macro definitions
- into a separate header file @file{@var{name}.tab.h} which you can include
- in the other source files that need it. @xref{Invocation, ,Invoking Bison}.
- The symbol @code{error} is a terminal symbol reserved for error recovery
- (@pxref{Error Recovery}); you shouldn't use it for any other purpose.
- In particular, @code{yylex} should never return this value.
- @node Rules, Recursion, Symbols, Grammar File
- @section Syntax of Grammar Rules
- @cindex rule syntax
- @cindex grammar rule syntax
- @cindex syntax of grammar rules
- A Bison grammar rule has the following general form:
- @example
- @var{result}: @var{components}@dots{}
- ;
- @end example
- @noindent
- where @var{result} is the nonterminal symbol that this rule describes
- and @var{components} are various terminal and nonterminal symbols that
- are put together by this rule (@pxref{Symbols, ,Symbols - Terminal and Nonterminal}).
- For example,
- @example
- @group
- exp: exp '+' exp
- ;
- @end group
- @end example
- @noindent
- says that two groupings of type @code{exp}, with a @samp{+} token in between,
- can be combined into a larger grouping of type @code{exp}.
- Whitespace in rules is significant only to separate symbols. You can add
- extra whitespace as you wish.
- Scattered among the components can be @var{actions} that determine
- the semantics of the rule. An action looks like this:
- @example
- @{@var{C statements}@}
- @end example
- @noindent
- Usually there is only one action and it follows the components.
- @xref{Actions}.
- @findex |
- Multiple rules for the same @var{result} can be written separately or can
- be joined with the vertical-bar character @samp{|} as follows:
- @ifinfo
- @example
- @var{result}: @var{rule1-components}@dots{}
- | @var{rule2-components}@dots{}
- @dots{}
- ;
- @end example
- @end ifinfo
- @iftex
- @example
- @group
- @var{result}: @var{rule1-components}@dots{}
- | @var{rule2-components}@dots{}
- @dots{}
- ;
- @end group
- @end example
- @end iftex
- @noindent
- They are still considered distinct rules even when joined in this way.
- If @var{components} in a rule is empty, it means that @var{result} can
- match the empty string. For example, here is how to define a
- comma-separated sequence of zero or more @code{exp} groupings:
- @example
- @group
- expseq: /* empty */
- | expseq1
- ;
- @end group
- @group
- expseq1: exp
- | expseq1 ',' exp
- ;
- @end group
- @end example
- @noindent
- It is customary to write a comment @samp{/* empty */} in each rule
- with no components.
- @node Recursion, Semantics, Rules, Grammar File
- @section Recursive Rules
- @cindex recursive rule
- A rule is called @dfn{recursive} when its @var{result} nonterminal appears
- also on its right hand side. Nearly all Bison grammars need to use
- recursion, because that is the only way to define a sequence of any number
- of somethings. Consider this recursive definition of a comma-separated
- sequence of one or more expressions:
- @example
- @group
- expseq1: exp
- | expseq1 ',' exp
- ;
- @end group
- @end example
- @cindex left recursion
- @cindex right recursion
- @noindent
- Since the recursive use of @code{expseq1} is the leftmost symbol in the
- right hand side, we call this @dfn{left recursion}. By contrast, here
- the same construct is defined using @dfn{right recursion}:
- @example
- @group
- expseq1: exp
- | exp ',' expseq1
- ;
- @end group
- @end example
- @noindent
- Any kind of sequence can be defined using either left recursion or
- right recursion, but you should always use left recursion, because it
- can parse a sequence of any number of elements with bounded stack
- space. Right recursion uses up space on the Bison stack in proportion
- to the number of elements in the sequence, because all the elements
- must be shifted onto the stack before the rule can be applied even
- once. @xref{Algorithm, ,The Bison Parser Algorithm }, for
- further explanation of this.
- @cindex mutual recursion
- @dfn{Indirect} or @dfn{mutual} recursion occurs when the result of the
- rule does not appear directly on its right hand side, but does appear
- in rules for other nonterminals which do appear on its right hand
- side.
- For example:
- @example
- @group
- expr: primary
- | primary '+' primary
- ;
- @end group
- @group
- primary: constant
- | '(' expr ')'
- ;
- @end group
- @end example
- @noindent
- defines two mutually-recursive nonterminals, since each refers to the
- other.
- @node Semantics, Declarations, Recursion, Grammar File
- @section Defining Language Semantics
- @cindex defining language semantics
- @cindex language semantics, defining
- The grammar rules for a language determine only the syntax. The semantics
- are determined by the semantic values associated with various tokens and
- groupings, and by the actions taken when various groupings are recognized.
- For example, the calculator calculates properly because the value
- associated with each expression is the proper number; it adds properly
- because the action for the grouping @w{@samp{@var{x} + @var{y}}} is to add
- the numbers associated with @var{x} and @var{y}.
- @menu
- * Value Type:: Specifying one data type for all semantic values.
- * Multiple Types:: Specifying several alternative data types.
- * Actions:: An action is the semantic definition of a grammar rule.
- * Action Types:: Specifying data types for actions to operate on.
- * Mid-Rule Actions:: Most actions go at the end of a rule.
- This says when, why and how to use the exceptional
- action in the middle of a rule.
- @end menu
- @node Value Type, Multiple Types, , Semantics
- @subsection Data Types of Semantic Values
- @cindex semantic value type
- @cindex value type, semantic
- @cindex data types of semantic values
- @cindex default data type
- In a simple program it may be sufficient to use the same data type for
- the semantic values of all language constructs. This was true in the
- RPN and infix calculator examples (@pxref{RPN Calc, ,Reverse Polish Notation Calculator}).
- Bison's default is to use type @code{int} for all semantic values. To
- specify some other type, define @code{YYSTYPE} as a macro, like this:
- @example
- #define YYSTYPE double
- @end example
- @noindent
- This macro definition must go in the C declarations section of the grammar
- file (@pxref{Grammar Outline, ,Outline of a Bison Grammar}).
- @node Multiple Types, Actions, Value Type, Semantics
- @subsection More Than One Value Type
- In most programs, you will need different data types for different kinds
- of tokens and groupings. For example, a numeric constant may need type
- @code{int} or @code{long}, while a string constant needs type @code{char *},
- and an identifier might need a pointer to an entry in the symbol table.
- To use more than one data type for semantic values in one parser, Bison
- requires you to do two things:
- @itemize @bullet
- @item
- Specify the entire collection of possible data types, with the
- @code{%union} Bison declaration (@pxref{Union Decl, ,The Collection of Value Types}).
- @item
- Choose one of those types for each symbol (terminal or nonterminal)
- for which semantic values are used. This is done for tokens with the
- @code{%token} Bison declaration (@pxref{Token Decl, ,Token Type Names}) and for groupings
- with the @code{%type} Bison declaration (@pxref{Type Decl, ,Nonterminal Symbols}).
- @end itemize
- @node Actions, Action Types, Multiple Types, Semantics
- @subsection Actions
- @cindex action
- @vindex $$
- @vindex $@var{n}
- An action accompanies a syntactic rule and contains C code to be executed
- each time an instance of that rule is recognized. The task of most actions
- is to compute a semantic value for the grouping built by the rule from the
- semantic values associated with tokens or smaller groupings.
- An action consists of C statements surrounded by braces, much like a
- compound statement in C. It can be placed at any position in the rule; it
- is executed at that position. Most rules have just one action at the end
- of the rule, following all the components. Actions in the middle of a rule
- are tricky and used only for special purposes (@pxref{Mid-Rule Actions, ,Actions in Mid-Rule}).
- The C code in an action can refer to the semantic values of the components
- matched by the rule with the construct @code{$@var{n}}, which stands for
- the value of the @var{n}th component. The semantic value for the grouping
- being constructed is @code{$$}. (Bison translates both of these constructs
- into array element references when it copies the actions into the parser
- file.)
- Here is a typical example:
- @example
- @group
- exp: @dots{}
- | exp '+' exp
- @{ $$ = $1 + $3; @}
- @end group
- @end example
- @noindent
- This rule constructs an @code{exp} from two smaller @code{exp} groupings
- connected by a plus-sign token. In the action, @code{$1} and @code{$3}
- refer to the semantic values of the two component @code{exp} groupings,
- which are the first and third symbols on the right hand side of the rule.
- The sum is stored into @code{$$} so that it becomes the semantic value of
- the addition-expression just recognized by the rule. If there were a
- useful semantic value associated with the @samp{+} token, it could be
- referred to as @code{$2}.@refill
- @cindex default action
- If you don't specify an action for a rule, Bison supplies a default:
- @w{@code{$$ = $1}.} Thus, the value of the first symbol in the rule becomes
- the value of the whole rule. Of course, the default rule is valid only
- if the two data types match. There is no meaningful default action for
- an empty rule; every empty rule must have an explicit action unless the
- rule's value does not matter.
- @code{$@var{n}} with @var{n} zero or negative is allowed for reference
- to tokens and groupings on the stack @emph{before} those that match the
- current rule. This is a very risky practice, and to use it reliably
- you must be certain of the context in which the rule is applied. Here
- is a case in which you can use this reliably:
- @example
- @group
- foo: expr bar '+' expr @{ @dots{} @}
- | expr bar '-' expr @{ @dots{} @}
- ;
- @end group
- @group
- bar: /* empty */
- @{ previous_expr = $0; @}
- ;
- @end group
- @end example
- As long as @code{bar} is used only in the fashion shown here, @code{$0}
- always refers to the @code{expr} which precedes @code{bar} in the
- definition of @code{foo}.
- @node Action Types, Mid-Rule Actions, Actions, Semantics
- @subsection Data Types of Values in Actions
- @cindex action data types
- @cindex data types in actions
- If you have chosen a single data type for semantic values, the @code{$$}
- and @code{$@var{n}} constructs always have that data type.
- If you have used @code{%union} to specify a variety of data types, then you
- must declare a choice among these types for each terminal or nonterminal
- symbol that can have a semantic value. Then each time you use @code{$$} or
- @code{$@var{n}}, its data type is determined by which symbol it refers to
- in the rule. In this example,@refill
- @example
- @group
- exp: @dots{}
- | exp '+' exp
- @{ $$ = $1 + $3; @}
- @end group
- @end example
- @noindent
- @code{$1} and @code{$3} refer to instances of @code{exp}, so they all
- have the data type declared for the nonterminal symbol @code{exp}. If
- @code{$2} were used, it would have the data type declared for the
- terminal symbol @code{'+'}, whatever that might be.@refill
- Alternatively, you can specify the data type when you refer to the value,
- by inserting @samp{<@var{type}>} after the @samp{$} at the beginning of the
- reference. For example, if you have defined types as shown here:
- @example
- @group
- %union @{
- int itype;
- double dtype;
- @}
- @end group
- @end example
- @noindent
- then you can write @code{$<itype>1} to refer to the first subunit of the
- rule as an integer, or @code{$<dtype>1} to refer to it as a double.
- @node Mid-Rule Actions, , Action Types, Semantics
- @subsection Actions in Mid-Rule
- @cindex actions in mid-rule
- @cindex mid-rule actions
- Occasionally it is useful to put an action in the middle of a rule.
- These actions are written just like usual end-of-rule actions, but they
- are executed before the parser even recognizes the following components.
- A mid-rule action may refer to the components preceding it using
- @code{$@var{n}}, but it may not refer to subsequent components because
- it is run before they are parsed.
- The mid-rule action itself counts as one of the components of the rule.
- This makes a difference when there is another action later in the same rule
- (and usually there is another at the end): you have to count the actions
- along with the symbols when working out which number @var{n} to use in
- @code{$@var{n}}.
- The mid-rule action can also have a semantic value. The action can set
- its value with an assignment to @code{$$}, and actions later in the rule
- can refer to the value using @code{$@var{n}}. Since there is no symbol
- to name the action, there is no way to declare a data type for the value
- in advance, so you must use the @samp{$<@dots{}>} construct to specify a
- data type each time you refer to this value.
- There is no way to set the value of the entire rule with a mid-rule
- action, because assignments to @code{$$} do not have that effect. The
- only way to set the value for the entire rule is with an ordinary action
- at the end of the rule.
- Here is an example from a hypothetical compiler, handling a @code{let}
- statement that looks like @samp{let (@var{variable}) @var{statement}} and
- serves to create a variable named @var{variable} temporarily for the
- duration of @var{statement}. To parse this construct, we must put
- @var{variable} into the symbol table while @var{statement} is parsed, then
- remove it afterward. Here is how it is done:
- @example
- @group
- stmt: LET '(' var ')'
- @{ $<context>$ = push_context ();
- declare_variable ($3); @}
- stmt @{ $$ = $6;
- pop_context ($<context>5); @}
- @end group
- @end example
- @noindent
- As soon as @samp{let (@var{variable})} has been recognized, the first
- action is run. It saves a copy of the current semantic context (the
- list of accessible variables) as its semantic value, using alternative
- @code{context} in the data-type union. Then it calls
- @code{declare_variable} to add the new variable to that list. Once the
- first action is finished, the embedded statement @code{stmt} can be
- parsed. Note that the mid-rule action is component number 5, so the
- @samp{stmt} is component number 6.
- After the embedded statement is parsed, its semantic value becomes the
- value of the entire @code{let}-statement. Then the semantic value from the
- earlier action is used to restore the prior list of variables. This
- removes the temporary @code{let}-variable from the list so that it won't
- appear to exist while the rest of the program is parsed.
- Taking action before a rule is completely recognized often leads to
- conflicts since the parser must commit to a parse in order to execute the
- action. For example, the following two rules, without mid-rule actions,
- can coexist in a working parser because the parser can shift the open-brace
- token and look at what follows before deciding whether there is a
- declaration or not:
- @example
- @group
- compound: '@{' declarations statements '@}'
- | '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- But when we add a mid-rule action as follows, the rules become nonfunctional:
- @example
- @group
- compound: @{ prepare_for_local_variables (); @}
- '@{' declarations statements '@}'
- @end group
- @group
- | '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- Now the parser is forced to decide whether to run the mid-rule action
- when it has read no farther than the open-brace. In other words, it
- must commit to using one rule or the other, without sufficient
- information to do it correctly. (The open-brace token is what is called
- the @dfn{look-ahead} token at this time, since the parser is still
- deciding what to do about it. @xref{Look-Ahead, ,Look-Ahead Tokens}.)
- You might think that you could correct the problem by putting identical
- actions into the two rules, like this:
- @example
- @group
- compound: @{ prepare_for_local_variables (); @}
- '@{' declarations statements '@}'
- | @{ prepare_for_local_variables (); @}
- '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- But this does not help, because Bison does not realize that the two actions
- are identical. (Bison never tries to understand the C code in an action.)
- If the grammar is such that a declaration can be distinguished from a
- statement by the first token (which is true in C), then one solution which
- does work is to put the action after the open-brace, like this:
- @example
- @group
- compound: '@{' @{ prepare_for_local_variables (); @}
- declarations statements '@}'
- | '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- Now the first token of the following declaration or statement,
- which would in any case tell Bison which rule to use, can still do so.
- Another solution is to bury the action inside a nonterminal symbol which
- serves as a subroutine:
- @example
- @group
- subroutine: /* empty */
- @{ prepare_for_local_variables (); @}
- ;
- @end group
- @group
- compound: subroutine
- '@{' declarations statements '@}'
- | subroutine
- '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- Now Bison can execute the action in the rule for @code{subroutine} without
- deciding which rule for @code{compound} it will eventually use. Note that
- the action is now at the end of its rule. Any mid-rule action can be
- converted to an end-of-rule action in this way, and this is what Bison
- actually does to implement mid-rule actions.
- @node Declarations, Multiple Parsers, Semantics, Grammar File
- @section Bison Declarations
- @cindex declarations, Bison
- @cindex Bison declarations
- The @dfn{Bison declarations} section of a Bison grammar defines the symbols
- used in formulating the grammar and the data types of semantic values.
- @xref{Symbols, ,Symbols - Terminal and Nonterminal}.
- All token type names (but not single-character literal tokens such as
- @code{'+'} and @code{'*'}) must be declared. Nonterminal symbols must be
- declared if you need to specify which data type to use for the semantic
- value (@pxref{Multiple Types, ,More Than One Value Type}).
- The first rule in the file also specifies the start symbol, by default.
- If you want some other symbol to be the start symbol, you must declare
- it explicitly (@pxref{Language and Grammar, ,Languages and Context-Free Grammars}).
- @menu
- * Token Decl:: Declaring terminal symbols.
- * Thong Decl:: Declaring terminals for multi-character tokens.
- * Precedence Decl:: Declaring terminals with precedence and associativity.
- * Union Decl:: Declaring the set of all semantic value types.
- * Type Decl:: Declaring the choice of type for a nonterminal symbol.
- * Expect Decl:: Suppressing warnings about shift/reduce conflicts.
- * Start Decl:: Specifying the start symbol.
- * Pure Decl:: Requesting a reentrant parser.
- * Decl Summary:: Table of all Bison declarations.
- @end menu
- @node Token Decl, Thong Decl, , Declarations
- @subsection Token Type Names
- @cindex declaring token type names
- @cindex token type names, declaring
- @findex %token
- The basic way to declare a token type name (terminal symbol) is as follows:
- @example
- %token @var{name}
- @end example
- Bison will convert this into a @code{#define} directive in
- the parser, so that the function @code{yylex} (if it is in this file)
- can use the name @var{name} to stand for this token type's code.
- Alternatively, you can use @code{%left}, @code{%right}, or @code{%nonassoc}
- instead of @code{%token}, if you wish to specify precedence.
- @xref{Precedence Decl, ,Operator Precedence}.
- You can explicitly specify the numeric code for a token type by appending
- an integer value in the field immediately following the token name:
- @example
- %token NUM 300
- @end example
- @noindent
- It is generally best, however, to let Bison choose the numeric codes for
- all token types. Bison will automatically select codes that don't conflict
- with each other or with ASCII characters.
- In the event that the stack type is a union, you must augment the
- @code{%token} or other token declaration to include the data type
- alternative delimited by angle-brackets (@pxref{Multiple Types, ,More Than One Value Type}).
- For example:
- @example
- @group
- %union @{ /* define stack type */
- double val;
- symrec *tptr;
- @}
- %token <val> NUM /* define token NUM and its type */
- @end group
- @end example
- @node Thong Decl, Precedence Decl, Token Decl, Declarations
- @subsection Thong Declarations
- @cindex thong declarations
- @cindex multi-character terminal symbols
- @cindex declaring thongs
- @cindex declaring string literal tokens
- WARNING: If you use thong declarations,
- your grammar will NOT be compatible with Yacc.
- The @code{%thong} declaration associates a symbol with a multiple-character
- string literal token. The symbol will be entered in the #defines
- generated by the @samp{-d} switch and may then be used to report the
- token by the lexical analyzer.
- The syntax of a thong declaration has four elements:
- the keyword, the type (optional), the token name, and a quoted string
- giving the contents of the thong. (Backslashes may be used as the
- usual escapes in the string.)
- For example, a C parser might specify
- @example
- %thong <operator> OR "||"
- %thong <operator> LE "<="
- %left OR "<="
- @end example
- @noindent
- Either the symbolic or the string literal form may be used in
- further declarations or the grammar rules.
- Whether or not there is a @code{%thong} declaration, the symbolic name
- does not appear in the generated @code{yytname} table; only the string
- literal appears. Its corresponding token number is available as the index
- of its entry in the @code{yytname} table generated in response
- to the @samp{-k} or @samp{--token-table} switch.
- @node Precedence Decl, Union Decl, Thong Decl, Declarations
- @subsection Operator Precedence
- @cindex precedence declarations
- @cindex declaring operator precedence
- @cindex operator precedence, declaring
- Use the @code{%left}, @code{%right} or @code{%nonassoc} declaration to
- declare a token and specify its precedence and associativity, all at
- once. These are called @dfn{precedence declarations}.
- @xref{Precedence, ,Operator Precedence}, for general information on operator precedence.
- The syntax of a precedence declaration is the same as that of
- @code{%token}: either
- @example
- %left @var{symbols}@dots{}
- @end example
- @noindent
- or
- @example
- %left <@var{type}> @var{symbols}@dots{}
- @end example
- And indeed any of these declarations serves the purposes of @code{%token}.
- But in addition, they specify the associativity and relative precedence for
- all the @var{symbols}:
- @itemize @bullet
- @item
- The associativity of an operator @var{op} determines how repeated uses
- of the operator nest: whether @samp{@var{x} @var{op} @var{y} @var{op}
- @var{z}} is parsed by grouping @var{x} with @var{y} first or by
- grouping @var{y} with @var{z} first. @code{%left} specifies
- left-associativity (grouping @var{x} with @var{y} first) and
- @code{%right} specifies right-associativity (grouping @var{y} with
- @var{z} first). @code{%nonassoc} specifies no associativity, which
- means that @samp{@var{x} @var{op} @var{y} @var{op} @var{z}} is
- considered a syntax error.
- @item
- The precedence of an operator determines how it nests with other operators.
- All the tokens declared in a single precedence declaration have equal
- precedence and nest together according to their associativity.
- When two tokens declared in different precedence declarations associate,
- the one declared later has the higher precedence and is grouped first.
- @end itemize
- @node Union Decl, Type Decl, Precedence Decl, Declarations
- @subsection The Collection of Value Types
- @cindex declaring value types
- @cindex value types, declaring
- @findex %union
- The @code{%union} declaration specifies the entire collection of possible
- data types for semantic values. The keyword @code{%union} is followed by a
- pair of braces containing the same thing that goes inside a @code{union} in
- C.
- For example:
- @example
- @group
- %union @{
- double val;
- symrec *tptr;
- @}
- @end group
- @end example
- @noindent
- This says that the two alternative types are @code{double} and @code{symrec
- *}. They are given names @code{val} and @code{tptr}; these names are used
- in the @code{%token} and @code{%type} declarations to pick one of the types
- for a terminal or nonterminal symbol (@pxref{Type Decl, ,Nonterminal Symbols}).
- Note that, unlike making a @code{union} declaration in C, you do not write
- a semicolon after the closing brace.
- @node Type Decl, Expect Decl, Union Decl, Declarations
- @subsection Nonterminal Symbols
- @cindex declaring value types, nonterminals
- @cindex value types, nonterminals, declaring
- @findex %type
- @noindent
- When you use @code{%union} to specify multiple value types, you must
- declare the value type of each nonterminal symbol for which values are
- used. This is done with a @code{%type} declaration, like this:
- @example
- %type <@var{type}> @var{nonterminal}@dots{}
- @end example
- @noindent
- Here @var{nonterminal} is the name of a nonterminal symbol, and @var{type}
- is the name given in the @code{%union} to the alternative that you want
- (@pxref{Union Decl, ,The Collection of Value Types}). You can give any number of nonterminal symbols in
- the same @code{%type} declaration, if they have the same value type. Use
- spaces to separate the symbol names.
- The value types of symbols may be declared by putting
- @code{<}@var{type}@code{>} after any of these declarators:
- @code{%type}, @code{%token}, @code{%term}, @code{%nterm}, @code{%equiv},
- @code{%assoc}, @code{%left}, @code{%right}, @code{%nonassoc}, @code{%binary}.
- @node Expect Decl, Start Decl, Type Decl, Declarations
- @subsection Suppressing Conflict Warnings
- @cindex suppressing conflict warnings
- @cindex preventing warnings about conflicts
- @cindex warnings, preventing
- @cindex conflicts, suppressing warnings of
- @findex %expect
- Bison normally warns if there are any conflicts in the grammar
- (@pxref{Shift/Reduce, ,Shift/Reduce Conflicts}), but most real grammars have harmless shift/reduce
- conflicts which are resolved in a predictable way and would be difficult to
- eliminate. It is desirable to suppress the warning about these conflicts
- unless the number of conflicts changes. You can do this with the
- @code{%expect} declaration.
- The declaration looks like this:
- @example
- %expect @var{n}
- @end example
- Here @var{n} is a decimal integer. The declaration says there should be no
- warning if there are @var{n} shift/reduce conflicts and no reduce/reduce
- conflicts. The usual warning is given if there are either more or fewer
- conflicts, or if there are any reduce/reduce conflicts.
- In general, using @code{%expect} involves these steps:
- @itemize @bullet
- @item
- Compile your grammar without @code{%expect}. Use the @samp{-v} option
- to get a verbose list of where the conflicts occur. Bison will also
- print the number of conflicts.
- @item
- Check each of the conflicts to make sure that Bison's default
- resolution is what you really want. If not, rewrite the grammar and
- go back to the beginning.
- @item
- Add an @code{%expect} declaration, copying the number @var{n} from the
- number which Bison printed.
- @end itemize
- Now Bison will stop annoying you about the conflicts you have checked, but
- it will warn you again if changes in the grammar result in additional
- conflicts.
- @node Start Decl, Pure Decl, Expect Decl, Declarations
- @subsection The Start-Symbol
- @cindex declaring the start symbol
- @cindex start symbol, declaring
- @cindex default start symbol
- @findex %start
- Bison assumes by default that the start symbol for the grammar is the first
- nonterminal specified in the grammar specification section. The programmer
- may override this restriction with the @code{%start} declaration as follows:
- @example
- %start @var{symbol}
- @end example
- @node Pure Decl, Decl Summary, Start Decl, Declarations
- @subsection A Pure (Reentrant) Parser
- @cindex reentrant parser
- @cindex pure parser
- @findex %pure_parser
- A @dfn{reentrant} program is one which does not alter in the course of
- execution; in other words, it consists entirely of @dfn{pure} (read-only)
- code. Reentrancy is important whenever asynchronous execution is possible;
- for example, a nonreentrant program may not be safe to call from a signal
- handler. In systems with multiple threads of control, a nonreentrant
- program must be called only within interlocks.
- The Bison parser is not normally a reentrant program, because it uses
- statically allocated variables for communication with @code{yylex}. These
- variables include @code{yylval} and @code{yylloc}.
- The Bison declaration @code{%pure_parser} says that you want the parser
- to be reentrant. It looks like this:
- @example
- %pure_parser
- @end example
- The effect is that the two communication variables become local
- variables in @code{yyparse}, and a different calling convention is used for
- the lexical analyzer function @code{yylex}. @xref{Pure Calling, ,Calling for Pure Parsers}, for the
- details of this. The variable @code{yynerrs} also becomes local in
- @code{yyparse} (@pxref{Error Reporting, ,The Error Reporting Function @code{yyerror}}). The convention for calling
- @code{yyparse} itself is unchanged.
- @node Decl Summary, , Pure Decl, Declarations
- @subsection Bison Declaration Summary
- @cindex Bison declaration summary
- @cindex declaration summary
- @cindex summary, Bison declaration
- Here is a summary of all Bison declarations:
- @table @code
- @item %union
- @item %union
- Declare the collection of data types that semantic values may have
- (@pxref{Union Decl, ,The Collection of Value Types}).
- @item %token
- Declare a terminal symbol (token type name) with no precedence
- or associativity specified (@pxref{Token Decl, ,Token Type Names}).
- @item %thong
- Declare a token name to refer to a multiple-character string literal
- token. (@pxref{Thong Decl, ,Thong Declarations}.)
- @item %right
- Declare a terminal symbol (token type name) that is right-associative
- (@pxref{Precedence Decl, ,Operator Precedence}).
- @item %left
- Declare a terminal symbol (token type name) that is left-associative
- (@pxref{Precedence Decl, ,Operator Precedence}).
- @item %nonassoc
- Declare a terminal symbol (token type name) that is nonassociative
- (using it in a way that would be associative is a syntax error)
- (@pxref{Precedence Decl, ,Operator Precedence}).
- @item %type
- Declare the type of semantic values for a nonterminal symbol
- (@pxref{Type Decl, ,Nonterminal Symbols}).
- @item %start
- Specify the grammar's start symbol (@pxref{Start Decl, ,The Start-Symbol}).
- @item %expect
- Declare the expected number of shift-reduce conflicts
- (@pxref{Expect Decl, ,Suppressing Conflict Warnings}).
- @item %pure_parser
- Request a pure (reentrant) parser program (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}).
- @item %semantic_parser
- This causes Bison to utilize a more sophisticated parsing machine.
- It is only appropriate if you have specified @code{%guard@{...@}}
- segments after one or more rules of the grammar.
- @end table
- @node Multiple Parsers,, Declarations, Grammar File
- @section Multiple Parsers in the Same Program
- Most programs that use Bison parse only one language and therefore contain
- only one Bison parser. But what if you want to parse more than one
- language with the same program? Then you need to avoid a name conflict
- between different definitions of @code{yyparse}, @code{yylval}, and so on.
- The easy way to do this is to use the option @samp{-p @var{prefix}}
- (@pxref{Invocation, ,Invoking Bison}). This renames the interface functions and
- variables of the Bison parser to start with @var{prefix} instead of
- @samp{yy}. You can use this to give each parser distinct names that do
- not conflict.
- The precise list of symbols renamed is @code{yyparse}, @code{yylex},
- @code{yyerror}, @code{yylval}, @code{yychar} and @code{yydebug}. For
- example, if you use @samp{-p c}, the names become @code{cparse},
- @code{clex}, and so on.
- @strong{All the other variables and macros associated with Bison are not
- renamed.} These others are not global; there is no conflict if the same
- name is used in different parsers. For example, @code{YYSTYPE} is not
- renamed, but defining this in different ways in different parsers causes
- no trouble (@pxref{Value Type, ,Data Types of Semantic Values}).
- The @samp{-p} option works by adding macro definitions to the beginning
- of the parser source file, defining @code{yyparse} as
- @code{@var{prefix}parse}, and so on. This effectively substitutes one
- name for the other in the entire parser file.
- @node Interface, Algorithm, Grammar File, Top
- @chapter Parser C-Language Interface
- @cindex C-language interface
- @cindex interface
- The Bison parser is actually a C function named @code{yyparse}. Here we
- describe the interface conventions of @code{yyparse} and the other
- functions that it needs to use.
- Keep in mind that the parser uses many C identifiers starting with
- @samp{yy} and @samp{YY} for internal purposes. If you use such an
- identifier (aside from those in this manual) in an action or in additional
- C code in the grammar file, you are likely to run into trouble.
- @menu
- * Parser Function:: How to call @code{yyparse} and what it returns.
- * Lexical:: You must supply a function @code{yylex}
- which reads tokens.
- * Error Reporting:: You must supply a function @code{yyerror}.
- * Action Features:: Special features for use in actions.
- @end menu
- @node Parser Function, Lexical, , Interface
- @section The Parser Function @code{yyparse}
- @findex yyparse
- You call the function @code{yyparse} to cause parsing to occur. This
- function reads tokens, executes actions, and ultimately returns when it
- encounters end-of-input or an unrecoverable syntax error. You can also
- write an action which directs @code{yyparse} to return immediately without
- reading further.
- The value returned by @code{yyparse} is 0 if parsing was successful (return
- is due to end-of-input).
- The value is 1 if parsing failed (return is due to a syntax error).
- In an action, you can cause immediate return from @code{yyparse} by using
- these macros:
- @table @code
- @item YYACCEPT
- @findex YYACCEPT
- Return immediately with value 0 (to report success).
- @item YYABORT
- @findex YYABORT
- Return immediately with value 1 (to report failure).
- @end table
- @node Lexical, Error Reporting, Parser Function, Interface
- @section The Lexical Analyzer Function @code{yylex}
- @findex yylex
- @cindex lexical analyzer
- The @dfn{lexical analyzer} function, @code{yylex}, recognizes tokens from
- the input stream and returns them to the parser. Bison does not create
- this function automatically; you must write it so that @code{yyparse} can
- call it. The function is sometimes referred to as a lexical scanner.
- In simple programs, @code{yylex} is often defined at the end of the Bison
- grammar file. If @code{yylex} is defined in a separate source file, you
- need to arrange for the token-type macro definitions to be available there.
- To do this, use the @samp{-d} option when you run Bison, so that it will
- write these macro definitions into a separate header file
- @file{@var{name}.tab.h} which you can include in the other source files
- that need it. @xref{Invocation, ,Invoking Bison}.@refill
- @menu
- * Calling Convention:: How @code{yyparse} calls @code{yylex}.
- * Token Values:: How @code{yylex} must return the semantic value
- of the token it has read.
- * Token Positions:: How @code{yylex} must return the text position
- (line number, etc.) of the token, if the
- actions want that.
- * Pure Calling:: How the calling convention differs
- in a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}).
- @end menu
- @node Calling Convention, Token Values, , Lexical
- @subsection Calling Convention for @code{yylex}
- The value that @code{yylex} returns must be the numeric code for the type
- of token it has just found, or 0 for end-of-input.
- When a token is referred to in the grammar rules by a name, that name
- in the parser file becomes a C macro whose definition is the proper
- numeric code for that token type. So @code{yylex} can use the name
- to indicate that type. @xref{Symbols, ,Symbols - Terminal and Nonterminal}.
- When a token is referred to in the grammar rules by a character literal,
- the numeric code for that character is also the code for the token type.
- So @code{yylex} can simply return that character code. The null character
- must not be used this way, because its code is zero and that is what
- signifies end-of-input.
- Here is an example showing these things:
- @example
- yylex ()
- @{
- @dots{}
- if (c == EOF) /* Detect end of file. */
- return 0;
- @dots{}
- if (c == '+' || c == '-')
- return c; /* Assume token type for `+' is '+'. */
- @dots{}
- return INT; /* Return the type of the token. */
- @dots{}
- @}
- @end example
- @noindent
- This interface has been designed so that the output from the @code{lex}
- utility can be used without change as the definition of @code{yylex}.
- @node Token Values, Token Positions, Calling Convention, Lexical
- @subsection Semantic Values of Tokens
- @vindex yylval
- In an ordinary (nonreentrant) parser, the semantic value of the token must
- be stored into the global variable @code{yylval}. When you are using
- just one data type for semantic values, @code{yylval} has that type.
- Thus, if the type is @code{int} (the default), you might write this in
- @code{yylex}:
- @example
- @group
- @dots{}
- yylval = value; /* Put value onto Bison stack. */
- return INT; /* Return the type of the token. */
- @dots{}
- @end group
- @end example
- When you are using multiple data types, @code{yylval}'s type is a union
- made from the @code{%union} declaration (@pxref{Union Decl, ,The Collection of Value Types}). So when
- you store a token's value, you must use the proper member of the union.
- If the @code{%union} declaration looks like this:
- @example
- @group
- %union @{
- int intval;
- double val;
- symrec *tptr;
- @}
- @end group
- @end example
- @noindent
- then the code in @code{yylex} might look like this:
- @example
- @group
- @dots{}
- yylval.intval = value; /* Put value onto Bison stack. */
- return INT; /* Return the type of the token. */
- @dots{}
- @end group
- @end example
- @node Token Positions, Pure Calling, Token Values, Lexical
- @subsection Textual Positions of Tokens
- @vindex yylloc
- If you are using the @samp{@@@var{n}}-feature (@pxref{Action Features, ,Special Features for Use in Actions}) in
- actions to keep track of the textual locations of tokens and groupings,
- then you must provide this information in @code{yylex}. The function
- @code{yyparse} expects to find the textual location of a token just parsed
- in the global variable @code{yylloc}. So @code{yylex} must store the
- proper data in that variable. The value of @code{yylloc} is a structure
- and you need only initialize the members that are going to be used by the
- actions. The four members are called @code{first_line},
- @code{first_column}, @code{last_line} and @code{last_column}. Note that
- the use of this feature makes the parser noticeably slower.
- @tindex YYLTYPE
- The data type of @code{yylloc} has the name @code{YYLTYPE}.
- @node Pure Calling, , Token Positions, Lexical
- @subsection Calling for Pure Parsers
- When you use the Bison declaration @code{%pure_parser} to request a pure,
- reentrant parser, the global communication variables @code{yylval} and
- @code{yylloc} cannot be used. (@xref{Pure Decl, ,A Pure (Reentrant) Parser}.) In such parsers the
- two global variables are replaced by pointers passed as arguments to
- @code{yylex}. You must declare them as shown here, and pass the
- information back by storing it through those pointers.
- @example
- yylex (lvalp, llocp)
- YYSTYPE *lvalp;
- YYLTYPE *llocp;
- @{
- @dots{}
- *lvalp = value; /* Put value onto Bison stack. */
- return INT; /* Return the type of the token. */
- @dots{}
- @}
- @end example
- If the grammar file does not use the @samp{@@} constructs to refer to
- textual positions, then the type @code{YYLTYPE} will not be defined. In
- this case, omit the second argument; @code{yylex} will be called with
- only one argument.
- @node Error Reporting, Action Features, Lexical, Interface
- @section The Error Reporting Function @code{yyerror}
- @cindex error reporting function
- @findex yyerror
- @cindex parse error
- @cindex syntax error
- The Bison parser detects a @dfn{parse error} or @dfn{syntax error}
- whenever it reads a token which cannot satisfy any syntax rule. A
- action in the grammar can also explicitly proclaim an error, using the
- macro @code{YYERROR} (@pxref{Action Features, ,Special Features for Use in Actions}).
- The Bison parser expects to report the error by calling an error
- reporting function named @code{yyerror}, which you must supply. It is
- called by @code{yyparse} whenever a syntax error is found, and it
- receives one argument. For a parse error, the string is normally
- @w{@code{"parse error"}}.
- @findex YYERROR_VERBOSE
- If you define the macro @code{YYERROR_VERBOSE} in the Bison declarations
- section (@pxref{Bison Declarations, ,The Bison Declarations Section}), then Bison provides a more verbose
- and specific error message string instead of just plain @w{@code{"parse
- error"}}. It doesn't matter what definition you use for
- @code{YYERROR_VERBOSE}, just whether you define it.
- The parser can detect one other kind of error: stack overflow. This
- happens when the input contains constructions that are very deeply
- nested. It isn't likely you will encounter this, since the Bison
- parser extends its stack automatically up to a very large limit. But
- if overflow happens, @code{yyparse} calls @code{yyerror} in the usual
- fashion, except that the argument string is @w{@code{"parser stack
- overflow"}}.
- The following definition suffices in simple programs:
- @example
- @group
- yyerror (s)
- char *s;
- @{
- @end group
- @group
- fprintf (stderr, "%s\n", s);
- @}
- @end group
- @end example
- After @code{yyerror} returns to @code{yyparse}, the latter will attempt
- error recovery if you have written suitable error recovery grammar rules
- (@pxref{Error Recovery}). If recovery is impossible, @code{yyparse} will
- immediately return 1.
- @vindex yynerrs
- The variable @code{yynerrs} contains the number of syntax errors
- encountered so far. Normally this variable is global; but if you
- request a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}) then it is a local variable
- which only the actions can access.
- @node Action Features, , Error Reporting, Interface
- @section Special Features for Use in Actions
- @cindex summary, action features
- @cindex action features summary
- Here is a table of Bison constructs, variables and macros that
- are useful in actions.
- @table @samp
- @item $$
- Acts like a variable that contains the semantic value for the
- grouping made by the current rule. @xref{Actions}.
- @item $@var{n}
- Acts like a variable that contains the semantic value for the
- @var{n}th component of the current rule. @xref{Actions}.
- @item $<@var{typealt}>$
- Like @code{$$} but specifies alternative @var{typealt} in the union
- specified by the @code{%union} declaration. @xref{Action Types, ,Data Types of Values in Actions}.
- @item $<@var{typealt}>@var{n}
- Like @code{$@var{n}} but specifies alternative @var{typealt} in the
- union specified by the @code{%union} declaration.
- @xref{Action Types, ,Data Types of Values in Actions}.@refill
- @item YYABORT;
- Return immediately from @code{yyparse}, indicating failure.
- @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYACCEPT;
- Return immediately from @code{yyparse}, indicating success.
- @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYBACKUP (@var{token}, @var{value});
- @findex YYBACKUP
- Unshift a token. This macro is allowed only for rules that reduce
- a single value, and only when there is no look-ahead token.
- It installs a look-ahead token with token type @var{token} and
- semantic value @var{value}; then it discards the value that was
- going to be reduced by this rule.
- If the macro is used when it is not valid, such as when there is
- a look-ahead token already, then it reports a syntax error with
- a message @samp{cannot back up} and performs ordinary error
- recovery.
- In either case, the rest of the action is not executed.
- @item YYEMPTY
- @vindex YYEMPTY
- Value stored in @code{yychar} when there is no look-ahead token.
- @item YYERROR;
- @findex YYERROR
- Cause an immediate syntax error. This statement initiates error
- recovery just as if the parser itself had detected an error; however, it
- does not call @code{yyerror}, and does not print any message. If you
- want to print an error message, call @code{yyerror} explicitly before
- the @samp{YYERROR;} statement. @xref{Error Recovery}.
- @item YYRECOVERING
- This macro stands for an expression that has the value 1 when the parser
- is recovering from a syntax error, and 0 the rest of the time.
- @xref{Error Recovery}.
- @item yychar
- Variable containing the current look-ahead token. (In a pure parser,
- this is actually a local variable within @code{yyparse}.) When there is
- no look-ahead token, the value @code{YYEMPTY} is stored in the variable.
- @xref{Look-Ahead, ,Look-Ahead Tokens}.
- @item yyclearin;
- Discard the current look-ahead token. This is useful primarily in
- error rules. @xref{Error Recovery}.
- @item yyerrok;
- Resume generating error messages immediately for subsequent syntax
- errors. This is useful primarily in error rules.
- @xref{Error Recovery}.
- @item @@@var{n}
- @findex @@@var{n}
- Acts like a structure variable containing information on the line
- numbers and column numbers of the @var{n}th component of the current
- rule. The structure has four members, like this:
- @example
- struct @{
- int first_line, last_line;
- int first_column, last_column;
- @};
- @end example
- Thus, to get the starting line number of the third component, use
- @samp{@@3.first_line}.
- In order for the members of this structure to contain valid information,
- you must make @code{yylex} supply this information about each token.
- If you need only certain members, then @code{yylex} need only fill in
- those members.
- The use of this feature makes the parser noticeably slower.
- @end table
- @node Algorithm, Error Recovery, Interface, Top
- @chapter The Bison Parser Algorithm
- @cindex Bison parser algorithm
- @cindex algorithm of parser
- @cindex shifting
- @cindex reduction
- @cindex parser stack
- @cindex stack, parser
- As Bison reads tokens, it pushes them onto a stack along with their
- semantic values. The stack is called the @dfn{parser stack}. Pushing a
- token is traditionally called @dfn{shifting}.
- For example, suppose the infix calculator has read @samp{1 + 5 *}, with a
- @samp{3} to come. The stack will have four elements, one for each token
- that was shifted.
- But the stack does not always have an element for each token read. When
- the last @var{n} tokens and groupings shifted match the components of a
- grammar rule, they can be combined according to that rule. This is called
- @dfn{reduction}. Those tokens and groupings are replaced on the stack by a
- single grouping whose symbol is the result (left hand side) of that rule.
- Running the rule's action is part of the process of reduction, because this
- is what computes the semantic value of the resulting grouping.
- For example, if the infix calculator's parser stack contains this:
- @example
- 1 + 5 * 3
- @end example
- @noindent
- and the next input token is a newline character, then the last three
- elements can be reduced to 15 via the rule:
- @example
- expr: expr '*' expr;
- @end example
- @noindent
- Then the stack contains just these three elements:
- @example
- 1 + 15
- @end example
- @noindent
- At this point, another reduction can be made, resulting in the single value
- 16. Then the newline token can be shifted.
- The parser tries, by shifts and reductions, to reduce the entire input down
- to a single grouping whose symbol is the grammar's start-symbol
- (@pxref{Language and Grammar, ,Languages and Context-Free Grammars}).
- This kind of parser is known in the literature as a bottom-up parser.
- @menu
- * Look-Ahead:: Parser looks one token ahead when deciding what to do.
- * Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
- * Precedence:: Operator precedence works by resolving conflicts.
- * Contextual Precedence:: When an operator's precedence depends on context.
- * Parser States:: The parser is a finite-state-machine with stack.
- * Reduce/Reduce:: When two rules are applicable in the same situation.
- * Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
- * Stack Overflow:: What happens when stack gets full. How to avoid it.
- @end menu
- @node Look-Ahead, Shift/Reduce, , Algorithm
- @section Look-Ahead Tokens
- @cindex look-ahead token
- The Bison parser does @emph{not} always reduce immediately as soon as the
- last @var{n} tokens and groupings match a rule. This is because such a
- simple strategy is inadequate to handle most languages. Instead, when a
- reduction is possible, the parser sometimes ``looks ahead'' at the next
- token in order to decide what to do.
- When a token is read, it is not immediately shifted; first it becomes the
- @dfn{look-ahead token}, which is not on the stack. Now the parser can
- perform one or more reductions of tokens and groupings on the stack, while
- the look-ahead token remains off to the side. When no more reductions
- should take place, the look-ahead token is shifted onto the stack. This
- does not mean that all possible reductions have been done; depending on the
- token type of the look-ahead token, some rules may choose to delay their
- application.
- Here is a simple case where look-ahead is needed. These three rules define
- expressions which contain binary addition operators and postfix unary
- factorial operators (@samp{!}), and allow parentheses for grouping.
- @example
- @group
- expr: term '+' expr
- | term
- ;
- @end group
- @group
- term: '(' expr ')'
- | term '!'
- | NUMBER
- ;
- @end group
- @end example
- Suppose that the tokens @w{@samp{1 + 2}} have been read and shifted; what
- should be done? If the following token is @samp{)}, then the first three
- tokens must be reduced to form an @code{expr}. This is the only valid
- course, because shifting the @samp{)} would produce a sequence of symbols
- @w{@code{term ')'}}, and no rule allows this.
- If the following token is @samp{!}, then it must be shifted immediately so
- that @w{@samp{2 !}} can be reduced to make a @code{term}. If instead the
- parser were to reduce before shifting, @w{@samp{1 + 2}} would become an
- @code{expr}. It would then be impossible to shift the @samp{!} because
- doing so would produce on the stack the sequence of symbols @code{expr
- '!'}. No rule allows that sequence.
- @vindex yychar
- The current look-ahead token is stored in the variable @code{yychar}.
- @xref{Action Features, ,Special Features for Use in Actions}.
- @node Shift/Reduce, Precedence, Look-Ahead, Algorithm
- @section Shift/Reduce Conflicts
- @cindex conflicts
- @cindex shift/reduce conflicts
- @cindex dangling @code{else}
- @cindex @code{else}, dangling
- Suppose we are parsing a language which has if-then and if-then-else
- statements, with a pair of rules like this:
- @example
- @group
- if_stmt:
- IF expr THEN stmt
- | IF expr THEN stmt ELSE stmt
- ;
- @end group
- @end example
- @noindent
- Here we assume that @code{IF}, @code{THEN} and @code{ELSE} are
- terminal symbols for specific keyword tokens.
- When the @code{ELSE} token is read and becomes the look-ahead token, the
- contents of the stack (assuming the input is valid) are just right for
- reduction by the first rule. But it is also legitimate to shift the
- @code{ELSE}, because that would lead to eventual reduction by the second
- rule.
- This situation, where either a shift or a reduction would be valid, is
- called a @dfn{shift/reduce conflict}. Bison is designed to resolve
- these conflicts by choosing to shift, unless otherwise directed by
- operator precedence declarations. To see the reason for this, let's
- contrast it with the other alternative.
- Since the parser prefers to shift the @code{ELSE}, the result is to attach
- the else-clause to the innermost if-statement, making these two inputs
- equivalent:
- @example
- if x then if y then win (); else lose;
- if x then do; if y then win (); else lose; end;
- @end example
- But if the parser chose to reduce when possible rather than shift, the
- result would be to attach the else-clause to the outermost if-statement,
- making these two inputs equivalent:
- @example
- if x then if y then win (); else lose;
- if x then do; if y then win (); end; else lose;
- @end example
- The conflict exists because the grammar as written is ambiguous: either
- parsing of the simple nested if-statement is legitimate. The established
- convention is that these ambiguities are resolved by attaching the
- else-clause to the innermost if-statement; this is what Bison accomplishes
- by choosing to shift rather than reduce. (It would ideally be cleaner to
- write an unambiguous grammar, but that is very hard to do in this case.)
- This particular ambiguity was first encountered in the specifications of
- Algol 60 and is called the ``dangling @code{else}'' ambiguity.
- To avoid warnings from Bison about predictable, legitimate shift/reduce
- conflicts, use the @code{%expect @var{n}} declaration. There will be no
- warning as long as the number of shift/reduce conflicts is exactly @var{n}.
- @xref{Expect Decl, ,Suppressing Conflict Warnings}.
- The definition of @code{if_stmt} above is solely to blame for the
- conflict, but the conflict does not actually appear without additional
- rules. Here is a complete Bison input file that actually manifests the
- conflict:
- @example
- @group
- %token IF THEN ELSE variable
- %%
- @end group
- @group
- stmt: expr
- | if_stmt
- ;
- @end group
- @group
- if_stmt:
- IF expr THEN stmt
- | IF expr THEN stmt ELSE stmt
- ;
- @end group
- expr: variable
- ;
- @end example
- @node Precedence, Contextual Precedence, Shift/Reduce, Algorithm
- @section Operator Precedence
- @cindex operator precedence
- @cindex precedence of operators
- Another situation where shift/reduce conflicts appear is in arithmetic
- expressions. Here shifting is not always the preferred resolution; the
- Bison declarations for operator precedence allow you to specify when to
- shift and when to reduce.
- @menu
- * Why Precedence:: An example showing why precedence is needed.
- * Using Precedence:: How to specify precedence in Bison grammars.
- * Precedence Examples:: How these features are used in the previous example.
- * How Precedence:: How they work.
- @end menu
- @node Why Precedence, Using Precedence, , Precedence
- @subsection When Precedence is Needed
- Consider the following ambiguous grammar fragment (ambiguous because the
- input @w{@samp{1 - 2 * 3}} can be parsed in two different ways):
- @example
- @group
- expr: expr '-' expr
- | expr '*' expr
- | expr '<' expr
- | '(' expr ')'
- @dots{}
- ;
- @end group
- @end example
- @noindent
- Suppose the parser has seen the tokens @samp{1}, @samp{-} and @samp{2};
- should it reduce them via the rule for the addition operator? It depends
- on the next token. Of course, if the next token is @samp{)}, we must
- reduce; shifting is invalid because no single rule can reduce the token
- sequence @w{@samp{- 2 )}} or anything starting with that. But if the next
- token is @samp{*} or @samp{<}, we have a choice: either shifting or
- reduction would allow the parse to complete, but with different
- results.
- To decide which one Bison should do, we must consider the
- results. If the next operator token @var{op} is shifted, then it
- must be reduced first in order to permit another opportunity to
- reduce the sum. The result is (in effect) @w{@samp{1 - (2
- @var{op} 3)}}. On the other hand, if the subtraction is reduced
- before shifting @var{op}, the result is @w{@samp{(1 - 2) @var{op}
- 3}}. Clearly, then, the choice of shift or reduce should depend
- on the relative precedence of the operators @samp{-} and
- @var{op}: @samp{*} should be shifted first, but not @samp{<}.
- @cindex associativity
- What about input such as @w{@samp{1 - 2 - 5}}; should this be
- @w{@samp{(1 - 2) - 5}} or should it be @w{@samp{1 - (2 - 5)}}? For
- most operators we prefer the former, which is called @dfn{left
- association}. The latter alternative, @dfn{right association}, is
- desirable for assignment operators. The choice of left or right
- association is a matter of whether the parser chooses to shift or
- reduce when the stack contains @w{@samp{1 - 2}} and the look-ahead
- token is @samp{-}: shifting makes right-associativity.
- @node Using Precedence, Precedence Examples, Why Precedence, Precedence
- @subsection Specifying Operator Precedence
- @findex %left
- @findex %right
- @findex %nonassoc
- Bison allows you to specify these choices with the operator precedence
- declarations @code{%left} and @code{%right}. Each such declaration
- contains a list of tokens, which are operators whose precedence and
- associativity is being declared. The @code{%left} declaration makes all
- those operators left-associative and the @code{%right} declaration makes
- them right-associative. A third alternative is @code{%nonassoc}, which
- declares that it is a syntax error to find the same operator twice ``in a
- row''.
- The relative precedence of different operators is controlled by the
- order in which they are declared. The first @code{%left} or
- @code{%right} declaration in the file declares the operators whose
- precedence is lowest, the next such declaration declares the operators
- whose precedence is a little higher, and so on.
- @node Precedence Examples, How Precedence, Using Precedence, Precedence
- @subsection Precedence Examples
- In our example, we would want the following declarations:
- @example
- %left '<'
- %left '-'
- %left '*'
- @end example
- In a more complete example, which supports other operators as well, we
- would declare them in groups of equal precedence. For example, @code{'+'} is
- declared with @code{'-'}:
- @example
- %left '<' '>' '=' NE LE GE
- %left '+' '-'
- %left '*' '/'
- @end example
- @noindent
- (Here @code{NE} and so on stand for the operators for ``not equal''
- and so on. We assume that these tokens are more than one character long
- and therefore are represented by names, not character literals.)
- @node How Precedence, , Precedence Examples, Precedence
- @subsection How Precedence Works
- The first effect of the precedence declarations is to assign precedence
- levels to the terminal symbols declared. The second effect is to assign
- precedence levels to certain rules: each rule gets its precedence from the
- last terminal symbol mentioned in the components. (You can also specify
- explicitly the precedence of a rule. @xref{Contextual Precedence, ,Context-Dependent Precedence}.)
- Finally, the resolution of conflicts works by comparing the
- precedence of the rule being considered with that of the
- look-ahead token. If the token's precedence is higher, the
- choice is to shift. If the rule's precedence is higher, the
- choice is to reduce. If they have equal precedence, the choice
- is made based on the associativity of that precedence level. The
- verbose output file made by @samp{-v} (@pxref{Invocation, ,Invoking Bison}) says
- how each conflict was resolved.
- Not all rules and not all tokens have precedence. If either the rule or
- the look-ahead token has no precedence, then the default is to shift.
- @node Contextual Precedence, Parser States, Precedence, Algorithm
- @section Context-Dependent Precedence
- @cindex context-dependent precedence
- @cindex unary operator precedence
- @cindex precedence, context-dependent
- @cindex precedence, unary operator
- @findex %prec
- Often the precedence of an operator depends on the context. This sounds
- outlandish at first, but it is really very common. For example, a minus
- sign typically has a very high precedence as a unary operator, and a
- somewhat lower precedence (lower than multiplication) as a binary operator.
- The Bison precedence declarations, @code{%left}, @code{%right} and
- @code{%nonassoc}, can only be used once for a given token; so a token has
- only one precedence declared in this way. For context-dependent
- precedence, you need to use an additional mechanism: the @code{%prec}
- modifier for rules.@refill
- The @code{%prec} modifier declares the precedence of a particular rule by
- specifying a terminal symbol whose precedence should be used for that rule.
- It's not necessary for that symbol to appear otherwise in the rule. The
- modifier's syntax is:
- @example
- %prec @var{terminal-symbol}
- @end example
- @noindent
- and it is written after the components of the rule. Its effect is to
- assign the rule the precedence of @var{terminal-symbol}, overriding
- the precedence that would be deduced for it in the ordinary way. The
- altered rule precedence then affects how conflicts involving that rule
- are resolved (@pxref{Precedence, ,Operator Precedence}).
- Here is how @code{%prec} solves the problem of unary minus. First, declare
- a precedence for a fictitious terminal symbol named @code{UMINUS}. There
- are no tokens of this type, but the symbol serves to stand for its
- precedence:
- @example
- @dots{}
- %left '+' '-'
- %left '*'
- %left UMINUS
- @end example
- Now the precedence of @code{UMINUS} can be used in specific rules:
- @example
- @group
- exp: @dots{}
- | exp '-' exp
- @dots{}
- | '-' exp %prec UMINUS
- @end group
- @end example
- @node Parser States, Reduce/Reduce, Contextual Precedence, Algorithm
- @section Parser States
- @cindex finite-state machine
- @cindex parser state
- @cindex state (of parser)
- The function @code{yyparse} is implemented using a finite-state machine.
- The values pushed on the parser stack are not simply token type codes; they
- represent the entire sequence of terminal and nonterminal symbols at or
- near the top of the stack. The current state collects all the information
- about previous input which is relevant to deciding what to do next.
- Each time a look-ahead token is read, the current parser state together
- with the type of look-ahead token are looked up in a table. This table
- entry can say, ``Shift the look-ahead token.'' In this case, it also
- specifies the new parser state, which is pushed onto the top of the
- parser stack. Or it can say, ``Reduce using rule number @var{n}.''
- This means that a certain number of tokens or groupings are taken off
- the top of the stack, and replaced by one grouping. In other words,
- that number of states are popped from the stack, and one new state is
- pushed.
- There is one other alternative: the table can say that the look-ahead token
- is erroneous in the current state. This causes error processing to begin
- (@pxref{Error Recovery}).
- @node Reduce/Reduce, Mystery Conflicts, Parser States, Algorithm
- @section Reduce/Reduce Conflicts
- @cindex reduce/reduce conflict
- @cindex conflicts, reduce/reduce
- A reduce/reduce conflict occurs if there are two or more rules that apply
- to the same sequence of input. This usually indicates a serious error
- in the grammar.
- For example, here is an erroneous attempt to define a sequence
- of zero or more @code{word} groupings.
- @example
- sequence: /* empty */
- @{ printf ("empty sequence\n"); @}
- | maybeword
- | sequence word
- @{ printf ("added word %s\n", $2); @}
- ;
- maybeword: /* empty */
- @{ printf ("empty maybeword\n"); @}
- | word
- @{ printf ("single word %s\n", $1); @}
- ;
- @end example
- @noindent
- The error is an ambiguity: there is more than one way to parse a single
- @code{word} into a @code{sequence}. It could be reduced to a
- @code{maybeword} and then into a @code{sequence} via the second rule.
- Alternatively, nothing-at-all could be reduced into a @code{sequence}
- via the first rule, and this could be combined with the @code{word}
- using the third rule for @code{sequence}.
- There is also more than one way to reduce nothing-at-all into a
- @code{sequence}. This can be done directly via the first rule,
- or indirectly via @code{maybeword} and then the second rule.
- You might think that this is a distinction without a difference, because it
- does not change whether any particular input is valid or not. But it does
- affect which actions are run. One parsing order runs the second rule's
- action; the other runs the first rule's action and the third rule's action.
- In this example, the output of the program changes.
- Bison resolves a reduce/reduce conflict by choosing to use the rule that
- appears first in the grammar, but it is very risky to rely on this. Every
- reduce/reduce conflict must be studied and usually eliminated. Here is the
- proper way to define @code{sequence}:
- @example
- sequence: /* empty */
- @{ printf ("empty sequence\n"); @}
- | sequence word
- @{ printf ("added word %s\n", $2); @}
- ;
- @end example
- Here is another common error that yields a reduce/reduce conflict:
- @example
- sequence: /* empty */
- | sequence words
- | sequence redirects
- ;
- words: /* empty */
- | words word
- ;
- redirects:/* empty */
- | redirects redirect
- ;
- @end example
- @noindent
- The intention here is to define a sequence which can contain either
- @code{word} or @code{redirect} groupings. The individual definitions of
- @code{sequence}, @code{words} and @code{redirects} are error-free, but the
- three together make a subtle ambiguity: even an empty input can be parsed
- in infinitely many ways!
- Consider: nothing-at-all could be a @code{words}. Or it could be two
- @code{words} in a row, or three, or any number. It could equally well be a
- @code{redirects}, or two, or any number. Or it could be a @code{words}
- followed by three @code{redirects} and another @code{words}. And so on.
- Here are two ways to correct these rules. First, to make it a single level
- of sequence:
- @example
- sequence: /* empty */
- | sequence word
- | sequence redirect
- ;
- @end example
- Second, to prevent either a @code{words} or a @code{redirects}
- from being empty:
- @example
- sequence: /* empty */
- | sequence words
- | sequence redirects
- ;
- words: word
- | words word
- ;
- redirects:redirect
- | redirects redirect
- ;
- @end example
- @node Mystery Conflicts, Stack Overflow, Reduce/Reduce, Algorithm
- @section Mysterious Reduce/Reduce Conflicts
- Sometimes reduce/reduce conflicts can occur that don't look warranted.
- Here is an example:
- @example
- @group
- %token ID
- %%
- def: param_spec return_spec ','
- ;
- param_spec:
- type
- | name_list ':' type
- ;
- @end group
- @group
- return_spec:
- type
- | name ':' type
- ;
- @end group
- @group
- type: ID
- ;
- @end group
- @group
- name: ID
- ;
- name_list:
- name
- | name ',' name_list
- ;
- @end group
- @end example
- It would seem that this grammar can be parsed with only a single token
- of look-ahead: when a @code{param_spec} is being read, an @code{ID} is
- a @code{name} if a comma or colon follows, or a @code{type} if another
- @code{ID} follows. In other words, this grammar is LR(1).
- @cindex LR(1)
- @cindex LALR(1)
- However, Bison, like most parser generators, cannot actually handle all
- LR(1) grammars. In this grammar, two contexts, that after an @code{ID}
- at the beginning of a @code{param_spec} and likewise at the beginning of
- a @code{return_spec}, are similar enough that Bison assumes they are the
- same. They appear similar because the same set of rules would be
- active---the rule for reducing to a @code{name} and that for reducing to
- a @code{type}. Bison is unable to determine at that stage of processing
- that the rules would require different look-ahead tokens in the two
- contexts, so it makes a single parser state for them both. Combining
- the two contexts causes a conflict later. In parser terminology, this
- occurrence means that the grammar is not LALR(1).
- In general, it is better to fix deficiencies than to document them. But
- this particular deficiency is intrinsically hard to fix; parser
- generators that can handle LR(1) grammars are hard to write and tend to
- produce parsers that are very large. In practice, Bison is more useful
- as it is now.
- When the problem arises, you can often fix it by identifying the two
- parser states that are being confused, and adding something to make them
- look distinct. In the above example, adding one rule to
- @code{return_spec} as follows makes the problem go away:
- @example
- @group
- %token BOGUS
- @dots{}
- %%
- @dots{}
- return_spec:
- type
- | name ':' type
- /* This rule is never used. */
- | ID BOGUS
- ;
- @end group
- @end example
- This corrects the problem because it introduces the possibility of an
- additional active rule in the context after the @code{ID} at the beginning of
- @code{return_spec}. This rule is not active in the corresponding context
- in a @code{param_spec}, so the two contexts receive distinct parser states.
- As long as the token @code{BOGUS} is never generated by @code{yylex},
- the added rule cannot alter the way actual input is parsed.
- In this particular example, there is another way to solve the problem:
- rewrite the rule for @code{return_spec} to use @code{ID} directly
- instead of via @code{name}. This also causes the two confusing
- contexts to have different sets of active rules, because the one for
- @code{return_spec} activates the altered rule for @code{return_spec}
- rather than the one for @code{name}.
- @example
- param_spec:
- type
- | name_list ':' type
- ;
- return_spec:
- type
- | ID ':' type
- ;
- @end example
- @node Stack Overflow, , Mystery Conflicts, Algorithm
- @section Stack Overflow, and How to Avoid It
- @cindex stack overflow
- @cindex parser stack overflow
- @cindex overflow of parser stack
- The Bison parser stack can overflow if too many tokens are shifted and
- not reduced. When this happens, the parser function @code{yyparse}
- returns a nonzero value, pausing only to call @code{yyerror} to report
- the overflow.
- @vindex YYMAXDEPTH
- By defining the macro @code{YYMAXDEPTH}, you can control how deep the
- parser stack can become before a stack overflow occurs. Define the
- macro with a value that is an integer. This value is the maximum number
- of tokens that can be shifted (and not reduced) before overflow.
- It must be a constant expression whose value is known at compile time.
- The stack space allowed is not necessarily allocated. If you specify a
- large value for @code{YYMAXDEPTH}, the parser actually allocates a small
- stack at first, and then makes it bigger by stages as needed. This
- increasing allocation happens automatically and silently. Therefore,
- you do not need to make @code{YYMAXDEPTH} painfully small merely to save
- space for ordinary inputs that do not need much stack.
- @cindex default stack limit
- The default value of @code{YYMAXDEPTH}, if you do not define it, is
- 10000.
- @vindex YYINITDEPTH
- You can control how much stack is allocated initially by defining the
- macro @code{YYINITDEPTH}. This value too must be a compile-time
- constant integer. The default is 200.
- @node Error Recovery, Context Dependency, Algorithm, Top
- @chapter Error Recovery
- @cindex error recovery
- @cindex recovery from errors
- It is not usually acceptable to have a program terminate on a parse
- error. For example, a compiler should recover sufficiently to parse the
- rest of the input file and check it for errors; a calculator should accept
- another expression.
- In a simple interactive command parser where each input is one line, it may
- be sufficient to allow @code{yyparse} to return 1 on error and have the
- caller ignore the rest of the input line when that happens (and then call
- @code{yyparse} again). But this is inadequate for a compiler, because it
- forgets all the syntactic context leading up to the error. A syntax error
- deep within a function in the compiler input should not cause the compiler
- to treat the following line like the beginning of a source file.
- @findex error
- You can define how to recover from a syntax error by writing rules to
- recognize the special token @code{error}. This is a terminal symbol that
- is always defined (you need not declare it) and reserved for error
- handling. The Bison parser generates an @code{error} token whenever a
- syntax error happens; if you have provided a rule to recognize this token
- in the current context, the parse can continue.
- For example:
- @example
- stmnts: /* empty string */
- | stmnts '\n'
- | stmnts exp '\n'
- | stmnts error '\n'
- @end example
- The fourth rule in this example says that an error followed by a newline
- makes a valid addition to any @code{stmnts}.
- What happens if a syntax error occurs in the middle of an @code{exp}? The
- error recovery rule, interpreted strictly, applies to the precise sequence
- of a @code{stmnts}, an @code{error} and a newline. If an error occurs in
- the middle of an @code{exp}, there will probably be some additional tokens
- and subexpressions on the stack after the last @code{stmnts}, and there
- will be tokens to read before the next newline. So the rule is not
- applicable in the ordinary way.
- But Bison can force the situation to fit the rule, by discarding part of
- the semantic context and part of the input. First it discards states and
- objects from the stack until it gets back to a state in which the
- @code{error} token is acceptable. (This means that the subexpressions
- already parsed are discarded, back to the last complete @code{stmnts}.) At
- this point the @code{error} token can be shifted. Then, if the old
- look-ahead token is not acceptable to be shifted next, the parser reads
- tokens and discards them until it finds a token which is acceptable. In
- this example, Bison reads and discards input until the next newline
- so that the fourth rule can apply.
- The choice of error rules in the grammar is a choice of strategies for
- error recovery. A simple and useful strategy is simply to skip the rest of
- the current input line or current statement if an error is detected:
- @example
- stmnt: error ';' /* on error, skip until ';' is read */
- @end example
- It is also useful to recover to the matching close-delimiter of an
- opening-delimiter that has already been parsed. Otherwise the
- close-delimiter will probably appear to be unmatched, and generate another,
- spurious error message:
- @example
- primary: '(' expr ')'
- | '(' error ')'
- @dots{}
- ;
- @end example
- Error recovery strategies are necessarily guesses. When they guess wrong,
- one syntax error often leads to another. In the above example, the error
- recovery rule guesses that an error is due to bad input within one
- @code{stmnt}. Suppose that instead a spurious semicolon is inserted in the
- middle of a valid @code{stmnt}. After the error recovery rule recovers
- from the first error, another syntax error will be found straightaway,
- since the text following the spurious semicolon is also an invalid
- @code{stmnt}.
- To prevent an outpouring of error messages, the parser will output no error
- message for another syntax error that happens shortly after the first; only
- after three consecutive input tokens have been successfully shifted will
- error messages resume.
- Note that rules which accept the @code{error} token may have actions, just
- as any other rules can.
- @findex yyerrok
- You can make error messages resume immediately by using the macro
- @code{yyerrok} in an action. If you do this in the error rule's action, no
- error messages will be suppressed. This macro requires no arguments;
- @samp{yyerrok;} is a valid C statement.
- @findex yyclearin
- The previous look-ahead token is reanalyzed immediately after an error. If
- this is unacceptable, then the macro @code{yyclearin} may be used to clear
- this token. Write the statement @samp{yyclearin;} in the error rule's
- action.
- For example, suppose that on a parse error, an error handling routine is
- called that advances the input stream to some point where parsing should
- once again commence. The next symbol returned by the lexical scanner is
- probably correct. The previous look-ahead token ought to be discarded
- with @samp{yyclearin;}.
- @vindex YYRECOVERING
- The macro @code{YYRECOVERING} stands for an expression that has the
- value 1 when the parser is recovering from a syntax error, and 0 the
- rest of the time. A value of 1 indicates that error messages are
- currently suppressed for new syntax errors.
- @node Context Dependency, Debugging, Error Recovery, Top
- @chapter Handling Context Dependencies
- The Bison paradigm is to parse tokens first, then group them into larger
- syntactic units. In many languages, the meaning of a token is affected by
- its context. Although this violates the Bison paradigm, certain techniques
- (known as @dfn{kludges}) may enable you to write Bison parsers for such
- languages.
- @menu
- * Semantic Tokens:: Token parsing can depend on the semantic context.
- * Lexical Tie-ins:: Token parsing can depend on the syntactic context.
- * Tie-in Recovery:: Lexical tie-ins have implications for how
- error recovery rules must be written.
- @end menu
- (Actually, ``kludge'' means any technique that gets its job done but is
- neither clean nor robust.)
- @node Semantic Tokens, Lexical Tie-ins, , Context Dependency
- @section Semantic Info in Token Types
- The C language has a context dependency: the way an identifier is used
- depends on what its current meaning is. For example, consider this:
- @example
- foo (x);
- @end example
- This looks like a function call statement, but if @code{foo} is a typedef
- name, then this is actually a declaration of @code{x}. How can a Bison
- parser for C decide how to parse this input?
- The method used in GNU C is to have two different token types,
- @code{IDENTIFIER} and @code{TYPENAME}. When @code{yylex} finds an
- identifier, it looks up the current declaration of the identifier in order
- to decide which token type to return: @code{TYPENAME} if the identifier is
- declared as a typedef, @code{IDENTIFIER} otherwise.
- The grammar rules can then express the context dependency by the choice of
- token type to recognize. @code{IDENTIFIER} is accepted as an expression,
- but @code{TYPENAME} is not. @code{TYPENAME} can start a declaration, but
- @code{IDENTIFIER} cannot. In contexts where the meaning of the identifier
- is @emph{not} significant, such as in declarations that can shadow a
- typedef name, either @code{TYPENAME} or @code{IDENTIFIER} is
- accepted---there is one rule for each of the two token types.
- This technique is simple to use if the decision of which kinds of
- identifiers to allow is made at a place close to where the identifier is
- parsed. But in C this is not always so: C allows a declaration to
- redeclare a typedef name provided an explicit type has been specified
- earlier:
- @example
- typedef int foo, bar, lose;
- static foo (bar); /* @r{redeclare @code{bar} as static variable} */
- static int foo (lose); /* @r{redeclare @code{foo} as function} */
- @end example
- Unfortunately, the name being declared is separated from the declaration
- construct itself by a complicated syntactic structure---the ``declarator''.
- As a result, the part of Bison parser for C needs to be duplicated, with
- all the nonterminal names changed: once for parsing a declaration in which
- a typedef name can be redefined, and once for parsing a declaration in
- which that can't be done. Here is a part of the duplication, with actions
- omitted for brevity:
- @example
- initdcl:
- declarator maybeasm '='
- init
- | declarator maybeasm
- ;
- notype_initdcl:
- notype_declarator maybeasm '='
- init
- | notype_declarator maybeasm
- ;
- @end example
- @noindent
- Here @code{initdcl} can redeclare a typedef name, but @code{notype_initdcl}
- cannot. The distinction between @code{declarator} and
- @code{notype_declarator} is the same sort of thing.
- There is some similarity between this technique and a lexical tie-in
- (described next), in that information which alters the lexical analysis is
- changed during parsing by other parts of the program. The difference is
- here the information is global, and is used for other purposes in the
- program. A true lexical tie-in has a special-purpose flag controlled by
- the syntactic context.
- @node Lexical Tie-ins, Tie-in Recovery, Semantic Tokens, Context Dependency
- @section Lexical Tie-ins
- @cindex lexical tie-in
- One way to handle context-dependency is the @dfn{lexical tie-in}: a flag
- which is set by Bison actions, whose purpose is to alter the way tokens are
- parsed.
- For example, suppose we have a language vaguely like C, but with a special
- construct @samp{hex (@var{hex-expr})}. After the keyword @code{hex} comes
- an expression in parentheses in which all integers are hexadecimal. In
- particular, the token @samp{a1b} must be treated as an integer rather than
- as an identifier if it appears in that context. Here is how you can do it:
- @example
- @group
- %@{
- int hexflag;
- %@}
- %%
- @dots{}
- @end group
- @group
- expr: IDENTIFIER
- | constant
- | HEX '('
- @{ hexflag = 1; @}
- expr ')'
- @{ hexflag = 0;
- $$ = $4; @}
- | expr '+' expr
- @{ $$ = make_sum ($1, $3); @}
- @dots{}
- ;
- @end group
- @group
- constant:
- INTEGER
- | STRING
- ;
- @end group
- @end example
- @noindent
- Here we assume that @code{yylex} looks at the value of @code{hexflag}; when
- it is nonzero, all integers are parsed in hexadecimal, and tokens starting
- with letters are parsed as integers if possible.
- The declaration of @code{hexflag} shown in the C declarations section of
- the parser file is needed to make it accessible to the actions
- (@pxref{C Declarations, ,The C Declarations Section}). You must also write the code in @code{yylex}
- to obey the flag.
- @node Tie-in Recovery, , Lexical Tie-ins, Context Dependency
- @section Lexical Tie-ins and Error Recovery
- Lexical tie-ins make strict demands on any error recovery rules you have.
- @xref{Error Recovery}.
- The reason for this is that the purpose of an error recovery rule is to
- abort the parsing of one construct and resume in some larger construct.
- For example, in C-like languages, a typical error recovery rule is to skip
- tokens until the next semicolon, and then start a new statement, like this:
- @example
- stmt: expr ';'
- | IF '(' expr ')' stmt @{ @dots{} @}
- @dots{}
- error ';'
- @{ hexflag = 0; @}
- ;
- @end example
- If there is a syntax error in the middle of a @samp{hex (@var{expr})}
- construct, this error rule will apply, and then the action for the
- completed @samp{hex (@var{expr})} will never run. So @code{hexflag} would
- remain set for the entire rest of the input, or until the next @code{hex}
- keyword, causing identifiers to be misinterpreted as integers.
- To avoid this problem the error recovery rule itself clears @code{hexflag}.
- There may also be an error recovery rule that works within expressions.
- For example, there could be a rule which applies within parentheses
- and skips to the close-parenthesis:
- @example
- @group
- expr: @dots{}
- | '(' expr ')'
- @{ $$ = $2; @}
- | '(' error ')'
- @dots{}
- @end group
- @end example
- If this rule acts within the @code{hex} construct, it is not going to abort
- that construct (since it applies to an inner level of parentheses within
- the construct). Therefore, it should not clear the flag: the rest of
- the @code{hex} construct should be parsed with the flag still in effect.
- What if there is an error recovery rule which might abort out of the
- @code{hex} construct or might not, depending on circumstances? There is no
- way you can write the action to determine whether a @code{hex} construct is
- being aborted or not. So if you are using a lexical tie-in, you had better
- make sure your error recovery rules are not of this kind. Each rule must
- be such that you can be sure that it always will, or always won't, have to
- clear the flag.
- @node Debugging, Invocation, Context Dependency, Top
- @chapter Debugging Your Parser
- @findex YYDEBUG
- @findex yydebug
- @cindex debugging
- @cindex tracing the parser
- If a Bison grammar compiles properly but doesn't do what you want when it
- runs, the @code{yydebug} parser-trace feature can help you figure out why.
- To enable compilation of trace facilities, you must define the macro
- @code{YYDEBUG} when you compile the parser. You could use
- @samp{-DYYDEBUG=1} as a compiler option or you could put @samp{#define
- YYDEBUG 1} in the C declarations section of the grammar file
- (@pxref{C Declarations, ,The C Declarations Section}). Alternatively, use the @samp{-t} option when
- you run Bison (@pxref{Invocation, ,Invoking Bison}). We always define @code{YYDEBUG} so that
- debugging is always possible.
- The trace facility uses @code{stderr}, so you must add @w{@code{#include
- <stdio.h>}} to the C declarations section unless it is already there.
- Once you have compiled the program with trace facilities, the way to
- request a trace is to store a nonzero value in the variable @code{yydebug}.
- You can do this by making the C code do it (in @code{main}, perhaps), or
- you can alter the value with a C debugger.
- Each step taken by the parser when @code{yydebug} is nonzero produces a
- line or two of trace information, written on @code{stderr}. The trace
- messages tell you these things:
- @itemize @bullet
- @item
- Each time the parser calls @code{yylex}, what kind of token was read.
- @item
- Each time a token is shifted, the depth and complete contents of the
- state stack (@pxref{Parser States}).
- @item
- Each time a rule is reduced, which rule it is, and the complete contents
- of the state stack afterward.
- @end itemize
- To make sense of this information, it helps to refer to the listing file
- produced by the Bison @samp{-v} option (@pxref{Invocation, ,Invoking Bison}). This file
- shows the meaning of each state in terms of positions in various rules, and
- also what each state will do with each possible input token. As you read
- the successive trace messages, you can see that the parser is functioning
- according to its specification in the listing file. Eventually you will
- arrive at the place where something undesirable happens, and you will see
- which parts of the grammar are to blame.
- The parser file is a C program and you can use C debuggers on it, but it's
- not easy to interpret what it is doing. The parser function is a
- finite-state machine interpreter, and aside from the actions it executes
- the same code over and over. Only the values of variables show where in
- the grammar it is working.
- @findex YYPRINT
- The debugging information normally gives the token type of each token
- read, but not its semantic value. You can optionally define a macro
- named @code{YYPRINT} to provide a way to print the value. If you define
- @code{YYPRINT}, it should take three arguments. The parser will pass a
- standard I/O stream, the numeric code for the token type, and the token
- value (from @code{yylval}).
- Here is an example of @code{YYPRINT} suitable for the multi-function
- calculator (@pxref{Mfcalc Decl, ,Declarations for @code{mfcalc}}):
- @smallexample
- #define YYPRINT(file, type, value) yyprint (file, type, value)
- static void
- yyprint (file, type, value)
- FILE *file;
- int type;
- YYSTYPE value;
- @{
- if (type == VAR)
- fprintf (file, " %s", value.tptr->name);
- else if (type == NUM)
- fprintf (file, " %d", value.val);
- @}
- @end smallexample
- @node Invocation, Table of Symbols, Debugging, Top
- @chapter Invoking Bison
- @cindex invoking Bison
- @cindex Bison invocation
- @cindex options for invoking Bison
- The usual way to invoke Bison is as follows:
- @example
- bison @var{infile}
- @end example
- Here @var{infile} is the grammar file name, which usually ends in
- @samp{.y}. The parser file's name is made by replacing the @samp{.y}
- with @samp{.tab.c}. Thus, the @samp{bison foo.y} filename yields
- @file{foo.tab.c}, and the @samp{bison hack/foo.y} filename yields
- @file{hack/foo.tab.c}.@refill
- @menu
- * Bison Options:: All the options described in detail,
- in alphabetical order by short options.
- * Option Cross Key:: Alphabetical list of long options.
- * VMS Invocation:: Bison command syntax on VMS.
- @end menu
- @node Bison Options, Option Cross Key, , Invocation
- @section Bison Options
- Bison supports both traditional single-letter options and mnemonic long
- option names. Long option names are indicated with @samp{--} instead of
- @samp{-}. Abbreviations for option names are allowed as long as they
- are unique. When a long option takes an argument, like
- @samp{--file-prefix}, connect the option name and the argument with
- @samp{=}.
- Here is a list of options that can be used with Bison, alphabetized by
- short option. It is followed by a cross key alphabetized by long
- option.
- @table @samp
- @item -b @var{file-prefix}
- @itemx --file-prefix=@var{prefix}
- Specify a prefix to use for all Bison output file names. The names are
- chosen as if the input file were named @file{@var{prefix}.c}.
- @item -d
- @itemx --defines
- Write an extra output file containing macro definitions for the token
- type names defined in the grammar and the semantic value type
- @code{YYSTYPE}, as well as a few @code{extern} variable declarations.
- If the parser output file is named @file{@var{name}.c} then this file
- is named @file{@var{name}.h}.@refill
- This output file is essential if you wish to put the definition of
- @code{yylex} in a separate source file, because @code{yylex} needs to
- be able to refer to token type codes and the variable
- @code{yylval}. @xref{Token Values, ,Semantic Values of Tokens}.@refill
- @item -k
- @itemx --token-table
- This switch causes the .tab.c output to include a list of token names in
- order by their token numbers; this is defined in the array @code{yytname}.
- The first three elements are @code{"$"}, @code{"error"}, and
- @code{"$illegal"}; entries for single- and multiple-character symbols
- include their quotes: @code{"\'+\'"} and @code{"\"<=\""}. Also generated
- are #defines for @code{YYNTOKENS, YYNNTS, YYNRULES}, and @code{YYNSTATES}
- giving, respectively, one more than the highest token number, the number
- of non-terminal symbols, the number of grammar rules, and the number of states.
- @item -l
- @itemx --no-lines
- Don't put any @code{#line} preprocessor commands in the parser file.
- Ordinarily Bison puts them in the parser file so that the C compiler
- and debuggers will associate errors with your source file, the
- grammar file. This option causes them to associate errors with the
- parser file, treating it an independent source file in its own right.
- @item -n
- @itemx --no-parser
- Do not generate the parser code into the output; generate only
- declarations. The generated @samp{y.tab.c} file will have only
- constant declarations. In addition, a @var{filename}.act file is
- generated containing a switch statement body containing all the
- translated actions. (The @var{filename} is taken from the input file or
- set in accordance with the -o switch.) The declarations in the
- @var{filename}@samp{.tab.c} file will all be static or #defined.
- Some symbols only appear if appropriate options are selected.
- These are #defined: YYLTYPE, YYFINAL, YYFLAG, YYNTBASE,
- YYTRANSLATE, YYLAST, YYNTOKENS, YYNNTS, YYNRULES, YYNSTATES, YYMAXUTOK.
- These are declared and given values: yyltype, yytranslate, yyprhs, yyrhs,
- yystos, yyrline, yytname, yytoknum, yyr1, yyr2, yydefact, yydefgoto,
- yypact, yypgoto, yytable, and yycheck. See the source file output.c
- for definitions of these variables.
- @item -o @var{outfile}
- @itemx --output-file=@var{outfile}
- Specify the name @var{outfile} for the parser file.
- The other output files' names are constructed from @var{outfile}
- as described under the @samp{-v} and @samp{-d} options.
- @item -p @var{prefix}
- @itemx --name-prefix=@var{prefix}
- Rename the external symbols used in the parser so that they start with
- @var{prefix} instead of @samp{yy}. The precise list of symbols renamed
- is @code{yyparse}, @code{yylex}, @code{yyerror}, @code{yylval},
- @code{yychar} and @code{yydebug}.
- For example, if you use @samp{-p c}, the names become @code{cparse},
- @code{clex}, and so on.
- @xref{Multiple Parsers, ,Multiple Parsers in the Same Program}.
- @item -r
- @itemx --raw
- In the output to @file{@var{name}.h} the tokens are usually defined with
- Yacc compatible token numbers. If this switch is specified, the Bison
- assigned numbers are output instead. (Yacc numbers start at 257 except
- for single character tokens; Bison assigns token numbers sequentially
- for all tokens starting at 3.)
- @item -t
- @itemx --debug
- Output a definition of the macro @code{YYDEBUG} into the parser file,
- so that the debugging facilities are compiled. @xref{Debugging, ,Debugging Your Parser}.
- @item -v
- @itemx --verbose
- Write an extra output file containing verbose descriptions of the
- parser states and what is done for each type of look-ahead token in
- that state.
- This file also describes all the conflicts, both those resolved by
- operator precedence and the unresolved ones.
- The file's name is made by removing @samp{.tab.c} or @samp{.c} from
- the parser output file name, and adding @samp{.output} instead.@refill
- Therefore, if the input file is @file{foo.y}, then the parser file is
- called @file{foo.tab.c} by default. As a consequence, the verbose
- output file is called @file{foo.output}.@refill
- @item -V
- @itemx --version
- Print the version number of Bison and exit.
- @item -h
- @itemx --help
- Print a summary of the command-line options to Bison and exit.
- @need 1750
- @item -y
- @itemx --yacc
- @itemx --fixed-output-files
- Equivalent to @samp{-o y.tab.c}; the parser output file is called
- @file{y.tab.c}, and the other outputs are called @file{y.output} and
- @file{y.tab.h}. The purpose of this switch is to imitate Yacc's output
- file name conventions. Thus, the following shell script can substitute
- for Yacc:@refill
- @example
- bison -y $*
- @end example
- @end table
- @node Option Cross Key, VMS Invocation, Bison Options, Invocation
- @section Option Cross Key
- Here is a list of options, alphabetized by long option, to help you find
- the corresponding short option.
- @tex
- \def\leaderfill{\leaders\hbox to 1em{\hss.\hss}\hfill}
- {\tt
- \line{ --debug \leaderfill -t}
- \line{ --defines \leaderfill -d}
- \line{ --file-prefix \leaderfill -b}
- \line{ --fixed-output-files \leaderfill -y}
- \line{ --help \leaderfill -h}
- \line{ --name-prefix \leaderfill -p}
- \line{ --no-lines \leaderfill -l}
- \line{ --no-parser \leaderfill -n}
- \line{ --output-file \leaderfill -o}
- \line{ --raw \leaderfill -r}
- \line{ --token-table \leaderfill -k}
- \line{ --verbose \leaderfill -v}
- \line{ --version \leaderfill -V}
- \line{ --yacc \leaderfill -y}
- }
- @end tex
- @ifinfo
- @example
- --debug -t
- --defines -d
- --file-prefix=@var{prefix} -b @var{file-prefix}
- --fixed-output-files --yacc -y
- --help -h
- --name-prefix -p
- --no-lines -l
- --no-parser -n
- --output-file=@var{outfile} -o @var{outfile}
- --raw -r
- --token-table -k
- --verbose -v
- --version -V
- @end example
- @end ifinfo
- @node VMS Invocation, , Option Cross Key, Invocation
- @section Invoking Bison under VMS
- @cindex invoking Bison under VMS
- @cindex VMS
- The command line syntax for Bison on VMS is a variant of the usual
- Bison command syntax---adapted to fit VMS conventions.
- To find the VMS equivalent for any Bison option, start with the long
- option, and substitute a @samp{/} for the leading @samp{--}, and
- substitute a @samp{_} for each @samp{-} in the name of the long option.
- For example, the following invocation under VMS:
- @example
- bison /debug/name_prefix=bar foo.y
- @end example
- @noindent
- is equivalent to the following command under POSIX.
- @example
- bison --debug --name-prefix=bar foo.y
- @end example
- The VMS file system does not permit filenames such as
- @file{foo.tab.c}. In the above example, the output file
- would instead be named @file{foo_tab.c}.
- @node Table of Symbols, Parser Symbols, Invocation, Top
- @appendix Bison Symbols
- @cindex Bison symbols, table of
- @cindex symbols in Bison, table of
- @table @code
- @item error
- A token name reserved for error recovery. This token may be used in
- grammar rules so as to allow the Bison parser to recognize an error in
- the grammar without halting the process. In effect, a sentence
- containing an error may be recognized as valid. On a parse error, the
- token @code{error} becomes the current look-ahead token. Actions
- corresponding to @code{error} are then executed, and the look-ahead
- token is reset to the token that originally caused the violation.
- @xref{Error Recovery}.
- @item YYABORT
- Macro to pretend that an unrecoverable syntax error has occurred, by
- making @code{yyparse} return 1 immediately. The error reporting
- function @code{yyerror} is not called. @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYACCEPT
- Macro to pretend that a complete utterance of the language has been
- read, by making @code{yyparse} return 0 immediately.
- @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYBACKUP
- Macro to discard a value from the parser stack and fake a look-ahead
- token. @xref{Action Features, ,Special Features for Use in Actions}.
- @item YYERROR
- Macro to pretend that a syntax error has just been detected: call
- @code{yyerror} and then perform normal error recovery if possible
- (@pxref{Error Recovery}), or (if recovery is impossible) make
- @code{yyparse} return 1. @xref{Error Recovery}.
- @item YYERROR_VERBOSE
- Macro that you define with @code{#define} in the Bison declarations
- section to request verbose, specific error message strings when
- @code{yyerror} is called.
- @item YYINITDEPTH
- Macro for specifying the initial size of the parser stack.
- @xref{Stack Overflow}.
- @item YYLTYPE
- Macro for the data type of @code{yylloc}; a structure with four
- members. @xref{Token Positions, ,Textual Positions of Tokens}.
- @item yyltype
- Default value for YYLTYPE.
- @item YYMAXDEPTH
- Macro for specifying the maximum size of the parser stack.
- @xref{Stack Overflow}.
- @item YYRECOVERING
- Macro whose value indicates whether the parser is recovering from a
- syntax error. @xref{Action Features, ,Special Features for Use in Actions}.
- @item YYSTYPE
- Macro for the data type of semantic values; @code{int} by default.
- @xref{Value Type, ,Data Types of Semantic Values}.
- @item yychar
- External integer variable that contains the integer value of the
- current look-ahead token. (In a pure parser, it is a local variable
- within @code{yyparse}.) Error-recovery rule actions may examine this
- variable. @xref{Action Features, ,Special Features for Use in Actions}.
- @item yyclearin
- Macro used in error-recovery rule actions. It clears the previous
- look-ahead token. @xref{Error Recovery}.
- @item yydebug
- External integer variable set to zero by default. If @code{yydebug}
- is given a nonzero value, the parser will output information on input
- symbols and parser action. @xref{Debugging, ,Debugging Your Parser}.
- @item yyerrok
- Macro to cause parser to recover immediately to its normal mode
- after a parse error. @xref{Error Recovery}.
- @item yyerror
- User-supplied function to be called by @code{yyparse} on error. The
- function receives one argument, a pointer to a character string
- containing an error message. @xref{Error Reporting, ,The Error Reporting Function @code{yyerror}}.
- @item yylex
- User-supplied lexical analyzer function, called with no arguments
- to get the next token. @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- @item yylval
- External variable in which @code{yylex} should place the semantic
- value associated with a token. (In a pure parser, it is a local
- variable within @code{yyparse}, and its address is passed to
- @code{yylex}.) @xref{Token Values, ,Semantic Values of Tokens}.
- @item yylloc
- External variable in which @code{yylex} should place the line and
- column numbers associated with a token. (In a pure parser, it is a
- local variable within @code{yyparse}, and its address is passed to
- @code{yylex}.) You can ignore this variable if you don't use the
- @samp{@@} feature in the grammar actions. @xref{Token Positions, ,Textual Positions of Tokens}.
- @item yynerrs
- Global variable which Bison increments each time there is a parse
- error. (In a pure parser, it is a local variable within
- @code{yyparse}.) @xref{Error Reporting, ,The Error Reporting Function @code{yyerror}}.
- @item yyparse
- The parser function produced by Bison; call this function to start
- parsing. @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item %left
- Bison declaration to assign left associativity to token(s).
- @xref{Precedence Decl, ,Operator Precedence}.
- @item %nonassoc
- Bison declaration to assign nonassociativity to token(s).
- @xref{Precedence Decl, ,Operator Precedence}.
- @item %prec
- Bison declaration to assign a precedence to a specific rule.
- @xref{Contextual Precedence, ,Context-Dependent Precedence}.
- @item %pure_parser
- Bison declaration to request a pure (reentrant) parser.
- @xref{Pure Decl, ,A Pure (Reentrant) Parser}.
- @item %right
- Bison declaration to assign right associativity to token(s).
- @xref{Precedence Decl, ,Operator Precedence}.
- @item %start
- Bison declaration to specify the start symbol. @xref{Start Decl, ,The Start-Symbol}.
- @item %token
- Bison declaration to declare token(s) without specifying precedence.
- @xref{Token Decl, ,Token Type Names}.
- @item %type
- Bison declaration to declare nonterminals. @xref{Type Decl, ,Nonterminal Symbols}.
- @item %union
- Bison declaration to specify several possible data types for semantic
- values. @xref{Union Decl, ,The Collection of Value Types}.
- @end table
- These are the punctuation and delimiters used in Bison input:
- @table @samp
- @item %%
- Delimiter used to separate the grammar rule section from the
- Bison declarations section or the additional C code section.
- @xref{Grammar Layout, ,The Overall Layout of a Bison Grammar}.
- @item %@{ %@}
- All code listed between @samp{%@{} and @samp{%@}} is copied directly
- to the output file uninterpreted. Such code forms the ``C
- declarations'' section of the input file. @xref{Grammar Outline, ,Outline of a Bison Grammar}.
- @item /*@dots{}*/
- Comment delimiters, as in C.
- @item :
- Separates a rule's result from its components. @xref{Rules, ,Syntax of Grammar Rules}.
- @item ;
- Terminates a rule. @xref{Rules, ,Syntax of Grammar Rules}.
- @item |
- Separates alternate rules for the same result nonterminal.
- @xref{Rules, ,Syntax of Grammar Rules}.
- @end table
- @node Parser Symbols, Glossary, Table of Symbols, Top
- @section Parser Symbols
- @cindex symbols, parser
- Each symbol (either token or variable) receives a symbol number.
- Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are for
- variables. Symbol number zero is the end-of-input token. This token
- is counted in ntokens.
- The rules receive rule numbers 1 to nrules in the order they are written.
- Actions and guards are accessed via the rule number.
- The rules themselves are described by three arrays: rrhs, rlhs and
- ritem. rlhs[R] is the symbol number of the left hand side of rule R.
- The right hand side is stored as symbol numbers in a portion of
- ritem. rrhs[R] contains the index in ritem of the beginning of the
- portion for rule R.
- If rlhs[R] is -1, the rule has been thrown out by reduce.c
- and should be ignored.
- The length of the portion is one greater
- than the number of symbols in the rule's right hand side.
- The last element in the portion contains minus R, which
- identifies it as the end of a portion and says which rule it is for.
- The portions of ritem come in order of increasing rule number and are
- followed by an element which is zero to mark the end.
- These symbols are #defined:
- YYFINAL = the state number of the termination state.
- YYFLAG = most negative short int. Used to flag ??
- YYNTBASE = ntokens
- YYTRANSLATE = macro to translate token number from yacc to bison
- YYLAST = index of highest entry in yytable and yycheck
- YYNTOKENS = number of terminal symbols (same as YYNTBASE)
- YYNNTS = number of nonterminals
- YYNRULES = number of rules in the grammar
- YYNSTATES = number of states in parser
- YYMAXUTOK = highest user token number
- YYTRANSLATE(y) == if y <= YYMAXUTOK then yytranslate[y] else YYNTOKENS+YYNNTS
- The parser tables consist of the following tables.
- Starred ones needed only for the semantic parser.
- Double starred are output only if switches are set.
- yytranslate = vector mapping yylex's token numbers into bison's token numbers.
- The token numbers differ because (a) yacc/yylex utilize the first
- 256 token numbers to refer to individual characters and
- (b) the yacc grammar allows explicit assignment of token numbers.
- Bison token numbers are assigned sequentially from 0 to YYNTOKENS-1.
- ** yytname = vector of string-names indexed by bison token number
- (range 0...YYNTOKENS+YYNNTS, the last entry is "")
- user token names are in (3...YYNTOKENS-1) and may be
- identifier - all letters
- an identifier represents either
- a reserved word or a token class
- character literal - 'x' (the apostrophes appear in the string)
- string literal - "xxx" (the quotes appear in the string)
- non-terminal symbol names are in (YYNTOKENS...YYNTOKENS+YYNNTS-1)
- ** yytoknum = vector of yacc/yylex token numbers corresponding to entries
- in yytname (range 0...YYNTOKENS+YYNNTS, the last entry is 0)
- yyrline = vector of line-numbers of all rules. For yydebug printouts.
- (range 0...YYNRULES, the first entry is 0)
- ** yyrhs = vector of items of all rules.
- This is exactly what ritems contains. For yydebug and for semantic
- parser.
- ** yyprhs[r] = index in yyrhs of first item for rule r.
- (range 0...YYNRULES, the first entry is 0)
- yyr1[r] = symbol number of symbol that rule r derives.
- (range 0...YYNRULES, the first entry is 0)
- yyr2[r] = number of symbols composing right hand side of rule r.
- (range 0...YYNRULES, the first entry is 0)
- * yystos[s] = the symbol number of the symbol that leads to state s.
- (range 0...NSTATES-1)
- yydefact[s] = default rule to reduce with in state s,
- when yytable doesn't specify something else to do.
- Zero means the default is an error.
- (range 0...NSTATES-1)
- yydefgoto[i] = default state to go to after a reduction of a rule that
- generates variable ntokens + i, except when yytable
- specifies something else to do.
- yypact[s] = index in yytable of the portion describing state s.
- (range 0...YYNSTATES-1).
- The lookahead token's type is used to index that portion
- to find out what to do.
- If the value in yytable is positive,
- we shift the token and go to that state.
- If the value is negative, it is minus a rule number to reduce by.
- If the value is zero, the default action from yydefact[s] is used.
- yypgoto[i] = the index in yytable of the portion describing
- what to do after reducing a rule that derives variable i + ntokens.
- This portion is indexed by the parser state number
- as of before the text for this nonterminal was read.
- The value from yytable is the state to go to.
- yytable = a vector filled with portions for different uses,
- found via yypact and yypgoto. (range 0...YYLAST)
-
- yycheck = a vector indexed in parallel with yytable.
- It indicates, in a roundabout way, the bounds of the
- portion you are trying to examine.
- Suppose that the portion of yytable starts at index p
- and the index to be examined within the portion is i.
- Then if yycheck[p+i] != i, i is outside the bounds
- of what is actually allocated, and the default
- (from yydefact or yydefgoto) should be used.
- Otherwise, yytable[p+i] should be used. (range 0...YYLAST)
- @node Glossary, Index, Parser Symbols, Top
- @appendix Glossary
- @cindex glossary
- @table @asis
- @item Backus-Naur Form (BNF)
- Formal method of specifying context-free grammars. BNF was first used
- in the @cite{ALGOL-60} report, 1963. @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Context-free grammars
- Grammars specified as rules that can be applied regardless of context.
- Thus, if there is a rule which says that an integer can be used as an
- expression, integers are allowed @emph{anywhere} an expression is
- permitted. @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Dynamic allocation
- Allocation of memory that occurs during execution, rather than at
- compile time or on entry to a function.
- @item Empty string
- Analogous to the empty set in set theory, the empty string is a
- character string of length zero.
- @item Finite-state stack machine
- A ``machine'' that has discrete states in which it is said to exist at
- each instant in time. As input to the machine is processed, the
- machine moves from state to state as specified by the logic of the
- machine. In the case of the parser, the input is the language being
- parsed, and the states correspond to various stages in the grammar
- rules. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Grouping
- A language construct that is (in general) grammatically divisible;
- for example, `expression' or `declaration' in C.
- @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Infix operator
- An arithmetic operator that is placed between the operands on which it
- performs some operation.
- @item Input stream
- A continuous flow of data between devices or programs.
- @item Language construct
- One of the typical usage schemas of the language. For example, one of
- the constructs of the C language is the @code{if} statement.
- @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Left associativity
- Operators having left associativity are analyzed from left to right:
- @samp{a+b+c} first computes @samp{a+b} and then combines with
- @samp{c}. @xref{Precedence, ,Operator Precedence}.
- @item Left recursion
- A rule whose result symbol is also its first component symbol;
- for example, @samp{expseq1 : expseq1 ',' exp;}. @xref{Recursion, ,Recursive Rules}.
- @item Left-to-right parsing
- Parsing a sentence of a language by analyzing it token by token from
- left to right. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Lexical analyzer (scanner)
- A function that reads an input stream and returns tokens one by one.
- @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- @item Lexical tie-in
- A flag, set by actions in the grammar rules, which alters the way
- tokens are parsed. @xref{Lexical Tie-ins}.
- @item Look-ahead token
- A token already read but not yet shifted. @xref{Look-Ahead, ,Look-Ahead Tokens}.
- @item LALR(1)
- The class of context-free grammars that Bison (like most other parser
- generators) can handle; a subset of LR(1). @xref{Mystery Conflicts, ,
- Mysterious Reduce/Reduce Conflicts}.
- @item LR(1)
- The class of context-free grammars in which at most one token of
- look-ahead is needed to disambiguate the parsing of any piece of input.
- @item Nonterminal symbol
- A grammar symbol standing for a grammatical construct that can
- be expressed through rules in terms of smaller constructs; in other
- words, a construct that is not a token. @xref{Symbols, ,Symbols - Terminal and Nonterminal}.
- @item Parse error
- An error encountered during parsing of an input stream due to invalid
- syntax. @xref{Error Recovery}.
- @item Parser
- A function that recognizes valid sentences of a language by analyzing
- the syntax structure of a set of tokens passed to it from a lexical
- analyzer.
- @item Postfix operator
- An arithmetic operator that is placed after the operands upon which it
- performs some operation.
- @item Reduction
- Replacing a string of nonterminals and/or terminals with a single
- nonterminal, according to a grammar rule. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Reentrant
- A reentrant subprogram is a subprogram which can be in invoked any
- number of times in parallel, without interference between the various
- invocations. @xref{Pure Decl, ,A Pure (Reentrant) Parser}.
- @item Reverse polish notation
- A language in which all operators are postfix operators.
- @item Right recursion
- A rule whose result symbol is also its last component symbol;
- for example, @samp{expseq1: exp ',' expseq1;}. @xref{Recursion, ,Recursive Rules}.
- @item Semantics
- In computer languages, the semantics are specified by the actions
- taken for each instance of the language, i.e., the meaning of
- each statement. @xref{Semantics, ,Defining Language Semantics}.
- @item Shift
- A parser is said to shift when it makes the choice of analyzing
- further input from the stream rather than reducing immediately some
- already-recognized rule. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Single-character literal
- A single character that is recognized and interpreted as is.
- @xref{Grammar in Bison, ,From Formal Rules to Bison Input}.
- @item Start symbol
- The nonterminal symbol that stands for a complete valid utterance in
- the language being parsed. The start symbol is usually listed as the
- first nonterminal symbol in a language specification.
- @xref{Start Decl, ,The Start-Symbol}.
- @item Symbol table
- A data structure where symbol names and associated data are stored
- during parsing to allow for recognition and use of existing
- information in repeated uses of a symbol. @xref{Multi-function Calc}.
- @item Token
- A basic, grammatically indivisible unit of a language. The symbol
- that describes a token in the grammar is a terminal symbol.
- The input of the Bison parser is a stream of tokens which comes from
- the lexical analyzer. @xref{Symbols, ,Symbols - Terminal and Nonterminal}.
- @item Terminal symbol
- A grammar symbol that has no rules in the grammar and therefore
- is grammatically indivisible. The piece of text it represents
- is a token. @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @end table
- @node Index, , Glossary, Top
- @unnumbered Index
- @printindex cp
- @contents
- @bye
- @c old menu
- * Introduction::
- * Conditions::
- * Copying:: The GNU General Public License says
- how you can copy and share Bison
- Tutorial sections:
- * Concepts:: Basic concepts for understanding Bison.
- * Examples:: Three simple explained examples of using Bison.
- Reference sections:
- * Grammar File:: Writing Bison declarations and rules.
- * Interface:: C-language interface to the parser function @code{yyparse}.
- * Algorithm:: How the Bison parser works at run-time.
- * Error Recovery:: Writing rules for error recovery.
- * Context Dependency::What to do if your language syntax is too
- messy for Bison to handle straightforwardly.
- * Debugging:: Debugging Bison parsers that parse wrong.
- * Invocation:: How to run Bison (to produce the parser source file).
- * Table of Symbols:: All the keywords of the Bison language are explained.
- * Glossary:: Basic concepts are explained.
- * Index:: Cross-references to the text.
|