1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239 |
- \input texinfo @c -*-texinfo-*-
- @comment %**start of header
- @setfilename bison.info
- @settitle Bison 1.24
- @setchapternewpage odd
- @iftex
- @finalout
- @end iftex
- @c SMALL BOOK version
- @c This edition has been formatted so that you can format and print it in
- @c the smallbook format.
- @c @smallbook
- @c next time, consider using @set for edition number, etc...
- @c Set following if you have the new `shorttitlepage' command
- @c @clear shorttitlepage-enabled
- @c @set shorttitlepage-enabled
- @c ISPELL CHECK: done, 14 Jan 1993 --bob
- @c Check COPYRIGHT dates. should be updated in the titlepage, ifinfo
- @c titlepage; should NOT be changed in the GPL. --mew
- @iftex
- @syncodeindex fn cp
- @syncodeindex vr cp
- @syncodeindex tp cp
- @end iftex
- @ifinfo
- @synindex fn cp
- @synindex vr cp
- @synindex tp cp
- @end ifinfo
- @comment %**end of header
- @ifinfo
- This file documents the Bison parser generator.
- Copyright (C) 1988, 89, 90, 91, 92, 93, 1995 Free Software Foundation, Inc.
- Permission is granted to make and distribute verbatim copies of
- this manual provided the copyright notice and this permission notice
- are preserved on all copies.
- @ignore
- Permission is granted to process this file through Tex and print the
- results, provided the printed document carries copying permission
- notice identical to this one except for the removal of this paragraph
- (this paragraph not being relevant to the printed manual).
- @end ignore
- Permission is granted to copy and distribute modified versions of this
- manual under the conditions for verbatim copying, provided also that the
- sections entitled ``GNU General Public License'' and ``Conditions for
- Using Bison'' are included exactly as in the original, and provided that
- the entire resulting derived work is distributed under the terms of a
- permission notice identical to this one.
- Permission is granted to copy and distribute translations of this manual
- into another language, under the above conditions for modified versions,
- except that the sections entitled ``GNU General Public License'',
- ``Conditions for Using Bison'' and this permission notice may be
- included in translations approved by the Free Software Foundation
- instead of in the original English.
- @end ifinfo
- @ifset shorttitlepage-enabled
- @shorttitlepage Bison
- @end ifset
- @titlepage
- @title Bison
- @subtitle The YACC-compatible Parser Generator
- @subtitle May 1995, Bison Version 1.24
- @author by Charles Donnelly and Richard Stallman
- @page
- @vskip 0pt plus 1filll
- Copyright @copyright{} 1988, 89, 90, 91, 92, 93, 1995 Free Software
- Foundation
- @sp 2
- Published by the Free Software Foundation @*
- 675 Massachusetts Avenue @*
- Cambridge, MA 02139 USA @*
- Printed copies are available for $15 each.@*
- ISBN-1-882114-30-2
- Permission is granted to make and distribute verbatim copies of
- this manual provided the copyright notice and this permission notice
- are preserved on all copies.
- @ignore
- Permission is granted to process this file through TeX and print the
- results, provided the printed document carries copying permission
- notice identical to this one except for the removal of this paragraph
- (this paragraph not being relevant to the printed manual).
- @end ignore
- Permission is granted to copy and distribute modified versions of this
- manual under the conditions for verbatim copying, provided also that the
- sections entitled ``GNU General Public License'' and ``Conditions for
- Using Bison'' are included exactly as in the original, and provided that
- the entire resulting derived work is distributed under the terms of a
- permission notice identical to this one.
- Permission is granted to copy and distribute translations of this manual
- into another language, under the above conditions for modified versions,
- except that the sections entitled ``GNU General Public License'',
- ``Conditions for Using Bison'' and this permission notice may be
- included in translations approved by the Free Software Foundation
- instead of in the original English.
- @sp 2
- Cover art by Etienne Suvasa.
- @end titlepage
- @page
- @node Top, Introduction, (dir), (dir)
- @ifinfo
- This manual documents version 1.24 of Bison.
- @end ifinfo
- @menu
- * Introduction::
- * Conditions::
- * Copying:: The GNU General Public License says
- how you can copy and share Bison
- Tutorial sections:
- * Concepts:: Basic concepts for understanding Bison.
- * Examples:: Three simple explained examples of using Bison.
- Reference sections:
- * Grammar File:: Writing Bison declarations and rules.
- * Interface:: C-language interface to the parser function @code{yyparse}.
- * Algorithm:: How the Bison parser works at run-time.
- * Error Recovery:: Writing rules for error recovery.
- * Context Dependency:: What to do if your language syntax is too
- messy for Bison to handle straightforwardly.
- * Debugging:: Debugging Bison parsers that parse wrong.
- * Invocation:: How to run Bison (to produce the parser source file).
- * Table of Symbols:: All the keywords of the Bison language are explained.
- * Glossary:: Basic concepts are explained.
- * Index:: Cross-references to the text.
- --- The Detailed Node Listing ---
- The Concepts of Bison
- * Language and Grammar:: Languages and context-free grammars,
- as mathematical ideas.
- * Grammar in Bison:: How we represent grammars for Bison's sake.
- * Semantic Values:: Each token or syntactic grouping can have
- a semantic value (the value of an integer,
- the name of an identifier, etc.).
- * Semantic Actions:: Each rule can have an action containing C code.
- * Bison Parser:: What are Bison's input and output,
- how is the output used?
- * Stages:: Stages in writing and running Bison grammars.
- * Grammar Layout:: Overall structure of a Bison grammar file.
- Examples
- * RPN Calc:: Reverse polish notation calculator;
- a first example with no operator precedence.
- * Infix Calc:: Infix (algebraic) notation calculator.
- Operator precedence is introduced.
- * Simple Error Recovery:: Continuing after syntax errors.
- * Multi-function Calc:: Calculator with memory and trig functions.
- It uses multiple data-types for semantic values.
- * Exercises:: Ideas for improving the multi-function calculator.
- Reverse Polish Notation Calculator
- * Decls: Rpcalc Decls. Bison and C declarations for rpcalc.
- * Rules: Rpcalc Rules. Grammar Rules for rpcalc, with explanation.
- * Lexer: Rpcalc Lexer. The lexical analyzer.
- * Main: Rpcalc Main. The controlling function.
- * Error: Rpcalc Error. The error reporting function.
- * Gen: Rpcalc Gen. Running Bison on the grammar file.
- * Comp: Rpcalc Compile. Run the C compiler on the output code.
- Grammar Rules for @code{rpcalc}
- * Rpcalc Input::
- * Rpcalc Line::
- * Rpcalc Expr::
- Multi-Function Calculator: @code{mfcalc}
- * Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
- * Rules: Mfcalc Rules. Grammar rules for the calculator.
- * Symtab: Mfcalc Symtab. Symbol table management subroutines.
- Bison Grammar Files
- * Grammar Outline:: Overall layout of the grammar file.
- * Symbols:: Terminal and nonterminal symbols.
- * Rules:: How to write grammar rules.
- * Recursion:: Writing recursive rules.
- * Semantics:: Semantic values and actions.
- * Declarations:: All kinds of Bison declarations are described here.
- * Multiple Parsers:: Putting more than one Bison parser in one program.
- Outline of a Bison Grammar
- * C Declarations:: Syntax and usage of the C declarations section.
- * Bison Declarations:: Syntax and usage of the Bison declarations section.
- * Grammar Rules:: Syntax and usage of the grammar rules section.
- * C Code:: Syntax and usage of the additional C code section.
- Defining Language Semantics
- * Value Type:: Specifying one data type for all semantic values.
- * Multiple Types:: Specifying several alternative data types.
- * Actions:: An action is the semantic definition of a grammar rule.
- * Action Types:: Specifying data types for actions to operate on.
- * Mid-Rule Actions:: Most actions go at the end of a rule.
- This says when, why and how to use the exceptional
- action in the middle of a rule.
- Bison Declarations
- * Token Decl:: Declaring terminal symbols.
- * Precedence Decl:: Declaring terminals with precedence and associativity.
- * Union Decl:: Declaring the set of all semantic value types.
- * Type Decl:: Declaring the choice of type for a nonterminal symbol.
- * Expect Decl:: Suppressing warnings about shift/reduce conflicts.
- * Start Decl:: Specifying the start symbol.
- * Pure Decl:: Requesting a reentrant parser.
- * Decl Summary:: Table of all Bison declarations.
- Parser C-Language Interface
- * Parser Function:: How to call @code{yyparse} and what it returns.
- * Lexical:: You must supply a function @code{yylex}
- which reads tokens.
- * Error Reporting:: You must supply a function @code{yyerror}.
- * Action Features:: Special features for use in actions.
- The Lexical Analyzer Function @code{yylex}
- * Calling Convention:: How @code{yyparse} calls @code{yylex}.
- * Token Values:: How @code{yylex} must return the semantic value
- of the token it has read.
- * Token Positions:: How @code{yylex} must return the text position
- (line number, etc.) of the token, if the
- actions want that.
- * Pure Calling:: How the calling convention differs
- in a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}).
- The Bison Parser Algorithm
- * Look-Ahead:: Parser looks one token ahead when deciding what to do.
- * Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
- * Precedence:: Operator precedence works by resolving conflicts.
- * Contextual Precedence:: When an operator's precedence depends on context.
- * Parser States:: The parser is a finite-state-machine with stack.
- * Reduce/Reduce:: When two rules are applicable in the same situation.
- * Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
- * Stack Overflow:: What happens when stack gets full. How to avoid it.
- Operator Precedence
- * Why Precedence:: An example showing why precedence is needed.
- * Using Precedence:: How to specify precedence in Bison grammars.
- * Precedence Examples:: How these features are used in the previous example.
- * How Precedence:: How they work.
- Handling Context Dependencies
- * Semantic Tokens:: Token parsing can depend on the semantic context.
- * Lexical Tie-ins:: Token parsing can depend on the syntactic context.
- * Tie-in Recovery:: Lexical tie-ins have implications for how
- error recovery rules must be written.
- Invoking Bison
- * Bison Options:: All the options described in detail,
- in alphabetical order by short options.
- * Option Cross Key:: Alphabetical list of long options.
- * VMS Invocation:: Bison command syntax on VMS.
- @end menu
- @node Introduction, Conditions, Top, Top
- @unnumbered Introduction
- @cindex introduction
- @dfn{Bison} is a general-purpose parser generator that converts a
- grammar description for an LALR(1) context-free grammar into a C
- program to parse that grammar. Once you are proficient with Bison,
- you may use it to develop a wide range of language parsers, from those
- used in simple desk calculators to complex programming languages.
- Bison is upward compatible with Yacc: all properly-written Yacc grammars
- ought to work with Bison with no change. Anyone familiar with Yacc
- should be able to use Bison with little trouble. You need to be fluent in
- C programming in order to use Bison or to understand this manual.
- We begin with tutorial chapters that explain the basic concepts of using
- Bison and show three explained examples, each building on the last. If you
- don't know Bison or Yacc, start by reading these chapters. Reference
- chapters follow which describe specific aspects of Bison in detail.
- Bison was written primarily by Robert Corbett; Richard Stallman made
- it Yacc-compatible. This edition corresponds to version 1.24 of Bison.
- @node Conditions, Copying, Introduction, Top
- @unnumbered Conditions for Using Bison
- As of Bison version 1.24, we have changed the distribution terms for
- @code{yyparse} to permit using Bison's output in non-free programs.
- Formerly, Bison parsers could be used only in programs that were free
- software.
- The other GNU programming tools, such as the GNU C compiler, have never
- had such a requirement. They could always be used for non-free
- software. The reason Bison was different was not due to a special
- policy decision; it resulted from applying the usual General Public
- License to all of the Bison source code.
- The output of the Bison utility---the Bison parser file---contains a
- verbatim copy of a sizable piece of Bison, which is the code for the
- @code{yyparse} function. (The actions from your grammar are inserted
- into this function at one point, but the rest of the function is not
- changed.) When we applied the GPL terms to the code for @code{yyparse},
- the effect was to restrict the use of Bison output to free software.
- We didn't change the terms because of sympathy for people who want to
- make software proprietary. @strong{Software should be free.} But we
- concluded that limiting Bison's use to free software was doing little to
- encourage people to make other software free. So we decided to make the
- practical conditions for using Bison match the practical conditions for
- using the other GNU tools.
- @node Copying, Concepts, Conditions, Top
- @unnumbered GNU GENERAL PUBLIC LICENSE
- @center Version 2, June 1991
- @display
- Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
- @end display
- @unnumberedsec Preamble
- The licenses for most software are designed to take away your
- freedom to share and change it. By contrast, the GNU General Public
- License is intended to guarantee your freedom to share and change free
- software---to make sure the software is free for all its users. This
- General Public License applies to most of the Free Software
- Foundation's software and to any other program whose authors commit to
- using it. (Some other Free Software Foundation software is covered by
- the GNU Library General Public License instead.) You can apply it to
- your programs, too.
- When we speak of free software, we are referring to freedom, not
- price. Our General Public Licenses are designed to make sure that you
- have the freedom to distribute copies of free software (and charge for
- this service if you wish), that you receive source code or can get it
- if you want it, that you can change the software or use pieces of it
- in new free programs; and that you know you can do these things.
- To protect your rights, we need to make restrictions that forbid
- anyone to deny you these rights or to ask you to surrender the rights.
- These restrictions translate to certain responsibilities for you if you
- distribute copies of the software, or if you modify it.
- For example, if you distribute copies of such a program, whether
- gratis or for a fee, you must give the recipients all the rights that
- you have. You must make sure that they, too, receive or can get the
- source code. And you must show them these terms so they know their
- rights.
- We protect your rights with two steps: (1) copyright the software, and
- (2) offer you this license which gives you legal permission to copy,
- distribute and/or modify the software.
- Also, for each author's protection and ours, we want to make certain
- that everyone understands that there is no warranty for this free
- software. If the software is modified by someone else and passed on, we
- want its recipients to know that what they have is not the original, so
- that any problems introduced by others will not reflect on the original
- authors' reputations.
- Finally, any free program is threatened constantly by software
- patents. We wish to avoid the danger that redistributors of a free
- program will individually obtain patent licenses, in effect making the
- program proprietary. To prevent this, we have made it clear that any
- patent must be licensed for everyone's free use or not licensed at all.
- The precise terms and conditions for copying, distribution and
- modification follow.
- @iftex
- @unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
- @end iftex
- @ifinfo
- @center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
- @end ifinfo
- @enumerate 0
- @item
- This License applies to any program or other work which contains
- a notice placed by the copyright holder saying it may be distributed
- under the terms of this General Public License. The ``Program'', below,
- refers to any such program or work, and a ``work based on the Program''
- means either the Program or any derivative work under copyright law:
- that is to say, a work containing the Program or a portion of it,
- either verbatim or with modifications and/or translated into another
- language. (Hereinafter, translation is included without limitation in
- the term ``modification''.) Each licensee is addressed as ``you''.
- Activities other than copying, distribution and modification are not
- covered by this License; they are outside its scope. The act of
- running the Program is not restricted, and the output from the Program
- is covered only if its contents constitute a work based on the
- Program (independent of having been made by running the Program).
- Whether that is true depends on what the Program does.
- @item
- You may copy and distribute verbatim copies of the Program's
- source code as you receive it, in any medium, provided that you
- conspicuously and appropriately publish on each copy an appropriate
- copyright notice and disclaimer of warranty; keep intact all the
- notices that refer to this License and to the absence of any warranty;
- and give any other recipients of the Program a copy of this License
- along with the Program.
- You may charge a fee for the physical act of transferring a copy, and
- you may at your option offer warranty protection in exchange for a fee.
- @item
- You may modify your copy or copies of the Program or any portion
- of it, thus forming a work based on the Program, and copy and
- distribute such modifications or work under the terms of Section 1
- above, provided that you also meet all of these conditions:
- @enumerate a
- @item
- You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
- @item
- You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
- @item
- If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
- @end enumerate
- These requirements apply to the modified work as a whole. If
- identifiable sections of that work are not derived from the Program,
- and can be reasonably considered independent and separate works in
- themselves, then this License, and its terms, do not apply to those
- sections when you distribute them as separate works. But when you
- distribute the same sections as part of a whole which is a work based
- on the Program, the distribution of the whole must be on the terms of
- this License, whose permissions for other licensees extend to the
- entire whole, and thus to each and every part regardless of who wrote it.
- Thus, it is not the intent of this section to claim rights or contest
- your rights to work written entirely by you; rather, the intent is to
- exercise the right to control the distribution of derivative or
- collective works based on the Program.
- In addition, mere aggregation of another work not based on the Program
- with the Program (or with a work based on the Program) on a volume of
- a storage or distribution medium does not bring the other work under
- the scope of this License.
- @item
- You may copy and distribute the Program (or a work based on it,
- under Section 2) in object code or executable form under the terms of
- Sections 1 and 2 above provided that you also do one of the following:
- @enumerate a
- @item
- Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
- @item
- Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
- @item
- Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
- @end enumerate
- The source code for a work means the preferred form of the work for
- making modifications to it. For an executable work, complete source
- code means all the source code for all modules it contains, plus any
- associated interface definition files, plus the scripts used to
- control compilation and installation of the executable. However, as a
- special exception, the source code distributed need not include
- anything that is normally distributed (in either source or binary
- form) with the major components (compiler, kernel, and so on) of the
- operating system on which the executable runs, unless that component
- itself accompanies the executable.
- If distribution of executable or object code is made by offering
- access to copy from a designated place, then offering equivalent
- access to copy the source code from the same place counts as
- distribution of the source code, even though third parties are not
- compelled to copy the source along with the object code.
- @item
- You may not copy, modify, sublicense, or distribute the Program
- except as expressly provided under this License. Any attempt
- otherwise to copy, modify, sublicense or distribute the Program is
- void, and will automatically terminate your rights under this License.
- However, parties who have received copies, or rights, from you under
- this License will not have their licenses terminated so long as such
- parties remain in full compliance.
- @item
- You are not required to accept this License, since you have not
- signed it. However, nothing else grants you permission to modify or
- distribute the Program or its derivative works. These actions are
- prohibited by law if you do not accept this License. Therefore, by
- modifying or distributing the Program (or any work based on the
- Program), you indicate your acceptance of this License to do so, and
- all its terms and conditions for copying, distributing or modifying
- the Program or works based on it.
- @item
- Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from the
- original licensor to copy, distribute or modify the Program subject to
- these terms and conditions. You may not impose any further
- restrictions on the recipients' exercise of the rights granted herein.
- You are not responsible for enforcing compliance by third parties to
- this License.
- @item
- If, as a consequence of a court judgment or allegation of patent
- infringement or for any other reason (not limited to patent issues),
- conditions are imposed on you (whether by court order, agreement or
- otherwise) that contradict the conditions of this License, they do not
- excuse you from the conditions of this License. If you cannot
- distribute so as to satisfy simultaneously your obligations under this
- License and any other pertinent obligations, then as a consequence you
- may not distribute the Program at all. For example, if a patent
- license would not permit royalty-free redistribution of the Program by
- all those who receive copies directly or indirectly through you, then
- the only way you could satisfy both it and this License would be to
- refrain entirely from distribution of the Program.
- If any portion of this section is held invalid or unenforceable under
- any particular circumstance, the balance of the section is intended to
- apply and the section as a whole is intended to apply in other
- circumstances.
- It is not the purpose of this section to induce you to infringe any
- patents or other property right claims or to contest validity of any
- such claims; this section has the sole purpose of protecting the
- integrity of the free software distribution system, which is
- implemented by public license practices. Many people have made
- generous contributions to the wide range of software distributed
- through that system in reliance on consistent application of that
- system; it is up to the author/donor to decide if he or she is willing
- to distribute software through any other system and a licensee cannot
- impose that choice.
- This section is intended to make thoroughly clear what is believed to
- be a consequence of the rest of this License.
- @item
- If the distribution and/or use of the Program is restricted in
- certain countries either by patents or by copyrighted interfaces, the
- original copyright holder who places the Program under this License
- may add an explicit geographical distribution limitation excluding
- those countries, so that distribution is permitted only in or among
- countries not thus excluded. In such case, this License incorporates
- the limitation as if written in the body of this License.
- @item
- The Free Software Foundation may publish revised and/or new versions
- of the General Public License from time to time. Such new versions will
- be similar in spirit to the present version, but may differ in detail to
- address new problems or concerns.
- Each version is given a distinguishing version number. If the Program
- specifies a version number of this License which applies to it and ``any
- later version'', you have the option of following the terms and conditions
- either of that version or of any later version published by the Free
- Software Foundation. If the Program does not specify a version number of
- this License, you may choose any version ever published by the Free Software
- Foundation.
- @item
- If you wish to incorporate parts of the Program into other free
- programs whose distribution conditions are different, write to the author
- to ask for permission. For software which is copyrighted by the Free
- Software Foundation, write to the Free Software Foundation; we sometimes
- make exceptions for this. Our decision will be guided by the two goals
- of preserving the free status of all derivatives of our free software and
- of promoting the sharing and reuse of software generally.
- @iftex
- @heading NO WARRANTY
- @end iftex
- @ifinfo
- @center NO WARRANTY
- @end ifinfo
- @item
- BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
- FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
- OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
- PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
- OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
- TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
- PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
- REPAIR OR CORRECTION.
- @item
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
- REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
- INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
- OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
- TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
- YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
- PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGES.
- @end enumerate
- @iftex
- @heading END OF TERMS AND CONDITIONS
- @end iftex
- @ifinfo
- @center END OF TERMS AND CONDITIONS
- @end ifinfo
- @page
- @unnumberedsec How to Apply These Terms to Your New Programs
- If you develop a new program, and you want it to be of the greatest
- possible use to the public, the best way to achieve this is to make it
- free software which everyone can redistribute and change under these terms.
- To do so, attach the following notices to the program. It is safest
- to attach them to the start of each source file to most effectively
- convey the exclusion of warranty; and each file should have at least
- the ``copyright'' line and a pointer to where the full notice is found.
- @smallexample
- @var{one line to give the program's name and a brief idea of what it does.}
- Copyright (C) 19@var{yy} @var{name of author}
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- @end smallexample
- Also add information on how to contact you by electronic and paper mail.
- If the program is interactive, make it output a short notice like this
- when it starts in an interactive mode:
- @smallexample
- Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
- type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
- @end smallexample
- The hypothetical commands @samp{show w} and @samp{show c} should show
- the appropriate parts of the General Public License. Of course, the
- commands you use may be called something other than @samp{show w} and
- @samp{show c}; they could even be mouse-clicks or menu items---whatever
- suits your program.
- You should also get your employer (if you work as a programmer) or your
- school, if any, to sign a ``copyright disclaimer'' for the program, if
- necessary. Here is a sample; alter the names:
- @smallexample
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
- @var{signature of Ty Coon}, 1 April 1989
- Ty Coon, President of Vice
- @end smallexample
- This General Public License does not permit incorporating your program into
- proprietary programs. If your program is a subroutine library, you may
- consider it more useful to permit linking proprietary applications with the
- library. If this is what you want to do, use the GNU Library General
- Public License instead of this License.
- @node Concepts, Examples, Copying, Top
- @chapter The Concepts of Bison
- This chapter introduces many of the basic concepts without which the
- details of Bison will not make sense. If you do not already know how to
- use Bison or Yacc, we suggest you start by reading this chapter carefully.
- @menu
- * Language and Grammar:: Languages and context-free grammars,
- as mathematical ideas.
- * Grammar in Bison:: How we represent grammars for Bison's sake.
- * Semantic Values:: Each token or syntactic grouping can have
- a semantic value (the value of an integer,
- the name of an identifier, etc.).
- * Semantic Actions:: Each rule can have an action containing C code.
- * Bison Parser:: What are Bison's input and output,
- how is the output used?
- * Stages:: Stages in writing and running Bison grammars.
- * Grammar Layout:: Overall structure of a Bison grammar file.
- @end menu
- @node Language and Grammar, Grammar in Bison, , Concepts
- @section Languages and Context-Free Grammars
- @cindex context-free grammar
- @cindex grammar, context-free
- In order for Bison to parse a language, it must be described by a
- @dfn{context-free grammar}. This means that you specify one or more
- @dfn{syntactic groupings} and give rules for constructing them from their
- parts. For example, in the C language, one kind of grouping is called an
- `expression'. One rule for making an expression might be, ``An expression
- can be made of a minus sign and another expression''. Another would be,
- ``An expression can be an integer''. As you can see, rules are often
- recursive, but there must be at least one rule which leads out of the
- recursion.
- @cindex BNF
- @cindex Backus-Naur form
- The most common formal system for presenting such rules for humans to read
- is @dfn{Backus-Naur Form} or ``BNF'', which was developed in order to
- specify the language Algol 60. Any grammar expressed in BNF is a
- context-free grammar. The input to Bison is essentially machine-readable
- BNF.
- Not all context-free languages can be handled by Bison, only those
- that are LALR(1). In brief, this means that it must be possible to
- tell how to parse any portion of an input string with just a single
- token of look-ahead. Strictly speaking, that is a description of an
- LR(1) grammar, and LALR(1) involves additional restrictions that are
- hard to explain simply; but it is rare in actual practice to find an
- LR(1) grammar that fails to be LALR(1). @xref{Mystery Conflicts, ,
- Mysterious Reduce/Reduce Conflicts}, for more information on this.
- @cindex symbols (abstract)
- @cindex token
- @cindex syntactic grouping
- @cindex grouping, syntactic
- In the formal grammatical rules for a language, each kind of syntactic unit
- or grouping is named by a @dfn{symbol}. Those which are built by grouping
- smaller constructs according to grammatical rules are called
- @dfn{nonterminal symbols}; those which can't be subdivided are called
- @dfn{terminal symbols} or @dfn{token types}. We call a piece of input
- corresponding to a single terminal symbol a @dfn{token}, and a piece
- corresponding to a single nonterminal symbol a @dfn{grouping}.@refill
- We can use the C language as an example of what symbols, terminal and
- nonterminal, mean. The tokens of C are identifiers, constants (numeric and
- string), and the various keywords, arithmetic operators and punctuation
- marks. So the terminal symbols of a grammar for C include `identifier',
- `number', `string', plus one symbol for each keyword, operator or
- punctuation mark: `if', `return', `const', `static', `int', `char',
- `plus-sign', `open-brace', `close-brace', `comma' and many more. (These
- tokens can be subdivided into characters, but that is a matter of
- lexicography, not grammar.)
- Here is a simple C function subdivided into tokens:
- @example
- int /* @r{keyword `int'} */
- square (x) /* @r{identifier, open-paren,} */
- /* @r{identifier, close-paren} */
- int x; /* @r{keyword `int', identifier, semicolon} */
- @{ /* @r{open-brace} */
- return x * x; /* @r{keyword `return', identifier,} */
- /* @r{asterisk, identifier, semicolon} */
- @} /* @r{close-brace} */
- @end example
- The syntactic groupings of C include the expression, the statement, the
- declaration, and the function definition. These are represented in the
- grammar of C by nonterminal symbols `expression', `statement',
- `declaration' and `function definition'. The full grammar uses dozens of
- additional language constructs, each with its own nonterminal symbol, in
- order to express the meanings of these four. The example above is a
- function definition; it contains one declaration, and one statement. In
- the statement, each @samp{x} is an expression and so is @samp{x * x}.
- Each nonterminal symbol must have grammatical rules showing how it is made
- out of simpler constructs. For example, one kind of C statement is the
- @code{return} statement; this would be described with a grammar rule which
- reads informally as follows:
- @quotation
- A `statement' can be made of a `return' keyword, an `expression' and a
- `semicolon'.
- @end quotation
- @noindent
- There would be many other rules for `statement', one for each kind of
- statement in C.
- @cindex start symbol
- One nonterminal symbol must be distinguished as the special one which
- defines a complete utterance in the language. It is called the @dfn{start
- symbol}. In a compiler, this means a complete input program. In the C
- language, the nonterminal symbol `sequence of definitions and declarations'
- plays this role.
- For example, @samp{1 + 2} is a valid C expression---a valid part of a C
- program---but it is not valid as an @emph{entire} C program. In the
- context-free grammar of C, this follows from the fact that `expression' is
- not the start symbol.
- The Bison parser reads a sequence of tokens as its input, and groups the
- tokens using the grammar rules. If the input is valid, the end result is
- that the entire token sequence reduces to a single grouping whose symbol is
- the grammar's start symbol. If we use a grammar for C, the entire input
- must be a `sequence of definitions and declarations'. If not, the parser
- reports a syntax error.
- @node Grammar in Bison, Semantic Values, Language and Grammar, Concepts
- @section From Formal Rules to Bison Input
- @cindex Bison grammar
- @cindex grammar, Bison
- @cindex formal grammar
- A formal grammar is a mathematical construct. To define the language
- for Bison, you must write a file expressing the grammar in Bison syntax:
- a @dfn{Bison grammar} file. @xref{Grammar File, ,Bison Grammar Files}.
- A nonterminal symbol in the formal grammar is represented in Bison input
- as an identifier, like an identifier in C. By convention, it should be
- in lower case, such as @code{expr}, @code{stmt} or @code{declaration}.
- The Bison representation for a terminal symbol is also called a @dfn{token
- type}. Token types as well can be represented as C-like identifiers. By
- convention, these identifiers should be upper case to distinguish them from
- nonterminals: for example, @code{INTEGER}, @code{IDENTIFIER}, @code{IF} or
- @code{RETURN}. A terminal symbol that stands for a particular keyword in
- the language should be named after that keyword converted to upper case.
- The terminal symbol @code{error} is reserved for error recovery.
- @xref{Symbols}.@refill
- A terminal symbol can also be represented as a character literal, just like
- a C character constant. You should do this whenever a token is just a
- single character (parenthesis, plus-sign, etc.): use that same character in
- a literal as the terminal symbol for that token.
- The grammar rules also have an expression in Bison syntax. For example,
- here is the Bison rule for a C @code{return} statement. The semicolon in
- quotes is a literal character token, representing part of the C syntax for
- the statement; the naked semicolon, and the colon, are Bison punctuation
- used in every rule.
- @example
- stmt: RETURN expr ';'
- ;
- @end example
- @noindent
- @xref{Rules, ,Syntax of Grammar Rules}.
- @node Semantic Values, Semantic Actions, Grammar in Bison, Concepts
- @section Semantic Values
- @cindex semantic value
- @cindex value, semantic
- A formal grammar selects tokens only by their classifications: for example,
- if a rule mentions the terminal symbol `integer constant', it means that
- @emph{any} integer constant is grammatically valid in that position. The
- precise value of the constant is irrelevant to how to parse the input: if
- @samp{x+4} is grammatical then @samp{x+1} or @samp{x+3989} is equally
- grammatical.@refill
- But the precise value is very important for what the input means once it is
- parsed. A compiler is useless if it fails to distinguish between 4, 1 and
- 3989 as constants in the program! Therefore, each token in a Bison grammar
- has both a token type and a @dfn{semantic value}. @xref{Semantics, ,Defining Language Semantics},
- for details.
- The token type is a terminal symbol defined in the grammar, such as
- @code{INTEGER}, @code{IDENTIFIER} or @code{','}. It tells everything
- you need to know to decide where the token may validly appear and how to
- group it with other tokens. The grammar rules know nothing about tokens
- except their types.@refill
- The semantic value has all the rest of the information about the
- meaning of the token, such as the value of an integer, or the name of an
- identifier. (A token such as @code{','} which is just punctuation doesn't
- need to have any semantic value.)
- For example, an input token might be classified as token type
- @code{INTEGER} and have the semantic value 4. Another input token might
- have the same token type @code{INTEGER} but value 3989. When a grammar
- rule says that @code{INTEGER} is allowed, either of these tokens is
- acceptable because each is an @code{INTEGER}. When the parser accepts the
- token, it keeps track of the token's semantic value.
- Each grouping can also have a semantic value as well as its nonterminal
- symbol. For example, in a calculator, an expression typically has a
- semantic value that is a number. In a compiler for a programming
- language, an expression typically has a semantic value that is a tree
- structure describing the meaning of the expression.
- @node Semantic Actions, Bison Parser, Semantic Values, Concepts
- @section Semantic Actions
- @cindex semantic actions
- @cindex actions, semantic
- In order to be useful, a program must do more than parse input; it must
- also produce some output based on the input. In a Bison grammar, a grammar
- rule can have an @dfn{action} made up of C statements. Each time the
- parser recognizes a match for that rule, the action is executed.
- @xref{Actions}.
-
- Most of the time, the purpose of an action is to compute the semantic value
- of the whole construct from the semantic values of its parts. For example,
- suppose we have a rule which says an expression can be the sum of two
- expressions. When the parser recognizes such a sum, each of the
- subexpressions has a semantic value which describes how it was built up.
- The action for this rule should create a similar sort of value for the
- newly recognized larger expression.
- For example, here is a rule that says an expression can be the sum of
- two subexpressions:
- @example
- expr: expr '+' expr @{ $$ = $1 + $3; @}
- ;
- @end example
- @noindent
- The action says how to produce the semantic value of the sum expression
- from the values of the two subexpressions.
- @node Bison Parser, Stages, Semantic Actions, Concepts
- @section Bison Output: the Parser File
- @cindex Bison parser
- @cindex Bison utility
- @cindex lexical analyzer, purpose
- @cindex parser
- When you run Bison, you give it a Bison grammar file as input. The output
- is a C source file that parses the language described by the grammar.
- This file is called a @dfn{Bison parser}. Keep in mind that the Bison
- utility and the Bison parser are two distinct programs: the Bison utility
- is a program whose output is the Bison parser that becomes part of your
- program.
- The job of the Bison parser is to group tokens into groupings according to
- the grammar rules---for example, to build identifiers and operators into
- expressions. As it does this, it runs the actions for the grammar rules it
- uses.
- The tokens come from a function called the @dfn{lexical analyzer} that you
- must supply in some fashion (such as by writing it in C). The Bison parser
- calls the lexical analyzer each time it wants a new token. It doesn't know
- what is ``inside'' the tokens (though their semantic values may reflect
- this). Typically the lexical analyzer makes the tokens by parsing
- characters of text, but Bison does not depend on this. @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- The Bison parser file is C code which defines a function named
- @code{yyparse} which implements that grammar. This function does not make
- a complete C program: you must supply some additional functions. One is
- the lexical analyzer. Another is an error-reporting function which the
- parser calls to report an error. In addition, a complete C program must
- start with a function called @code{main}; you have to provide this, and
- arrange for it to call @code{yyparse} or the parser will never run.
- @xref{Interface, ,Parser C-Language Interface}.
- Aside from the token type names and the symbols in the actions you
- write, all variable and function names used in the Bison parser file
- begin with @samp{yy} or @samp{YY}. This includes interface functions
- such as the lexical analyzer function @code{yylex}, the error reporting
- function @code{yyerror} and the parser function @code{yyparse} itself.
- This also includes numerous identifiers used for internal purposes.
- Therefore, you should avoid using C identifiers starting with @samp{yy}
- or @samp{YY} in the Bison grammar file except for the ones defined in
- this manual.
- @node Stages, Grammar Layout, Bison Parser, Concepts
- @section Stages in Using Bison
- @cindex stages in using Bison
- @cindex using Bison
- The actual language-design process using Bison, from grammar specification
- to a working compiler or interpreter, has these parts:
- @enumerate
- @item
- Formally specify the grammar in a form recognized by Bison
- (@pxref{Grammar File, ,Bison Grammar Files}). For each grammatical rule in the language,
- describe the action that is to be taken when an instance of that rule
- is recognized. The action is described by a sequence of C statements.
- @item
- Write a lexical analyzer to process input and pass tokens to the
- parser. The lexical analyzer may be written by hand in C
- (@pxref{Lexical, ,The Lexical Analyzer Function @code{yylex}}). It could also be produced using Lex, but the use
- of Lex is not discussed in this manual.
- @item
- Write a controlling function that calls the Bison-produced parser.
- @item
- Write error-reporting routines.
- @end enumerate
- To turn this source code as written into a runnable program, you
- must follow these steps:
- @enumerate
- @item
- Run Bison on the grammar to produce the parser.
- @item
- Compile the code output by Bison, as well as any other source files.
- @item
- Link the object files to produce the finished product.
- @end enumerate
- @node Grammar Layout, , Stages, Concepts
- @section The Overall Layout of a Bison Grammar
- @cindex grammar file
- @cindex file format
- @cindex format of grammar file
- @cindex layout of Bison grammar
- The input file for the Bison utility is a @dfn{Bison grammar file}. The
- general form of a Bison grammar file is as follows:
- @example
- %@{
- @var{C declarations}
- %@}
- @var{Bison declarations}
- %%
- @var{Grammar rules}
- %%
- @var{Additional C code}
- @end example
- @noindent
- The @samp{%%}, @samp{%@{} and @samp{%@}} are punctuation that appears
- in every Bison grammar file to separate the sections.
- The C declarations may define types and variables used in the actions.
- You can also use preprocessor commands to define macros used there, and use
- @code{#include} to include header files that do any of these things.
- The Bison declarations declare the names of the terminal and nonterminal
- symbols, and may also describe operator precedence and the data types of
- semantic values of various symbols.
- The grammar rules define how to construct each nonterminal symbol from its
- parts.
- The additional C code can contain any C code you want to use. Often the
- definition of the lexical analyzer @code{yylex} goes here, plus subroutines
- called by the actions in the grammar rules. In a simple program, all the
- rest of the program can go here.
- @node Examples, Grammar File, Concepts, Top
- @chapter Examples
- @cindex simple examples
- @cindex examples, simple
- Now we show and explain three sample programs written using Bison: a
- reverse polish notation calculator, an algebraic (infix) notation
- calculator, and a multi-function calculator. All three have been tested
- under BSD Unix 4.3; each produces a usable, though limited, interactive
- desk-top calculator.
- These examples are simple, but Bison grammars for real programming
- languages are written the same way.
- @ifinfo
- You can copy these examples out of the Info file and into a source file
- to try them.
- @end ifinfo
- @menu
- * RPN Calc:: Reverse polish notation calculator;
- a first example with no operator precedence.
- * Infix Calc:: Infix (algebraic) notation calculator.
- Operator precedence is introduced.
- * Simple Error Recovery:: Continuing after syntax errors.
- * Multi-function Calc:: Calculator with memory and trig functions.
- It uses multiple data-types for semantic values.
- * Exercises:: Ideas for improving the multi-function calculator.
- @end menu
- @node RPN Calc, Infix Calc, , Examples
- @section Reverse Polish Notation Calculator
- @cindex reverse polish notation
- @cindex polish notation calculator
- @cindex @code{rpcalc}
- @cindex calculator, simple
- The first example is that of a simple double-precision @dfn{reverse polish
- notation} calculator (a calculator using postfix operators). This example
- provides a good starting point, since operator precedence is not an issue.
- The second example will illustrate how operator precedence is handled.
- The source code for this calculator is named @file{rpcalc.y}. The
- @samp{.y} extension is a convention used for Bison input files.
- @menu
- * Decls: Rpcalc Decls. Bison and C declarations for rpcalc.
- * Rules: Rpcalc Rules. Grammar Rules for rpcalc, with explanation.
- * Lexer: Rpcalc Lexer. The lexical analyzer.
- * Main: Rpcalc Main. The controlling function.
- * Error: Rpcalc Error. The error reporting function.
- * Gen: Rpcalc Gen. Running Bison on the grammar file.
- * Comp: Rpcalc Compile. Run the C compiler on the output code.
- @end menu
- @node Rpcalc Decls, Rpcalc Rules, , RPN Calc
- @subsection Declarations for @code{rpcalc}
- Here are the C and Bison declarations for the reverse polish notation
- calculator. As in C, comments are placed between @samp{/*@dots{}*/}.
- @example
- /* Reverse polish notation calculator. */
- %@{
- #define YYSTYPE double
- #include <math.h>
- %@}
- %token NUM
- %% /* Grammar rules and actions follow */
- @end example
- The C declarations section (@pxref{C Declarations, ,The C Declarations Section}) contains two
- preprocessor directives.
- The @code{#define} directive defines the macro @code{YYSTYPE}, thus
- specifying the C data type for semantic values of both tokens and groupings
- (@pxref{Value Type, ,Data Types of Semantic Values}). The Bison parser will use whatever type
- @code{YYSTYPE} is defined as; if you don't define it, @code{int} is the
- default. Because we specify @code{double}, each token and each expression
- has an associated value, which is a floating point number.
- The @code{#include} directive is used to declare the exponentiation
- function @code{pow}.
- The second section, Bison declarations, provides information to Bison about
- the token types (@pxref{Bison Declarations, ,The Bison Declarations Section}). Each terminal symbol that is
- not a single-character literal must be declared here. (Single-character
- literals normally don't need to be declared.) In this example, all the
- arithmetic operators are designated by single-character literals, so the
- only terminal symbol that needs to be declared is @code{NUM}, the token
- type for numeric constants.
- @node Rpcalc Rules, Rpcalc Lexer, Rpcalc Decls, RPN Calc
- @subsection Grammar Rules for @code{rpcalc}
- Here are the grammar rules for the reverse polish notation calculator.
- @example
- input: /* empty */
- | input line
- ;
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- ;
- exp: NUM @{ $$ = $1; @}
- | exp exp '+' @{ $$ = $1 + $2; @}
- | exp exp '-' @{ $$ = $1 - $2; @}
- | exp exp '*' @{ $$ = $1 * $2; @}
- | exp exp '/' @{ $$ = $1 / $2; @}
- /* Exponentiation */
- | exp exp '^' @{ $$ = pow ($1, $2); @}
- /* Unary minus */
- | exp 'n' @{ $$ = -$1; @}
- ;
- %%
- @end example
- The groupings of the rpcalc ``language'' defined here are the expression
- (given the name @code{exp}), the line of input (@code{line}), and the
- complete input transcript (@code{input}). Each of these nonterminal
- symbols has several alternate rules, joined by the @samp{|} punctuator
- which is read as ``or''. The following sections explain what these rules
- mean.
- The semantics of the language is determined by the actions taken when a
- grouping is recognized. The actions are the C code that appears inside
- braces. @xref{Actions}.
- You must specify these actions in C, but Bison provides the means for
- passing semantic values between the rules. In each action, the
- pseudo-variable @code{$$} stands for the semantic value for the grouping
- that the rule is going to construct. Assigning a value to @code{$$} is the
- main job of most actions. The semantic values of the components of the
- rule are referred to as @code{$1}, @code{$2}, and so on.
- @menu
- * Rpcalc Input::
- * Rpcalc Line::
- * Rpcalc Expr::
- @end menu
- @node Rpcalc Input, Rpcalc Line, , Rpcalc Rules
- @subsubsection Explanation of @code{input}
- Consider the definition of @code{input}:
- @example
- input: /* empty */
- | input line
- ;
- @end example
- This definition reads as follows: ``A complete input is either an empty
- string, or a complete input followed by an input line''. Notice that
- ``complete input'' is defined in terms of itself. This definition is said
- to be @dfn{left recursive} since @code{input} appears always as the
- leftmost symbol in the sequence. @xref{Recursion, ,Recursive Rules}.
- The first alternative is empty because there are no symbols between the
- colon and the first @samp{|}; this means that @code{input} can match an
- empty string of input (no tokens). We write the rules this way because it
- is legitimate to type @kbd{Ctrl-d} right after you start the calculator.
- It's conventional to put an empty alternative first and write the comment
- @samp{/* empty */} in it.
- The second alternate rule (@code{input line}) handles all nontrivial input.
- It means, ``After reading any number of lines, read one more line if
- possible.'' The left recursion makes this rule into a loop. Since the
- first alternative matches empty input, the loop can be executed zero or
- more times.
- The parser function @code{yyparse} continues to process input until a
- grammatical error is seen or the lexical analyzer says there are no more
- input tokens; we will arrange for the latter to happen at end of file.
- @node Rpcalc Line, Rpcalc Expr, Rpcalc Input, Rpcalc Rules
- @subsubsection Explanation of @code{line}
- Now consider the definition of @code{line}:
- @example
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- ;
- @end example
- The first alternative is a token which is a newline character; this means
- that rpcalc accepts a blank line (and ignores it, since there is no
- action). The second alternative is an expression followed by a newline.
- This is the alternative that makes rpcalc useful. The semantic value of
- the @code{exp} grouping is the value of @code{$1} because the @code{exp} in
- question is the first symbol in the alternative. The action prints this
- value, which is the result of the computation the user asked for.
- This action is unusual because it does not assign a value to @code{$$}. As
- a consequence, the semantic value associated with the @code{line} is
- uninitialized (its value will be unpredictable). This would be a bug if
- that value were ever used, but we don't use it: once rpcalc has printed the
- value of the user's input line, that value is no longer needed.
- @node Rpcalc Expr, , Rpcalc Line, Rpcalc Rules
- @subsubsection Explanation of @code{expr}
- The @code{exp} grouping has several rules, one for each kind of expression.
- The first rule handles the simplest expressions: those that are just numbers.
- The second handles an addition-expression, which looks like two expressions
- followed by a plus-sign. The third handles subtraction, and so on.
- @example
- exp: NUM
- | exp exp '+' @{ $$ = $1 + $2; @}
- | exp exp '-' @{ $$ = $1 - $2; @}
- @dots{}
- ;
- @end example
- We have used @samp{|} to join all the rules for @code{exp}, but we could
- equally well have written them separately:
- @example
- exp: NUM ;
- exp: exp exp '+' @{ $$ = $1 + $2; @} ;
- exp: exp exp '-' @{ $$ = $1 - $2; @} ;
- @dots{}
- @end example
- Most of the rules have actions that compute the value of the expression in
- terms of the value of its parts. For example, in the rule for addition,
- @code{$1} refers to the first component @code{exp} and @code{$2} refers to
- the second one. The third component, @code{'+'}, has no meaningful
- associated semantic value, but if it had one you could refer to it as
- @code{$3}. When @code{yyparse} recognizes a sum expression using this
- rule, the sum of the two subexpressions' values is produced as the value of
- the entire expression. @xref{Actions}.
- You don't have to give an action for every rule. When a rule has no
- action, Bison by default copies the value of @code{$1} into @code{$$}.
- This is what happens in the first rule (the one that uses @code{NUM}).
- The formatting shown here is the recommended convention, but Bison does
- not require it. You can add or change whitespace as much as you wish.
- For example, this:
- @example
- exp : NUM | exp exp '+' @{$$ = $1 + $2; @} | @dots{}
- @end example
- @noindent
- means the same thing as this:
- @example
- exp: NUM
- | exp exp '+' @{ $$ = $1 + $2; @}
- | @dots{}
- @end example
- @noindent
- The latter, however, is much more readable.
- @node Rpcalc Lexer, Rpcalc Main, Rpcalc Rules, RPN Calc
- @subsection The @code{rpcalc} Lexical Analyzer
- @cindex writing a lexical analyzer
- @cindex lexical analyzer, writing
- The lexical analyzer's job is low-level parsing: converting characters or
- sequences of characters into tokens. The Bison parser gets its tokens by
- calling the lexical analyzer. @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- Only a simple lexical analyzer is needed for the RPN calculator. This
- lexical analyzer skips blanks and tabs, then reads in numbers as
- @code{double} and returns them as @code{NUM} tokens. Any other character
- that isn't part of a number is a separate token. Note that the token-code
- for such a single-character token is the character itself.
- The return value of the lexical analyzer function is a numeric code which
- represents a token type. The same text used in Bison rules to stand for
- this token type is also a C expression for the numeric code for the type.
- This works in two ways. If the token type is a character literal, then its
- numeric code is the ASCII code for that character; you can use the same
- character literal in the lexical analyzer to express the number. If the
- token type is an identifier, that identifier is defined by Bison as a C
- macro whose definition is the appropriate number. In this example,
- therefore, @code{NUM} becomes a macro for @code{yylex} to use.
- The semantic value of the token (if it has one) is stored into the global
- variable @code{yylval}, which is where the Bison parser will look for it.
- (The C data type of @code{yylval} is @code{YYSTYPE}, which was defined
- at the beginning of the grammar; @pxref{Rpcalc Decls, ,Declarations for @code{rpcalc}}.)
- A token type code of zero is returned if the end-of-file is encountered.
- (Bison recognizes any nonpositive value as indicating the end of the
- input.)
- Here is the code for the lexical analyzer:
- @example
- @group
- /* Lexical analyzer returns a double floating point
- number on the stack and the token NUM, or the ASCII
- character read if not a number. Skips all blanks
- and tabs, returns 0 for EOF. */
- #include <ctype.h>
- @end group
- @group
- yylex ()
- @{
- int c;
- /* skip white space */
- while ((c = getchar ()) == ' ' || c == '\t')
- ;
- @end group
- @group
- /* process numbers */
- if (c == '.' || isdigit (c))
- @{
- ungetc (c, stdin);
- scanf ("%lf", &yylval);
- return NUM;
- @}
- @end group
- @group
- /* return end-of-file */
- if (c == EOF)
- return 0;
- /* return single chars */
- return c;
- @}
- @end group
- @end example
- @node Rpcalc Main, Rpcalc Error, Rpcalc Lexer, RPN Calc
- @subsection The Controlling Function
- @cindex controlling function
- @cindex main function in simple example
- In keeping with the spirit of this example, the controlling function is
- kept to the bare minimum. The only requirement is that it call
- @code{yyparse} to start the process of parsing.
- @example
- @group
- main ()
- @{
- yyparse ();
- @}
- @end group
- @end example
- @node Rpcalc Error, Rpcalc Gen, Rpcalc Main, RPN Calc
- @subsection The Error Reporting Routine
- @cindex error reporting routine
- When @code{yyparse} detects a syntax error, it calls the error reporting
- function @code{yyerror} to print an error message (usually but not always
- @code{"parse error"}). It is up to the programmer to supply @code{yyerror}
- (@pxref{Interface, ,Parser C-Language Interface}), so here is the definition we will use:
- @example
- @group
- #include <stdio.h>
- yyerror (s) /* Called by yyparse on error */
- char *s;
- @{
- printf ("%s\n", s);
- @}
- @end group
- @end example
- After @code{yyerror} returns, the Bison parser may recover from the error
- and continue parsing if the grammar contains a suitable error rule
- (@pxref{Error Recovery}). Otherwise, @code{yyparse} returns nonzero. We
- have not written any error rules in this example, so any invalid input will
- cause the calculator program to exit. This is not clean behavior for a
- real calculator, but it is adequate in the first example.
- @node Rpcalc Gen, Rpcalc Compile, Rpcalc Error, RPN Calc
- @subsection Running Bison to Make the Parser
- @cindex running Bison (introduction)
- Before running Bison to produce a parser, we need to decide how to arrange
- all the source code in one or more source files. For such a simple example,
- the easiest thing is to put everything in one file. The definitions of
- @code{yylex}, @code{yyerror} and @code{main} go at the end, in the
- ``additional C code'' section of the file (@pxref{Grammar Layout, ,The Overall Layout of a Bison Grammar}).
- For a large project, you would probably have several source files, and use
- @code{make} to arrange to recompile them.
- With all the source in a single file, you use the following command to
- convert it into a parser file:
- @example
- bison @var{file_name}.y
- @end example
- @noindent
- In this example the file was called @file{rpcalc.y} (for ``Reverse Polish
- CALCulator''). Bison produces a file named @file{@var{file_name}.tab.c},
- removing the @samp{.y} from the original file name. The file output by
- Bison contains the source code for @code{yyparse}. The additional
- functions in the input file (@code{yylex}, @code{yyerror} and @code{main})
- are copied verbatim to the output.
- @node Rpcalc Compile, , Rpcalc Gen, RPN Calc
- @subsection Compiling the Parser File
- @cindex compiling the parser
- Here is how to compile and run the parser file:
- @example
- @group
- # @r{List files in current directory.}
- % ls
- rpcalc.tab.c rpcalc.y
- @end group
- @group
- # @r{Compile the Bison parser.}
- # @r{@samp{-lm} tells compiler to search math library for @code{pow}.}
- % cc rpcalc.tab.c -lm -o rpcalc
- @end group
- @group
- # @r{List files again.}
- % ls
- rpcalc rpcalc.tab.c rpcalc.y
- @end group
- @end example
- The file @file{rpcalc} now contains the executable code. Here is an
- example session using @code{rpcalc}.
- @example
- % rpcalc
- 4 9 +
- 13
- 3 7 + 3 4 5 *+-
- -13
- 3 7 + 3 4 5 * + - n @r{Note the unary minus, @samp{n}}
- 13
- 5 6 / 4 n +
- -3.166666667
- 3 4 ^ @r{Exponentiation}
- 81
- ^D @r{End-of-file indicator}
- %
- @end example
- @node Infix Calc, Simple Error Recovery, RPN Calc, Examples
- @section Infix Notation Calculator: @code{calc}
- @cindex infix notation calculator
- @cindex @code{calc}
- @cindex calculator, infix notation
- We now modify rpcalc to handle infix operators instead of postfix. Infix
- notation involves the concept of operator precedence and the need for
- parentheses nested to arbitrary depth. Here is the Bison code for
- @file{calc.y}, an infix desk-top calculator.
- @example
- /* Infix notation calculator--calc */
- %@{
- #define YYSTYPE double
- #include <math.h>
- %@}
- /* BISON Declarations */
- %token NUM
- %left '-' '+'
- %left '*' '/'
- %left NEG /* negation--unary minus */
- %right '^' /* exponentiation */
- /* Grammar follows */
- %%
- input: /* empty string */
- | input line
- ;
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- ;
- exp: NUM @{ $$ = $1; @}
- | exp '+' exp @{ $$ = $1 + $3; @}
- | exp '-' exp @{ $$ = $1 - $3; @}
- | exp '*' exp @{ $$ = $1 * $3; @}
- | exp '/' exp @{ $$ = $1 / $3; @}
- | '-' exp %prec NEG @{ $$ = -$2; @}
- | exp '^' exp @{ $$ = pow ($1, $3); @}
- | '(' exp ')' @{ $$ = $2; @}
- ;
- %%
- @end example
- @noindent
- The functions @code{yylex}, @code{yyerror} and @code{main} can be the same
- as before.
- There are two important new features shown in this code.
- In the second section (Bison declarations), @code{%left} declares token
- types and says they are left-associative operators. The declarations
- @code{%left} and @code{%right} (right associativity) take the place of
- @code{%token} which is used to declare a token type name without
- associativity. (These tokens are single-character literals, which
- ordinarily don't need to be declared. We declare them here to specify
- the associativity.)
- Operator precedence is determined by the line ordering of the
- declarations; the higher the line number of the declaration (lower on
- the page or screen), the higher the precedence. Hence, exponentiation
- has the highest precedence, unary minus (@code{NEG}) is next, followed
- by @samp{*} and @samp{/}, and so on. @xref{Precedence, ,Operator Precedence}.
- The other important new feature is the @code{%prec} in the grammar section
- for the unary minus operator. The @code{%prec} simply instructs Bison that
- the rule @samp{| '-' exp} has the same precedence as @code{NEG}---in this
- case the next-to-highest. @xref{Contextual Precedence, ,Context-Dependent Precedence}.
- Here is a sample run of @file{calc.y}:
- @need 500
- @example
- % calc
- 4 + 4.5 - (34/(8*3+-3))
- 6.880952381
- -56 + 2
- -54
- 3 ^ 2
- 9
- @end example
- @node Simple Error Recovery, Multi-function Calc, Infix Calc, Examples
- @section Simple Error Recovery
- @cindex error recovery, simple
- Up to this point, this manual has not addressed the issue of @dfn{error
- recovery}---how to continue parsing after the parser detects a syntax
- error. All we have handled is error reporting with @code{yyerror}. Recall
- that by default @code{yyparse} returns after calling @code{yyerror}. This
- means that an erroneous input line causes the calculator program to exit.
- Now we show how to rectify this deficiency.
- The Bison language itself includes the reserved word @code{error}, which
- may be included in the grammar rules. In the example below it has
- been added to one of the alternatives for @code{line}:
- @example
- @group
- line: '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- | error '\n' @{ yyerrok; @}
- ;
- @end group
- @end example
- This addition to the grammar allows for simple error recovery in the event
- of a parse error. If an expression that cannot be evaluated is read, the
- error will be recognized by the third rule for @code{line}, and parsing
- will continue. (The @code{yyerror} function is still called upon to print
- its message as well.) The action executes the statement @code{yyerrok}, a
- macro defined automatically by Bison; its meaning is that error recovery is
- complete (@pxref{Error Recovery}). Note the difference between
- @code{yyerrok} and @code{yyerror}; neither one is a misprint.@refill
- This form of error recovery deals with syntax errors. There are other
- kinds of errors; for example, division by zero, which raises an exception
- signal that is normally fatal. A real calculator program must handle this
- signal and use @code{longjmp} to return to @code{main} and resume parsing
- input lines; it would also have to discard the rest of the current line of
- input. We won't discuss this issue further because it is not specific to
- Bison programs.
- @node Multi-function Calc, Exercises, Simple Error Recovery, Examples
- @section Multi-Function Calculator: @code{mfcalc}
- @cindex multi-function calculator
- @cindex @code{mfcalc}
- @cindex calculator, multi-function
- Now that the basics of Bison have been discussed, it is time to move on to
- a more advanced problem. The above calculators provided only five
- functions, @samp{+}, @samp{-}, @samp{*}, @samp{/} and @samp{^}. It would
- be nice to have a calculator that provides other mathematical functions such
- as @code{sin}, @code{cos}, etc.
- It is easy to add new operators to the infix calculator as long as they are
- only single-character literals. The lexical analyzer @code{yylex} passes
- back all non-number characters as tokens, so new grammar rules suffice for
- adding a new operator. But we want something more flexible: built-in
- functions whose syntax has this form:
- @example
- @var{function_name} (@var{argument})
- @end example
- @noindent
- At the same time, we will add memory to the calculator, by allowing you
- to create named variables, store values in them, and use them later.
- Here is a sample session with the multi-function calculator:
- @example
- % mfcalc
- pi = 3.141592653589
- 3.1415926536
- sin(pi)
- 0.0000000000
- alpha = beta1 = 2.3
- 2.3000000000
- alpha
- 2.3000000000
- ln(alpha)
- 0.8329091229
- exp(ln(beta1))
- 2.3000000000
- %
- @end example
- Note that multiple assignment and nested function calls are permitted.
- @menu
- * Decl: Mfcalc Decl. Bison declarations for multi-function calculator.
- * Rules: Mfcalc Rules. Grammar rules for the calculator.
- * Symtab: Mfcalc Symtab. Symbol table management subroutines.
- @end menu
- @node Mfcalc Decl, Mfcalc Rules, , Multi-function Calc
- @subsection Declarations for @code{mfcalc}
- Here are the C and Bison declarations for the multi-function calculator.
- @smallexample
- %@{
- #include <math.h> /* For math functions, cos(), sin(), etc. */
- #include "calc.h" /* Contains definition of `symrec' */
- %@}
- %union @{
- double val; /* For returning numbers. */
- symrec *tptr; /* For returning symbol-table pointers */
- @}
- %token <val> NUM /* Simple double precision number */
- %token <tptr> VAR FNCT /* Variable and Function */
- %type <val> exp
- %right '='
- %left '-' '+'
- %left '*' '/'
- %left NEG /* Negation--unary minus */
- %right '^' /* Exponentiation */
- /* Grammar follows */
- %%
- @end smallexample
- The above grammar introduces only two new features of the Bison language.
- These features allow semantic values to have various data types
- (@pxref{Multiple Types, ,More Than One Value Type}).
- The @code{%union} declaration specifies the entire list of possible types;
- this is instead of defining @code{YYSTYPE}. The allowable types are now
- double-floats (for @code{exp} and @code{NUM}) and pointers to entries in
- the symbol table. @xref{Union Decl, ,The Collection of Value Types}.
- Since values can now have various types, it is necessary to associate a
- type with each grammar symbol whose semantic value is used. These symbols
- are @code{NUM}, @code{VAR}, @code{FNCT}, and @code{exp}. Their
- declarations are augmented with information about their data type (placed
- between angle brackets).
- The Bison construct @code{%type} is used for declaring nonterminal symbols,
- just as @code{%token} is used for declaring token types. We have not used
- @code{%type} before because nonterminal symbols are normally declared
- implicitly by the rules that define them. But @code{exp} must be declared
- explicitly so we can specify its value type. @xref{Type Decl, ,Nonterminal Symbols}.
- @node Mfcalc Rules, Mfcalc Symtab, Mfcalc Decl, Multi-function Calc
- @subsection Grammar Rules for @code{mfcalc}
- Here are the grammar rules for the multi-function calculator.
- Most of them are copied directly from @code{calc}; three rules,
- those which mention @code{VAR} or @code{FNCT}, are new.
- @smallexample
- input: /* empty */
- | input line
- ;
- line:
- '\n'
- | exp '\n' @{ printf ("\t%.10g\n", $1); @}
- | error '\n' @{ yyerrok; @}
- ;
- exp: NUM @{ $$ = $1; @}
- | VAR @{ $$ = $1->value.var; @}
- | VAR '=' exp @{ $$ = $3; $1->value.var = $3; @}
- | FNCT '(' exp ')' @{ $$ = (*($1->value.fnctptr))($3); @}
- | exp '+' exp @{ $$ = $1 + $3; @}
- | exp '-' exp @{ $$ = $1 - $3; @}
- | exp '*' exp @{ $$ = $1 * $3; @}
- | exp '/' exp @{ $$ = $1 / $3; @}
- | '-' exp %prec NEG @{ $$ = -$2; @}
- | exp '^' exp @{ $$ = pow ($1, $3); @}
- | '(' exp ')' @{ $$ = $2; @}
- ;
- /* End of grammar */
- %%
- @end smallexample
- @node Mfcalc Symtab, , Mfcalc Rules, Multi-function Calc
- @subsection The @code{mfcalc} Symbol Table
- @cindex symbol table example
- The multi-function calculator requires a symbol table to keep track of the
- names and meanings of variables and functions. This doesn't affect the
- grammar rules (except for the actions) or the Bison declarations, but it
- requires some additional C functions for support.
- The symbol table itself consists of a linked list of records. Its
- definition, which is kept in the header @file{calc.h}, is as follows. It
- provides for either functions or variables to be placed in the table.
- @smallexample
- @group
- /* Data type for links in the chain of symbols. */
- struct symrec
- @{
- char *name; /* name of symbol */
- int type; /* type of symbol: either VAR or FNCT */
- union @{
- double var; /* value of a VAR */
- double (*fnctptr)(); /* value of a FNCT */
- @} value;
- struct symrec *next; /* link field */
- @};
- @end group
- @group
- typedef struct symrec symrec;
- /* The symbol table: a chain of `struct symrec'. */
- extern symrec *sym_table;
- symrec *putsym ();
- symrec *getsym ();
- @end group
- @end smallexample
- The new version of @code{main} includes a call to @code{init_table}, a
- function that initializes the symbol table. Here it is, and
- @code{init_table} as well:
- @smallexample
- @group
- #include <stdio.h>
- main ()
- @{
- init_table ();
- yyparse ();
- @}
- @end group
- @group
- yyerror (s) /* Called by yyparse on error */
- char *s;
- @{
- printf ("%s\n", s);
- @}
- struct init
- @{
- char *fname;
- double (*fnct)();
- @};
- @end group
- @group
- struct init arith_fncts[]
- = @{
- "sin", sin,
- "cos", cos,
- "atan", atan,
- "ln", log,
- "exp", exp,
- "sqrt", sqrt,
- 0, 0
- @};
- /* The symbol table: a chain of `struct symrec'. */
- symrec *sym_table = (symrec *)0;
- @end group
- @group
- init_table () /* puts arithmetic functions in table. */
- @{
- int i;
- symrec *ptr;
- for (i = 0; arith_fncts[i].fname != 0; i++)
- @{
- ptr = putsym (arith_fncts[i].fname, FNCT);
- ptr->value.fnctptr = arith_fncts[i].fnct;
- @}
- @}
- @end group
- @end smallexample
- By simply editing the initialization list and adding the necessary include
- files, you can add additional functions to the calculator.
- Two important functions allow look-up and installation of symbols in the
- symbol table. The function @code{putsym} is passed a name and the type
- (@code{VAR} or @code{FNCT}) of the object to be installed. The object is
- linked to the front of the list, and a pointer to the object is returned.
- The function @code{getsym} is passed the name of the symbol to look up. If
- found, a pointer to that symbol is returned; otherwise zero is returned.
- @smallexample
- symrec *
- putsym (sym_name,sym_type)
- char *sym_name;
- int sym_type;
- @{
- symrec *ptr;
- ptr = (symrec *) malloc (sizeof (symrec));
- ptr->name = (char *) malloc (strlen (sym_name) + 1);
- strcpy (ptr->name,sym_name);
- ptr->type = sym_type;
- ptr->value.var = 0; /* set value to 0 even if fctn. */
- ptr->next = (struct symrec *)sym_table;
- sym_table = ptr;
- return ptr;
- @}
- symrec *
- getsym (sym_name)
- char *sym_name;
- @{
- symrec *ptr;
- for (ptr = sym_table; ptr != (symrec *) 0;
- ptr = (symrec *)ptr->next)
- if (strcmp (ptr->name,sym_name) == 0)
- return ptr;
- return 0;
- @}
- @end smallexample
- The function @code{yylex} must now recognize variables, numeric values, and
- the single-character arithmetic operators. Strings of alphanumeric
- characters with a leading nondigit are recognized as either variables or
- functions depending on what the symbol table says about them.
- The string is passed to @code{getsym} for look up in the symbol table. If
- the name appears in the table, a pointer to its location and its type
- (@code{VAR} or @code{FNCT}) is returned to @code{yyparse}. If it is not
- already in the table, then it is installed as a @code{VAR} using
- @code{putsym}. Again, a pointer and its type (which must be @code{VAR}) is
- returned to @code{yyparse}.@refill
- No change is needed in the handling of numeric values and arithmetic
- operators in @code{yylex}.
- @smallexample
- @group
- #include <ctype.h>
- yylex ()
- @{
- int c;
- /* Ignore whitespace, get first nonwhite character. */
- while ((c = getchar ()) == ' ' || c == '\t');
- if (c == EOF)
- return 0;
- @end group
- @group
- /* Char starts a number => parse the number. */
- if (c == '.' || isdigit (c))
- @{
- ungetc (c, stdin);
- scanf ("%lf", &yylval.val);
- return NUM;
- @}
- @end group
- @group
- /* Char starts an identifier => read the name. */
- if (isalpha (c))
- @{
- symrec *s;
- static char *symbuf = 0;
- static int length = 0;
- int i;
- @end group
- @group
- /* Initially make the buffer long enough
- for a 40-character symbol name. */
- if (length == 0)
- length = 40, symbuf = (char *)malloc (length + 1);
- i = 0;
- do
- @end group
- @group
- @{
- /* If buffer is full, make it bigger. */
- if (i == length)
- @{
- length *= 2;
- symbuf = (char *)realloc (symbuf, length + 1);
- @}
- /* Add this character to the buffer. */
- symbuf[i++] = c;
- /* Get another character. */
- c = getchar ();
- @}
- @end group
- @group
- while (c != EOF && isalnum (c));
- ungetc (c, stdin);
- symbuf[i] = '\0';
- @end group
- @group
- s = getsym (symbuf);
- if (s == 0)
- s = putsym (symbuf, VAR);
- yylval.tptr = s;
- return s->type;
- @}
- /* Any other character is a token by itself. */
- return c;
- @}
- @end group
- @end smallexample
- This program is both powerful and flexible. You may easily add new
- functions, and it is a simple job to modify this code to install predefined
- variables such as @code{pi} or @code{e} as well.
- @node Exercises, , Multi-function Calc, Examples
- @section Exercises
- @cindex exercises
- @enumerate
- @item
- Add some new functions from @file{math.h} to the initialization list.
- @item
- Add another array that contains constants and their values. Then
- modify @code{init_table} to add these constants to the symbol table.
- It will be easiest to give the constants type @code{VAR}.
- @item
- Make the program report an error if the user refers to an
- uninitialized variable in any way except to store a value in it.
- @end enumerate
- @node Grammar File, Interface, Examples, Top
- @chapter Bison Grammar Files
- Bison takes as input a context-free grammar specification and produces a
- C-language function that recognizes correct instances of the grammar.
- The Bison grammar input file conventionally has a name ending in @samp{.y}.
- @menu
- * Grammar Outline:: Overall layout of the grammar file.
- * Symbols:: Terminal and nonterminal symbols.
- * Rules:: How to write grammar rules.
- * Recursion:: Writing recursive rules.
- * Semantics:: Semantic values and actions.
- * Declarations:: All kinds of Bison declarations are described here.
- * Multiple Parsers:: Putting more than one Bison parser in one program.
- @end menu
- @node Grammar Outline, Symbols, , Grammar File
- @section Outline of a Bison Grammar
- A Bison grammar file has four main sections, shown here with the
- appropriate delimiters:
- @example
- %@{
- @var{C declarations}
- %@}
- @var{Bison declarations}
- %%
- @var{Grammar rules}
- %%
- @var{Additional C code}
- @end example
- Comments enclosed in @samp{/* @dots{} */} may appear in any of the sections.
- @menu
- * C Declarations:: Syntax and usage of the C declarations section.
- * Bison Declarations:: Syntax and usage of the Bison declarations section.
- * Grammar Rules:: Syntax and usage of the grammar rules section.
- * C Code:: Syntax and usage of the additional C code section.
- @end menu
- @node C Declarations, Bison Declarations, , Grammar Outline
- @subsection The C Declarations Section
- @cindex C declarations section
- @cindex declarations, C
- The @var{C declarations} section contains macro definitions and
- declarations of functions and variables that are used in the actions in the
- grammar rules. These are copied to the beginning of the parser file so
- that they precede the definition of @code{yyparse}. You can use
- @samp{#include} to get the declarations from a header file. If you don't
- need any C declarations, you may omit the @samp{%@{} and @samp{%@}}
- delimiters that bracket this section.
- @node Bison Declarations, Grammar Rules, C Declarations, Grammar Outline
- @subsection The Bison Declarations Section
- @cindex Bison declarations (introduction)
- @cindex declarations, Bison (introduction)
- The @var{Bison declarations} section contains declarations that define
- terminal and nonterminal symbols, specify precedence, and so on.
- In some simple grammars you may not need any declarations.
- @xref{Declarations, ,Bison Declarations}.
- @node Grammar Rules, C Code, Bison Declarations, Grammar Outline
- @subsection The Grammar Rules Section
- @cindex grammar rules section
- @cindex rules section for grammar
- The @dfn{grammar rules} section contains one or more Bison grammar
- rules, and nothing else. @xref{Rules, ,Syntax of Grammar Rules}.
- There must always be at least one grammar rule, and the first
- @samp{%%} (which precedes the grammar rules) may never be omitted even
- if it is the first thing in the file.
- @node C Code, , Grammar Rules, Grammar Outline
- @subsection The Additional C Code Section
- @cindex additional C code section
- @cindex C code, section for additional
- The @var{additional C code} section is copied verbatim to the end of
- the parser file, just as the @var{C declarations} section is copied to
- the beginning. This is the most convenient place to put anything
- that you want to have in the parser file but which need not come before
- the definition of @code{yyparse}. For example, the definitions of
- @code{yylex} and @code{yyerror} often go here. @xref{Interface, ,Parser C-Language Interface}.
- If the last section is empty, you may omit the @samp{%%} that separates it
- from the grammar rules.
- The Bison parser itself contains many static variables whose names start
- with @samp{yy} and many macros whose names start with @samp{YY}. It is a
- good idea to avoid using any such names (except those documented in this
- manual) in the additional C code section of the grammar file.
- @node Symbols, Rules, Grammar Outline, Grammar File
- @section Symbols, Terminal and Nonterminal
- @cindex nonterminal symbol
- @cindex terminal symbol
- @cindex token type
- @cindex symbol
- @dfn{Symbols} in Bison grammars represent the grammatical classifications
- of the language.
- A @dfn{terminal symbol} (also known as a @dfn{token type}) represents a
- class of syntactically equivalent tokens. You use the symbol in grammar
- rules to mean that a token in that class is allowed. The symbol is
- represented in the Bison parser by a numeric code, and the @code{yylex}
- function returns a token type code to indicate what kind of token has been
- read. You don't need to know what the code value is; you can use the
- symbol to stand for it.
- A @dfn{nonterminal symbol} stands for a class of syntactically equivalent
- groupings. The symbol name is used in writing grammar rules. By convention,
- it should be all lower case.
- Symbol names can contain letters, digits (not at the beginning),
- underscores and periods. Periods make sense only in nonterminals.
- There are two ways of writing terminal symbols in the grammar:
- @itemize @bullet
- @item
- A @dfn{named token type} is written with an identifier, like an
- identifier in C. By convention, it should be all upper case. Each
- such name must be defined with a Bison declaration such as
- @code{%token}. @xref{Token Decl, ,Token Type Names}.
- @item
- @cindex character token
- @cindex literal token
- @cindex single-character literal
- A @dfn{character token type} (or @dfn{literal token}) is written in
- the grammar using the same syntax used in C for character constants;
- for example, @code{'+'} is a character token type. A character token
- type doesn't need to be declared unless you need to specify its
- semantic value data type (@pxref{Value Type, ,Data Types of Semantic Values}), associativity, or
- precedence (@pxref{Precedence, ,Operator Precedence}).
- By convention, a character token type is used only to represent a
- token that consists of that particular character. Thus, the token
- type @code{'+'} is used to represent the character @samp{+} as a
- token. Nothing enforces this convention, but if you depart from it,
- your program will confuse other readers.
- All the usual escape sequences used in character literals in C can be
- used in Bison as well, but you must not use the null character as a
- character literal because its ASCII code, zero, is the code
- @code{yylex} returns for end-of-input (@pxref{Calling Convention, ,Calling Convention for @code{yylex}}).
- @end itemize
- How you choose to write a terminal symbol has no effect on its
- grammatical meaning. That depends only on where it appears in rules and
- on when the parser function returns that symbol.
- The value returned by @code{yylex} is always one of the terminal symbols
- (or 0 for end-of-input). Whichever way you write the token type in the
- grammar rules, you write it the same way in the definition of @code{yylex}.
- The numeric code for a character token type is simply the ASCII code for
- the character, so @code{yylex} can use the identical character constant to
- generate the requisite code. Each named token type becomes a C macro in
- the parser file, so @code{yylex} can use the name to stand for the code.
- (This is why periods don't make sense in terminal symbols.)
- @xref{Calling Convention, ,Calling Convention for @code{yylex}}.
- If @code{yylex} is defined in a separate file, you need to arrange for the
- token-type macro definitions to be available there. Use the @samp{-d}
- option when you run Bison, so that it will write these macro definitions
- into a separate header file @file{@var{name}.tab.h} which you can include
- in the other source files that need it. @xref{Invocation, ,Invoking Bison}.
- The symbol @code{error} is a terminal symbol reserved for error recovery
- (@pxref{Error Recovery}); you shouldn't use it for any other purpose.
- In particular, @code{yylex} should never return this value.
- @node Rules, Recursion, Symbols, Grammar File
- @section Syntax of Grammar Rules
- @cindex rule syntax
- @cindex grammar rule syntax
- @cindex syntax of grammar rules
- A Bison grammar rule has the following general form:
- @example
- @group
- @var{result}: @var{components}@dots{}
- ;
- @end group
- @end example
- @noindent
- where @var{result} is the nonterminal symbol that this rule describes
- and @var{components} are various terminal and nonterminal symbols that
- are put together by this rule (@pxref{Symbols}).
- For example,
- @example
- @group
- exp: exp '+' exp
- ;
- @end group
- @end example
- @noindent
- says that two groupings of type @code{exp}, with a @samp{+} token in between,
- can be combined into a larger grouping of type @code{exp}.
- Whitespace in rules is significant only to separate symbols. You can add
- extra whitespace as you wish.
- Scattered among the components can be @var{actions} that determine
- the semantics of the rule. An action looks like this:
- @example
- @{@var{C statements}@}
- @end example
- @noindent
- Usually there is only one action and it follows the components.
- @xref{Actions}.
- @findex |
- Multiple rules for the same @var{result} can be written separately or can
- be joined with the vertical-bar character @samp{|} as follows:
- @ifinfo
- @example
- @var{result}: @var{rule1-components}@dots{}
- | @var{rule2-components}@dots{}
- @dots{}
- ;
- @end example
- @end ifinfo
- @iftex
- @example
- @group
- @var{result}: @var{rule1-components}@dots{}
- | @var{rule2-components}@dots{}
- @dots{}
- ;
- @end group
- @end example
- @end iftex
- @noindent
- They are still considered distinct rules even when joined in this way.
- If @var{components} in a rule is empty, it means that @var{result} can
- match the empty string. For example, here is how to define a
- comma-separated sequence of zero or more @code{exp} groupings:
- @example
- @group
- expseq: /* empty */
- | expseq1
- ;
- @end group
- @group
- expseq1: exp
- | expseq1 ',' exp
- ;
- @end group
- @end example
- @noindent
- It is customary to write a comment @samp{/* empty */} in each rule
- with no components.
- @node Recursion, Semantics, Rules, Grammar File
- @section Recursive Rules
- @cindex recursive rule
- A rule is called @dfn{recursive} when its @var{result} nonterminal appears
- also on its right hand side. Nearly all Bison grammars need to use
- recursion, because that is the only way to define a sequence of any number
- of somethings. Consider this recursive definition of a comma-separated
- sequence of one or more expressions:
- @example
- @group
- expseq1: exp
- | expseq1 ',' exp
- ;
- @end group
- @end example
- @cindex left recursion
- @cindex right recursion
- @noindent
- Since the recursive use of @code{expseq1} is the leftmost symbol in the
- right hand side, we call this @dfn{left recursion}. By contrast, here
- the same construct is defined using @dfn{right recursion}:
- @example
- @group
- expseq1: exp
- | exp ',' expseq1
- ;
- @end group
- @end example
- @noindent
- Any kind of sequence can be defined using either left recursion or
- right recursion, but you should always use left recursion, because it
- can parse a sequence of any number of elements with bounded stack
- space. Right recursion uses up space on the Bison stack in proportion
- to the number of elements in the sequence, because all the elements
- must be shifted onto the stack before the rule can be applied even
- once. @xref{Algorithm, ,The Bison Parser Algorithm }, for
- further explanation of this.
- @cindex mutual recursion
- @dfn{Indirect} or @dfn{mutual} recursion occurs when the result of the
- rule does not appear directly on its right hand side, but does appear
- in rules for other nonterminals which do appear on its right hand
- side.
- For example:
- @example
- @group
- expr: primary
- | primary '+' primary
- ;
- @end group
- @group
- primary: constant
- | '(' expr ')'
- ;
- @end group
- @end example
- @noindent
- defines two mutually-recursive nonterminals, since each refers to the
- other.
- @node Semantics, Declarations, Recursion, Grammar File
- @section Defining Language Semantics
- @cindex defining language semantics
- @cindex language semantics, defining
- The grammar rules for a language determine only the syntax. The semantics
- are determined by the semantic values associated with various tokens and
- groupings, and by the actions taken when various groupings are recognized.
- For example, the calculator calculates properly because the value
- associated with each expression is the proper number; it adds properly
- because the action for the grouping @w{@samp{@var{x} + @var{y}}} is to add
- the numbers associated with @var{x} and @var{y}.
- @menu
- * Value Type:: Specifying one data type for all semantic values.
- * Multiple Types:: Specifying several alternative data types.
- * Actions:: An action is the semantic definition of a grammar rule.
- * Action Types:: Specifying data types for actions to operate on.
- * Mid-Rule Actions:: Most actions go at the end of a rule.
- This says when, why and how to use the exceptional
- action in the middle of a rule.
- @end menu
- @node Value Type, Multiple Types, , Semantics
- @subsection Data Types of Semantic Values
- @cindex semantic value type
- @cindex value type, semantic
- @cindex data types of semantic values
- @cindex default data type
- In a simple program it may be sufficient to use the same data type for
- the semantic values of all language constructs. This was true in the
- RPN and infix calculator examples (@pxref{RPN Calc, ,Reverse Polish Notation Calculator}).
- Bison's default is to use type @code{int} for all semantic values. To
- specify some other type, define @code{YYSTYPE} as a macro, like this:
- @example
- #define YYSTYPE double
- @end example
- @noindent
- This macro definition must go in the C declarations section of the grammar
- file (@pxref{Grammar Outline, ,Outline of a Bison Grammar}).
- @node Multiple Types, Actions, Value Type, Semantics
- @subsection More Than One Value Type
- In most programs, you will need different data types for different kinds
- of tokens and groupings. For example, a numeric constant may need type
- @code{int} or @code{long}, while a string constant needs type @code{char *},
- and an identifier might need a pointer to an entry in the symbol table.
- To use more than one data type for semantic values in one parser, Bison
- requires you to do two things:
- @itemize @bullet
- @item
- Specify the entire collection of possible data types, with the
- @code{%union} Bison declaration (@pxref{Union Decl, ,The Collection of Value Types}).
- @item
- Choose one of those types for each symbol (terminal or nonterminal)
- for which semantic values are used. This is done for tokens with the
- @code{%token} Bison declaration (@pxref{Token Decl, ,Token Type Names}) and for groupings
- with the @code{%type} Bison declaration (@pxref{Type Decl, ,Nonterminal Symbols}).
- @end itemize
- @node Actions, Action Types, Multiple Types, Semantics
- @subsection Actions
- @cindex action
- @vindex $$
- @vindex $@var{n}
- An action accompanies a syntactic rule and contains C code to be executed
- each time an instance of that rule is recognized. The task of most actions
- is to compute a semantic value for the grouping built by the rule from the
- semantic values associated with tokens or smaller groupings.
- An action consists of C statements surrounded by braces, much like a
- compound statement in C. It can be placed at any position in the rule; it
- is executed at that position. Most rules have just one action at the end
- of the rule, following all the components. Actions in the middle of a rule
- are tricky and used only for special purposes (@pxref{Mid-Rule Actions, ,Actions in Mid-Rule}).
- The C code in an action can refer to the semantic values of the components
- matched by the rule with the construct @code{$@var{n}}, which stands for
- the value of the @var{n}th component. The semantic value for the grouping
- being constructed is @code{$$}. (Bison translates both of these constructs
- into array element references when it copies the actions into the parser
- file.)
- Here is a typical example:
- @example
- @group
- exp: @dots{}
- | exp '+' exp
- @{ $$ = $1 + $3; @}
- @end group
- @end example
- @noindent
- This rule constructs an @code{exp} from two smaller @code{exp} groupings
- connected by a plus-sign token. In the action, @code{$1} and @code{$3}
- refer to the semantic values of the two component @code{exp} groupings,
- which are the first and third symbols on the right hand side of the rule.
- The sum is stored into @code{$$} so that it becomes the semantic value of
- the addition-expression just recognized by the rule. If there were a
- useful semantic value associated with the @samp{+} token, it could be
- referred to as @code{$2}.@refill
- @cindex default action
- If you don't specify an action for a rule, Bison supplies a default:
- @w{@code{$$ = $1}.} Thus, the value of the first symbol in the rule becomes
- the value of the whole rule. Of course, the default rule is valid only
- if the two data types match. There is no meaningful default action for
- an empty rule; every empty rule must have an explicit action unless the
- rule's value does not matter.
- @code{$@var{n}} with @var{n} zero or negative is allowed for reference
- to tokens and groupings on the stack @emph{before} those that match the
- current rule. This is a very risky practice, and to use it reliably
- you must be certain of the context in which the rule is applied. Here
- is a case in which you can use this reliably:
- @example
- @group
- foo: expr bar '+' expr @{ @dots{} @}
- | expr bar '-' expr @{ @dots{} @}
- ;
- @end group
- @group
- bar: /* empty */
- @{ previous_expr = $0; @}
- ;
- @end group
- @end example
- As long as @code{bar} is used only in the fashion shown here, @code{$0}
- always refers to the @code{expr} which precedes @code{bar} in the
- definition of @code{foo}.
- @node Action Types, Mid-Rule Actions, Actions, Semantics
- @subsection Data Types of Values in Actions
- @cindex action data types
- @cindex data types in actions
- If you have chosen a single data type for semantic values, the @code{$$}
- and @code{$@var{n}} constructs always have that data type.
- If you have used @code{%union} to specify a variety of data types, then you
- must declare a choice among these types for each terminal or nonterminal
- symbol that can have a semantic value. Then each time you use @code{$$} or
- @code{$@var{n}}, its data type is determined by which symbol it refers to
- in the rule. In this example,@refill
- @example
- @group
- exp: @dots{}
- | exp '+' exp
- @{ $$ = $1 + $3; @}
- @end group
- @end example
- @noindent
- @code{$1} and @code{$3} refer to instances of @code{exp}, so they all
- have the data type declared for the nonterminal symbol @code{exp}. If
- @code{$2} were used, it would have the data type declared for the
- terminal symbol @code{'+'}, whatever that might be.@refill
- Alternatively, you can specify the data type when you refer to the value,
- by inserting @samp{<@var{type}>} after the @samp{$} at the beginning of the
- reference. For example, if you have defined types as shown here:
- @example
- @group
- %union @{
- int itype;
- double dtype;
- @}
- @end group
- @end example
- @noindent
- then you can write @code{$<itype>1} to refer to the first subunit of the
- rule as an integer, or @code{$<dtype>1} to refer to it as a double.
- @node Mid-Rule Actions, , Action Types, Semantics
- @subsection Actions in Mid-Rule
- @cindex actions in mid-rule
- @cindex mid-rule actions
- Occasionally it is useful to put an action in the middle of a rule.
- These actions are written just like usual end-of-rule actions, but they
- are executed before the parser even recognizes the following components.
- A mid-rule action may refer to the components preceding it using
- @code{$@var{n}}, but it may not refer to subsequent components because
- it is run before they are parsed.
- The mid-rule action itself counts as one of the components of the rule.
- This makes a difference when there is another action later in the same rule
- (and usually there is another at the end): you have to count the actions
- along with the symbols when working out which number @var{n} to use in
- @code{$@var{n}}.
- The mid-rule action can also have a semantic value. The action can set
- its value with an assignment to @code{$$}, and actions later in the rule
- can refer to the value using @code{$@var{n}}. Since there is no symbol
- to name the action, there is no way to declare a data type for the value
- in advance, so you must use the @samp{$<@dots{}>} construct to specify a
- data type each time you refer to this value.
- There is no way to set the value of the entire rule with a mid-rule
- action, because assignments to @code{$$} do not have that effect. The
- only way to set the value for the entire rule is with an ordinary action
- at the end of the rule.
- Here is an example from a hypothetical compiler, handling a @code{let}
- statement that looks like @samp{let (@var{variable}) @var{statement}} and
- serves to create a variable named @var{variable} temporarily for the
- duration of @var{statement}. To parse this construct, we must put
- @var{variable} into the symbol table while @var{statement} is parsed, then
- remove it afterward. Here is how it is done:
- @example
- @group
- stmt: LET '(' var ')'
- @{ $<context>$ = push_context ();
- declare_variable ($3); @}
- stmt @{ $$ = $6;
- pop_context ($<context>5); @}
- @end group
- @end example
- @noindent
- As soon as @samp{let (@var{variable})} has been recognized, the first
- action is run. It saves a copy of the current semantic context (the
- list of accessible variables) as its semantic value, using alternative
- @code{context} in the data-type union. Then it calls
- @code{declare_variable} to add the new variable to that list. Once the
- first action is finished, the embedded statement @code{stmt} can be
- parsed. Note that the mid-rule action is component number 5, so the
- @samp{stmt} is component number 6.
- After the embedded statement is parsed, its semantic value becomes the
- value of the entire @code{let}-statement. Then the semantic value from the
- earlier action is used to restore the prior list of variables. This
- removes the temporary @code{let}-variable from the list so that it won't
- appear to exist while the rest of the program is parsed.
- Taking action before a rule is completely recognized often leads to
- conflicts since the parser must commit to a parse in order to execute the
- action. For example, the following two rules, without mid-rule actions,
- can coexist in a working parser because the parser can shift the open-brace
- token and look at what follows before deciding whether there is a
- declaration or not:
- @example
- @group
- compound: '@{' declarations statements '@}'
- | '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- But when we add a mid-rule action as follows, the rules become nonfunctional:
- @example
- @group
- compound: @{ prepare_for_local_variables (); @}
- '@{' declarations statements '@}'
- @end group
- @group
- | '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- Now the parser is forced to decide whether to run the mid-rule action
- when it has read no farther than the open-brace. In other words, it
- must commit to using one rule or the other, without sufficient
- information to do it correctly. (The open-brace token is what is called
- the @dfn{look-ahead} token at this time, since the parser is still
- deciding what to do about it. @xref{Look-Ahead, ,Look-Ahead Tokens}.)
- You might think that you could correct the problem by putting identical
- actions into the two rules, like this:
- @example
- @group
- compound: @{ prepare_for_local_variables (); @}
- '@{' declarations statements '@}'
- | @{ prepare_for_local_variables (); @}
- '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- But this does not help, because Bison does not realize that the two actions
- are identical. (Bison never tries to understand the C code in an action.)
- If the grammar is such that a declaration can be distinguished from a
- statement by the first token (which is true in C), then one solution which
- does work is to put the action after the open-brace, like this:
- @example
- @group
- compound: '@{' @{ prepare_for_local_variables (); @}
- declarations statements '@}'
- | '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- Now the first token of the following declaration or statement,
- which would in any case tell Bison which rule to use, can still do so.
- Another solution is to bury the action inside a nonterminal symbol which
- serves as a subroutine:
- @example
- @group
- subroutine: /* empty */
- @{ prepare_for_local_variables (); @}
- ;
- @end group
- @group
- compound: subroutine
- '@{' declarations statements '@}'
- | subroutine
- '@{' statements '@}'
- ;
- @end group
- @end example
- @noindent
- Now Bison can execute the action in the rule for @code{subroutine} without
- deciding which rule for @code{compound} it will eventually use. Note that
- the action is now at the end of its rule. Any mid-rule action can be
- converted to an end-of-rule action in this way, and this is what Bison
- actually does to implement mid-rule actions.
- @node Declarations, Multiple Parsers, Semantics, Grammar File
- @section Bison Declarations
- @cindex declarations, Bison
- @cindex Bison declarations
- The @dfn{Bison declarations} section of a Bison grammar defines the symbols
- used in formulating the grammar and the data types of semantic values.
- @xref{Symbols}.
- All token type names (but not single-character literal tokens such as
- @code{'+'} and @code{'*'}) must be declared. Nonterminal symbols must be
- declared if you need to specify which data type to use for the semantic
- value (@pxref{Multiple Types, ,More Than One Value Type}).
- The first rule in the file also specifies the start symbol, by default.
- If you want some other symbol to be the start symbol, you must declare
- it explicitly (@pxref{Language and Grammar, ,Languages and Context-Free Grammars}).
- @menu
- * Token Decl:: Declaring terminal symbols.
- * Precedence Decl:: Declaring terminals with precedence and associativity.
- * Union Decl:: Declaring the set of all semantic value types.
- * Type Decl:: Declaring the choice of type for a nonterminal symbol.
- * Expect Decl:: Suppressing warnings about shift/reduce conflicts.
- * Start Decl:: Specifying the start symbol.
- * Pure Decl:: Requesting a reentrant parser.
- * Decl Summary:: Table of all Bison declarations.
- @end menu
- @node Token Decl, Precedence Decl, , Declarations
- @subsection Token Type Names
- @cindex declaring token type names
- @cindex token type names, declaring
- @findex %token
- The basic way to declare a token type name (terminal symbol) is as follows:
- @example
- %token @var{name}
- @end example
- Bison will convert this into a @code{#define} directive in
- the parser, so that the function @code{yylex} (if it is in this file)
- can use the name @var{name} to stand for this token type's code.
- Alternatively, you can use @code{%left}, @code{%right}, or @code{%nonassoc}
- instead of @code{%token}, if you wish to specify precedence.
- @xref{Precedence Decl, ,Operator Precedence}.
- You can explicitly specify the numeric code for a token type by appending
- an integer value in the field immediately following the token name:
- @example
- %token NUM 300
- @end example
- @noindent
- It is generally best, however, to let Bison choose the numeric codes for
- all token types. Bison will automatically select codes that don't conflict
- with each other or with ASCII characters.
- In the event that the stack type is a union, you must augment the
- @code{%token} or other token declaration to include the data type
- alternative delimited by angle-brackets (@pxref{Multiple Types, ,More Than One Value Type}).
- For example:
- @example
- @group
- %union @{ /* define stack type */
- double val;
- symrec *tptr;
- @}
- %token <val> NUM /* define token NUM and its type */
- @end group
- @end example
- @node Precedence Decl, Union Decl, Token Decl, Declarations
- @subsection Operator Precedence
- @cindex precedence declarations
- @cindex declaring operator precedence
- @cindex operator precedence, declaring
- Use the @code{%left}, @code{%right} or @code{%nonassoc} declaration to
- declare a token and specify its precedence and associativity, all at
- once. These are called @dfn{precedence declarations}.
- @xref{Precedence, ,Operator Precedence}, for general information on operator precedence.
- The syntax of a precedence declaration is the same as that of
- @code{%token}: either
- @example
- %left @var{symbols}@dots{}
- @end example
- @noindent
- or
- @example
- %left <@var{type}> @var{symbols}@dots{}
- @end example
- And indeed any of these declarations serves the purposes of @code{%token}.
- But in addition, they specify the associativity and relative precedence for
- all the @var{symbols}:
- @itemize @bullet
- @item
- The associativity of an operator @var{op} determines how repeated uses
- of the operator nest: whether @samp{@var{x} @var{op} @var{y} @var{op}
- @var{z}} is parsed by grouping @var{x} with @var{y} first or by
- grouping @var{y} with @var{z} first. @code{%left} specifies
- left-associativity (grouping @var{x} with @var{y} first) and
- @code{%right} specifies right-associativity (grouping @var{y} with
- @var{z} first). @code{%nonassoc} specifies no associativity, which
- means that @samp{@var{x} @var{op} @var{y} @var{op} @var{z}} is
- considered a syntax error.
- @item
- The precedence of an operator determines how it nests with other operators.
- All the tokens declared in a single precedence declaration have equal
- precedence and nest together according to their associativity.
- When two tokens declared in different precedence declarations associate,
- the one declared later has the higher precedence and is grouped first.
- @end itemize
- @node Union Decl, Type Decl, Precedence Decl, Declarations
- @subsection The Collection of Value Types
- @cindex declaring value types
- @cindex value types, declaring
- @findex %union
- The @code{%union} declaration specifies the entire collection of possible
- data types for semantic values. The keyword @code{%union} is followed by a
- pair of braces containing the same thing that goes inside a @code{union} in
- C.
- For example:
- @example
- @group
- %union @{
- double val;
- symrec *tptr;
- @}
- @end group
- @end example
- @noindent
- This says that the two alternative types are @code{double} and @code{symrec
- *}. They are given names @code{val} and @code{tptr}; these names are used
- in the @code{%token} and @code{%type} declarations to pick one of the types
- for a terminal or nonterminal symbol (@pxref{Type Decl, ,Nonterminal Symbols}).
- Note that, unlike making a @code{union} declaration in C, you do not write
- a semicolon after the closing brace.
- @node Type Decl, Expect Decl, Union Decl, Declarations
- @subsection Nonterminal Symbols
- @cindex declaring value types, nonterminals
- @cindex value types, nonterminals, declaring
- @findex %type
- @noindent
- When you use @code{%union} to specify multiple value types, you must
- declare the value type of each nonterminal symbol for which values are
- used. This is done with a @code{%type} declaration, like this:
- @example
- %type <@var{type}> @var{nonterminal}@dots{}
- @end example
- @noindent
- Here @var{nonterminal} is the name of a nonterminal symbol, and @var{type}
- is the name given in the @code{%union} to the alternative that you want
- (@pxref{Union Decl, ,The Collection of Value Types}). You can give any number of nonterminal symbols in
- the same @code{%type} declaration, if they have the same value type. Use
- spaces to separate the symbol names.
- @node Expect Decl, Start Decl, Type Decl, Declarations
- @subsection Suppressing Conflict Warnings
- @cindex suppressing conflict warnings
- @cindex preventing warnings about conflicts
- @cindex warnings, preventing
- @cindex conflicts, suppressing warnings of
- @findex %expect
- Bison normally warns if there are any conflicts in the grammar
- (@pxref{Shift/Reduce, ,Shift/Reduce Conflicts}), but most real grammars have harmless shift/reduce
- conflicts which are resolved in a predictable way and would be difficult to
- eliminate. It is desirable to suppress the warning about these conflicts
- unless the number of conflicts changes. You can do this with the
- @code{%expect} declaration.
- The declaration looks like this:
- @example
- %expect @var{n}
- @end example
- Here @var{n} is a decimal integer. The declaration says there should be no
- warning if there are @var{n} shift/reduce conflicts and no reduce/reduce
- conflicts. The usual warning is given if there are either more or fewer
- conflicts, or if there are any reduce/reduce conflicts.
- In general, using @code{%expect} involves these steps:
- @itemize @bullet
- @item
- Compile your grammar without @code{%expect}. Use the @samp{-v} option
- to get a verbose list of where the conflicts occur. Bison will also
- print the number of conflicts.
- @item
- Check each of the conflicts to make sure that Bison's default
- resolution is what you really want. If not, rewrite the grammar and
- go back to the beginning.
- @item
- Add an @code{%expect} declaration, copying the number @var{n} from the
- number which Bison printed.
- @end itemize
- Now Bison will stop annoying you about the conflicts you have checked, but
- it will warn you again if changes in the grammar result in additional
- conflicts.
- @node Start Decl, Pure Decl, Expect Decl, Declarations
- @subsection The Start-Symbol
- @cindex declaring the start symbol
- @cindex start symbol, declaring
- @cindex default start symbol
- @findex %start
- Bison assumes by default that the start symbol for the grammar is the first
- nonterminal specified in the grammar specification section. The programmer
- may override this restriction with the @code{%start} declaration as follows:
- @example
- %start @var{symbol}
- @end example
- @node Pure Decl, Decl Summary, Start Decl, Declarations
- @subsection A Pure (Reentrant) Parser
- @cindex reentrant parser
- @cindex pure parser
- @findex %pure_parser
- A @dfn{reentrant} program is one which does not alter in the course of
- execution; in other words, it consists entirely of @dfn{pure} (read-only)
- code. Reentrancy is important whenever asynchronous execution is possible;
- for example, a nonreentrant program may not be safe to call from a signal
- handler. In systems with multiple threads of control, a nonreentrant
- program must be called only within interlocks.
- The Bison parser is not normally a reentrant program, because it uses
- statically allocated variables for communication with @code{yylex}. These
- variables include @code{yylval} and @code{yylloc}.
- The Bison declaration @code{%pure_parser} says that you want the parser
- to be reentrant. It looks like this:
- @example
- %pure_parser
- @end example
- The effect is that the two communication variables become local
- variables in @code{yyparse}, and a different calling convention is used
- for the lexical analyzer function @code{yylex}. @xref{Pure Calling,
- ,Calling Conventions for Pure Parsers}, for the details of this. The
- variable @code{yynerrs} also becomes local in @code{yyparse}
- (@pxref{Error Reporting, ,The Error Reporting Function @code{yyerror}}).
- The convention for calling @code{yyparse} itself is unchanged.
- @node Decl Summary, , Pure Decl, Declarations
- @subsection Bison Declaration Summary
- @cindex Bison declaration summary
- @cindex declaration summary
- @cindex summary, Bison declaration
- Here is a summary of all Bison declarations:
- @table @code
- @item %union
- Declare the collection of data types that semantic values may have
- (@pxref{Union Decl, ,The Collection of Value Types}).
- @item %token
- Declare a terminal symbol (token type name) with no precedence
- or associativity specified (@pxref{Token Decl, ,Token Type Names}).
- @item %right
- Declare a terminal symbol (token type name) that is right-associative
- (@pxref{Precedence Decl, ,Operator Precedence}).
- @item %left
- Declare a terminal symbol (token type name) that is left-associative
- (@pxref{Precedence Decl, ,Operator Precedence}).
- @item %nonassoc
- Declare a terminal symbol (token type name) that is nonassociative
- (using it in a way that would be associative is a syntax error)
- (@pxref{Precedence Decl, ,Operator Precedence}).
- @item %type
- Declare the type of semantic values for a nonterminal symbol
- (@pxref{Type Decl, ,Nonterminal Symbols}).
- @item %start
- Specify the grammar's start symbol (@pxref{Start Decl, ,The Start-Symbol}).
- @item %expect
- Declare the expected number of shift-reduce conflicts
- (@pxref{Expect Decl, ,Suppressing Conflict Warnings}).
- @item %pure_parser
- Request a pure (reentrant) parser program (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}).
- @end table
- @node Multiple Parsers, , Declarations, Grammar File
- @section Multiple Parsers in the Same Program
- Most programs that use Bison parse only one language and therefore contain
- only one Bison parser. But what if you want to parse more than one
- language with the same program? Then you need to avoid a name conflict
- between different definitions of @code{yyparse}, @code{yylval}, and so on.
- The easy way to do this is to use the option @samp{-p @var{prefix}}
- (@pxref{Invocation, ,Invoking Bison}). This renames the interface functions and
- variables of the Bison parser to start with @var{prefix} instead of
- @samp{yy}. You can use this to give each parser distinct names that do
- not conflict.
- The precise list of symbols renamed is @code{yyparse}, @code{yylex},
- @code{yyerror}, @code{yynerrs}, @code{yylval}, @code{yychar} and
- @code{yydebug}. For example, if you use @samp{-p c}, the names become
- @code{cparse}, @code{clex}, and so on.
- @strong{All the other variables and macros associated with Bison are not
- renamed.} These others are not global; there is no conflict if the same
- name is used in different parsers. For example, @code{YYSTYPE} is not
- renamed, but defining this in different ways in different parsers causes
- no trouble (@pxref{Value Type, ,Data Types of Semantic Values}).
- The @samp{-p} option works by adding macro definitions to the beginning
- of the parser source file, defining @code{yyparse} as
- @code{@var{prefix}parse}, and so on. This effectively substitutes one
- name for the other in the entire parser file.
- @node Interface, Algorithm, Grammar File, Top
- @chapter Parser C-Language Interface
- @cindex C-language interface
- @cindex interface
- The Bison parser is actually a C function named @code{yyparse}. Here we
- describe the interface conventions of @code{yyparse} and the other
- functions that it needs to use.
- Keep in mind that the parser uses many C identifiers starting with
- @samp{yy} and @samp{YY} for internal purposes. If you use such an
- identifier (aside from those in this manual) in an action or in additional
- C code in the grammar file, you are likely to run into trouble.
- @menu
- * Parser Function:: How to call @code{yyparse} and what it returns.
- * Lexical:: You must supply a function @code{yylex}
- which reads tokens.
- * Error Reporting:: You must supply a function @code{yyerror}.
- * Action Features:: Special features for use in actions.
- @end menu
- @node Parser Function, Lexical, , Interface
- @section The Parser Function @code{yyparse}
- @findex yyparse
- You call the function @code{yyparse} to cause parsing to occur. This
- function reads tokens, executes actions, and ultimately returns when it
- encounters end-of-input or an unrecoverable syntax error. You can also
- write an action which directs @code{yyparse} to return immediately without
- reading further.
- The value returned by @code{yyparse} is 0 if parsing was successful (return
- is due to end-of-input).
- The value is 1 if parsing failed (return is due to a syntax error).
- In an action, you can cause immediate return from @code{yyparse} by using
- these macros:
- @table @code
- @item YYACCEPT
- @findex YYACCEPT
- Return immediately with value 0 (to report success).
- @item YYABORT
- @findex YYABORT
- Return immediately with value 1 (to report failure).
- @end table
- @node Lexical, Error Reporting, Parser Function, Interface
- @section The Lexical Analyzer Function @code{yylex}
- @findex yylex
- @cindex lexical analyzer
- The @dfn{lexical analyzer} function, @code{yylex}, recognizes tokens from
- the input stream and returns them to the parser. Bison does not create
- this function automatically; you must write it so that @code{yyparse} can
- call it. The function is sometimes referred to as a lexical scanner.
- In simple programs, @code{yylex} is often defined at the end of the Bison
- grammar file. If @code{yylex} is defined in a separate source file, you
- need to arrange for the token-type macro definitions to be available there.
- To do this, use the @samp{-d} option when you run Bison, so that it will
- write these macro definitions into a separate header file
- @file{@var{name}.tab.h} which you can include in the other source files
- that need it. @xref{Invocation, ,Invoking Bison}.@refill
- @menu
- * Calling Convention:: How @code{yyparse} calls @code{yylex}.
- * Token Values:: How @code{yylex} must return the semantic value
- of the token it has read.
- * Token Positions:: How @code{yylex} must return the text position
- (line number, etc.) of the token, if the
- actions want that.
- * Pure Calling:: How the calling convention differs
- in a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}).
- @end menu
- @node Calling Convention, Token Values, , Lexical
- @subsection Calling Convention for @code{yylex}
- The value that @code{yylex} returns must be the numeric code for the type
- of token it has just found, or 0 for end-of-input.
- When a token is referred to in the grammar rules by a name, that name
- in the parser file becomes a C macro whose definition is the proper
- numeric code for that token type. So @code{yylex} can use the name
- to indicate that type. @xref{Symbols}.
- When a token is referred to in the grammar rules by a character literal,
- the numeric code for that character is also the code for the token type.
- So @code{yylex} can simply return that character code. The null character
- must not be used this way, because its code is zero and that is what
- signifies end-of-input.
- Here is an example showing these things:
- @example
- yylex ()
- @{
- @dots{}
- if (c == EOF) /* Detect end of file. */
- return 0;
- @dots{}
- if (c == '+' || c == '-')
- return c; /* Assume token type for `+' is '+'. */
- @dots{}
- return INT; /* Return the type of the token. */
- @dots{}
- @}
- @end example
- @noindent
- This interface has been designed so that the output from the @code{lex}
- utility can be used without change as the definition of @code{yylex}.
- @node Token Values, Token Positions, Calling Convention, Lexical
- @subsection Semantic Values of Tokens
- @vindex yylval
- In an ordinary (nonreentrant) parser, the semantic value of the token must
- be stored into the global variable @code{yylval}. When you are using
- just one data type for semantic values, @code{yylval} has that type.
- Thus, if the type is @code{int} (the default), you might write this in
- @code{yylex}:
- @example
- @group
- @dots{}
- yylval = value; /* Put value onto Bison stack. */
- return INT; /* Return the type of the token. */
- @dots{}
- @end group
- @end example
- When you are using multiple data types, @code{yylval}'s type is a union
- made from the @code{%union} declaration (@pxref{Union Decl, ,The Collection of Value Types}). So when
- you store a token's value, you must use the proper member of the union.
- If the @code{%union} declaration looks like this:
- @example
- @group
- %union @{
- int intval;
- double val;
- symrec *tptr;
- @}
- @end group
- @end example
- @noindent
- then the code in @code{yylex} might look like this:
- @example
- @group
- @dots{}
- yylval.intval = value; /* Put value onto Bison stack. */
- return INT; /* Return the type of the token. */
- @dots{}
- @end group
- @end example
- @node Token Positions, Pure Calling, Token Values, Lexical
- @subsection Textual Positions of Tokens
- @vindex yylloc
- If you are using the @samp{@@@var{n}}-feature (@pxref{Action Features, ,Special Features for Use in Actions}) in
- actions to keep track of the textual locations of tokens and groupings,
- then you must provide this information in @code{yylex}. The function
- @code{yyparse} expects to find the textual location of a token just parsed
- in the global variable @code{yylloc}. So @code{yylex} must store the
- proper data in that variable. The value of @code{yylloc} is a structure
- and you need only initialize the members that are going to be used by the
- actions. The four members are called @code{first_line},
- @code{first_column}, @code{last_line} and @code{last_column}. Note that
- the use of this feature makes the parser noticeably slower.
- @tindex YYLTYPE
- The data type of @code{yylloc} has the name @code{YYLTYPE}.
- @node Pure Calling, , Token Positions, Lexical
- @subsection Calling Conventions for Pure Parsers
- When you use the Bison declaration @code{%pure_parser} to request a
- pure, reentrant parser, the global communication variables @code{yylval}
- and @code{yylloc} cannot be used. (@xref{Pure Decl, ,A Pure (Reentrant)
- Parser}.) In such parsers the two global variables are replaced by
- pointers passed as arguments to @code{yylex}. You must declare them as
- shown here, and pass the information back by storing it through those
- pointers.
- @example
- yylex (lvalp, llocp)
- YYSTYPE *lvalp;
- YYLTYPE *llocp;
- @{
- @dots{}
- *lvalp = value; /* Put value onto Bison stack. */
- return INT; /* Return the type of the token. */
- @dots{}
- @}
- @end example
- If the grammar file does not use the @samp{@@} constructs to refer to
- textual positions, then the type @code{YYLTYPE} will not be defined. In
- this case, omit the second argument; @code{yylex} will be called with
- only one argument.
- @vindex YYPARSE_PARAM
- You can pass parameter information to a reentrant parser in a reentrant
- way. Define the macro @code{YYPARSE_PARAM} as a variable name. The
- resulting @code{yyparse} function then accepts one argument, of type
- @code{void *}, with that name.
- When you call @code{yyparse}, pass the address of an object, casting the
- address to @code{void *}. The grammar actions can refer to the contents
- of the object by casting the pointer value back to its proper type and
- then dereferencing it. Here's an example. Write this in the parser:
- @example
- %@{
- struct parser_control
- @{
- int nastiness;
- int randomness;
- @};
- #define YYPARSE_PARAM parm
- %@}
- @end example
- @noindent
- Then call the parser like this:
- @example
- struct parser_control
- @{
- int nastiness;
- int randomness;
- @};
- @dots{}
- @{
- struct parser_control foo;
- @dots{} /* @r{Store proper data in @code{foo}.} */
- value = yyparse ((void *) &foo);
- @dots{}
- @}
- @end example
- @noindent
- In the grammar actions, use expressions like this to refer to the data:
- @example
- ((struct parser_control *) parm)->randomness
- @end example
- @vindex YYLEX_PARAM
- If you wish to pass the additional parameter data to @code{yylex},
- define the macro @code{YYLEX_PARAM} just like @code{YYPARSE_PARAM}, as
- shown here:
- @example
- %@{
- struct parser_control
- @{
- int nastiness;
- int randomness;
- @};
- #define YYPARSE_PARAM parm
- #define YYLEX_PARAM parm
- %@}
- @end example
- You should then define @code{yylex} to accept one additional
- argument---the value of @code{parm}. (This makes either two or three
- arguments in total, depending on whether an argument of type
- @code{YYLTYPE} is passed.) You can declare the argument as a pointer to
- the proper object type, or you can declare it as @code{void *} and
- access the contents as shown above.
- @node Error Reporting, Action Features, Lexical, Interface
- @section The Error Reporting Function @code{yyerror}
- @cindex error reporting function
- @findex yyerror
- @cindex parse error
- @cindex syntax error
- The Bison parser detects a @dfn{parse error} or @dfn{syntax error}
- whenever it reads a token which cannot satisfy any syntax rule. A
- action in the grammar can also explicitly proclaim an error, using the
- macro @code{YYERROR} (@pxref{Action Features, ,Special Features for Use in Actions}).
- The Bison parser expects to report the error by calling an error
- reporting function named @code{yyerror}, which you must supply. It is
- called by @code{yyparse} whenever a syntax error is found, and it
- receives one argument. For a parse error, the string is normally
- @w{@code{"parse error"}}.
- @findex YYERROR_VERBOSE
- If you define the macro @code{YYERROR_VERBOSE} in the Bison declarations
- section (@pxref{Bison Declarations, ,The Bison Declarations Section}), then Bison provides a more verbose
- and specific error message string instead of just plain @w{@code{"parse
- error"}}. It doesn't matter what definition you use for
- @code{YYERROR_VERBOSE}, just whether you define it.
- The parser can detect one other kind of error: stack overflow. This
- happens when the input contains constructions that are very deeply
- nested. It isn't likely you will encounter this, since the Bison
- parser extends its stack automatically up to a very large limit. But
- if overflow happens, @code{yyparse} calls @code{yyerror} in the usual
- fashion, except that the argument string is @w{@code{"parser stack
- overflow"}}.
- The following definition suffices in simple programs:
- @example
- @group
- yyerror (s)
- char *s;
- @{
- @end group
- @group
- fprintf (stderr, "%s\n", s);
- @}
- @end group
- @end example
- After @code{yyerror} returns to @code{yyparse}, the latter will attempt
- error recovery if you have written suitable error recovery grammar rules
- (@pxref{Error Recovery}). If recovery is impossible, @code{yyparse} will
- immediately return 1.
- @vindex yynerrs
- The variable @code{yynerrs} contains the number of syntax errors
- encountered so far. Normally this variable is global; but if you
- request a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser}) then it is a local variable
- which only the actions can access.
- @node Action Features, , Error Reporting, Interface
- @section Special Features for Use in Actions
- @cindex summary, action features
- @cindex action features summary
- Here is a table of Bison constructs, variables and macros that
- are useful in actions.
- @table @samp
- @item $$
- Acts like a variable that contains the semantic value for the
- grouping made by the current rule. @xref{Actions}.
- @item $@var{n}
- Acts like a variable that contains the semantic value for the
- @var{n}th component of the current rule. @xref{Actions}.
- @item $<@var{typealt}>$
- Like @code{$$} but specifies alternative @var{typealt} in the union
- specified by the @code{%union} declaration. @xref{Action Types, ,Data Types of Values in Actions}.
- @item $<@var{typealt}>@var{n}
- Like @code{$@var{n}} but specifies alternative @var{typealt} in the
- union specified by the @code{%union} declaration.
- @xref{Action Types, ,Data Types of Values in Actions}.@refill
- @item YYABORT;
- Return immediately from @code{yyparse}, indicating failure.
- @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYACCEPT;
- Return immediately from @code{yyparse}, indicating success.
- @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYBACKUP (@var{token}, @var{value});
- @findex YYBACKUP
- Unshift a token. This macro is allowed only for rules that reduce
- a single value, and only when there is no look-ahead token.
- It installs a look-ahead token with token type @var{token} and
- semantic value @var{value}; then it discards the value that was
- going to be reduced by this rule.
- If the macro is used when it is not valid, such as when there is
- a look-ahead token already, then it reports a syntax error with
- a message @samp{cannot back up} and performs ordinary error
- recovery.
- In either case, the rest of the action is not executed.
- @item YYEMPTY
- @vindex YYEMPTY
- Value stored in @code{yychar} when there is no look-ahead token.
- @item YYERROR;
- @findex YYERROR
- Cause an immediate syntax error. This statement initiates error
- recovery just as if the parser itself had detected an error; however, it
- does not call @code{yyerror}, and does not print any message. If you
- want to print an error message, call @code{yyerror} explicitly before
- the @samp{YYERROR;} statement. @xref{Error Recovery}.
- @item YYRECOVERING
- This macro stands for an expression that has the value 1 when the parser
- is recovering from a syntax error, and 0 the rest of the time.
- @xref{Error Recovery}.
- @item yychar
- Variable containing the current look-ahead token. (In a pure parser,
- this is actually a local variable within @code{yyparse}.) When there is
- no look-ahead token, the value @code{YYEMPTY} is stored in the variable.
- @xref{Look-Ahead, ,Look-Ahead Tokens}.
- @item yyclearin;
- Discard the current look-ahead token. This is useful primarily in
- error rules. @xref{Error Recovery}.
- @item yyerrok;
- Resume generating error messages immediately for subsequent syntax
- errors. This is useful primarily in error rules.
- @xref{Error Recovery}.
- @item @@@var{n}
- @findex @@@var{n}
- Acts like a structure variable containing information on the line
- numbers and column numbers of the @var{n}th component of the current
- rule. The structure has four members, like this:
- @example
- struct @{
- int first_line, last_line;
- int first_column, last_column;
- @};
- @end example
- Thus, to get the starting line number of the third component, use
- @samp{@@3.first_line}.
- In order for the members of this structure to contain valid information,
- you must make @code{yylex} supply this information about each token.
- If you need only certain members, then @code{yylex} need only fill in
- those members.
- The use of this feature makes the parser noticeably slower.
- @end table
- @node Algorithm, Error Recovery, Interface, Top
- @chapter The Bison Parser Algorithm
- @cindex Bison parser algorithm
- @cindex algorithm of parser
- @cindex shifting
- @cindex reduction
- @cindex parser stack
- @cindex stack, parser
- As Bison reads tokens, it pushes them onto a stack along with their
- semantic values. The stack is called the @dfn{parser stack}. Pushing a
- token is traditionally called @dfn{shifting}.
- For example, suppose the infix calculator has read @samp{1 + 5 *}, with a
- @samp{3} to come. The stack will have four elements, one for each token
- that was shifted.
- But the stack does not always have an element for each token read. When
- the last @var{n} tokens and groupings shifted match the components of a
- grammar rule, they can be combined according to that rule. This is called
- @dfn{reduction}. Those tokens and groupings are replaced on the stack by a
- single grouping whose symbol is the result (left hand side) of that rule.
- Running the rule's action is part of the process of reduction, because this
- is what computes the semantic value of the resulting grouping.
- For example, if the infix calculator's parser stack contains this:
- @example
- 1 + 5 * 3
- @end example
- @noindent
- and the next input token is a newline character, then the last three
- elements can be reduced to 15 via the rule:
- @example
- expr: expr '*' expr;
- @end example
- @noindent
- Then the stack contains just these three elements:
- @example
- 1 + 15
- @end example
- @noindent
- At this point, another reduction can be made, resulting in the single value
- 16. Then the newline token can be shifted.
- The parser tries, by shifts and reductions, to reduce the entire input down
- to a single grouping whose symbol is the grammar's start-symbol
- (@pxref{Language and Grammar, ,Languages and Context-Free Grammars}).
- This kind of parser is known in the literature as a bottom-up parser.
- @menu
- * Look-Ahead:: Parser looks one token ahead when deciding what to do.
- * Shift/Reduce:: Conflicts: when either shifting or reduction is valid.
- * Precedence:: Operator precedence works by resolving conflicts.
- * Contextual Precedence:: When an operator's precedence depends on context.
- * Parser States:: The parser is a finite-state-machine with stack.
- * Reduce/Reduce:: When two rules are applicable in the same situation.
- * Mystery Conflicts:: Reduce/reduce conflicts that look unjustified.
- * Stack Overflow:: What happens when stack gets full. How to avoid it.
- @end menu
- @node Look-Ahead, Shift/Reduce, , Algorithm
- @section Look-Ahead Tokens
- @cindex look-ahead token
- The Bison parser does @emph{not} always reduce immediately as soon as the
- last @var{n} tokens and groupings match a rule. This is because such a
- simple strategy is inadequate to handle most languages. Instead, when a
- reduction is possible, the parser sometimes ``looks ahead'' at the next
- token in order to decide what to do.
- When a token is read, it is not immediately shifted; first it becomes the
- @dfn{look-ahead token}, which is not on the stack. Now the parser can
- perform one or more reductions of tokens and groupings on the stack, while
- the look-ahead token remains off to the side. When no more reductions
- should take place, the look-ahead token is shifted onto the stack. This
- does not mean that all possible reductions have been done; depending on the
- token type of the look-ahead token, some rules may choose to delay their
- application.
- Here is a simple case where look-ahead is needed. These three rules define
- expressions which contain binary addition operators and postfix unary
- factorial operators (@samp{!}), and allow parentheses for grouping.
- @example
- @group
- expr: term '+' expr
- | term
- ;
- @end group
- @group
- term: '(' expr ')'
- | term '!'
- | NUMBER
- ;
- @end group
- @end example
- Suppose that the tokens @w{@samp{1 + 2}} have been read and shifted; what
- should be done? If the following token is @samp{)}, then the first three
- tokens must be reduced to form an @code{expr}. This is the only valid
- course, because shifting the @samp{)} would produce a sequence of symbols
- @w{@code{term ')'}}, and no rule allows this.
- If the following token is @samp{!}, then it must be shifted immediately so
- that @w{@samp{2 !}} can be reduced to make a @code{term}. If instead the
- parser were to reduce before shifting, @w{@samp{1 + 2}} would become an
- @code{expr}. It would then be impossible to shift the @samp{!} because
- doing so would produce on the stack the sequence of symbols @code{expr
- '!'}. No rule allows that sequence.
- @vindex yychar
- The current look-ahead token is stored in the variable @code{yychar}.
- @xref{Action Features, ,Special Features for Use in Actions}.
- @node Shift/Reduce, Precedence, Look-Ahead, Algorithm
- @section Shift/Reduce Conflicts
- @cindex conflicts
- @cindex shift/reduce conflicts
- @cindex dangling @code{else}
- @cindex @code{else}, dangling
- Suppose we are parsing a language which has if-then and if-then-else
- statements, with a pair of rules like this:
- @example
- @group
- if_stmt:
- IF expr THEN stmt
- | IF expr THEN stmt ELSE stmt
- ;
- @end group
- @end example
- @noindent
- Here we assume that @code{IF}, @code{THEN} and @code{ELSE} are
- terminal symbols for specific keyword tokens.
- When the @code{ELSE} token is read and becomes the look-ahead token, the
- contents of the stack (assuming the input is valid) are just right for
- reduction by the first rule. But it is also legitimate to shift the
- @code{ELSE}, because that would lead to eventual reduction by the second
- rule.
- This situation, where either a shift or a reduction would be valid, is
- called a @dfn{shift/reduce conflict}. Bison is designed to resolve
- these conflicts by choosing to shift, unless otherwise directed by
- operator precedence declarations. To see the reason for this, let's
- contrast it with the other alternative.
- Since the parser prefers to shift the @code{ELSE}, the result is to attach
- the else-clause to the innermost if-statement, making these two inputs
- equivalent:
- @example
- if x then if y then win (); else lose;
- if x then do; if y then win (); else lose; end;
- @end example
- But if the parser chose to reduce when possible rather than shift, the
- result would be to attach the else-clause to the outermost if-statement,
- making these two inputs equivalent:
- @example
- if x then if y then win (); else lose;
- if x then do; if y then win (); end; else lose;
- @end example
- The conflict exists because the grammar as written is ambiguous: either
- parsing of the simple nested if-statement is legitimate. The established
- convention is that these ambiguities are resolved by attaching the
- else-clause to the innermost if-statement; this is what Bison accomplishes
- by choosing to shift rather than reduce. (It would ideally be cleaner to
- write an unambiguous grammar, but that is very hard to do in this case.)
- This particular ambiguity was first encountered in the specifications of
- Algol 60 and is called the ``dangling @code{else}'' ambiguity.
- To avoid warnings from Bison about predictable, legitimate shift/reduce
- conflicts, use the @code{%expect @var{n}} declaration. There will be no
- warning as long as the number of shift/reduce conflicts is exactly @var{n}.
- @xref{Expect Decl, ,Suppressing Conflict Warnings}.
- The definition of @code{if_stmt} above is solely to blame for the
- conflict, but the conflict does not actually appear without additional
- rules. Here is a complete Bison input file that actually manifests the
- conflict:
- @example
- @group
- %token IF THEN ELSE variable
- %%
- @end group
- @group
- stmt: expr
- | if_stmt
- ;
- @end group
- @group
- if_stmt:
- IF expr THEN stmt
- | IF expr THEN stmt ELSE stmt
- ;
- @end group
- expr: variable
- ;
- @end example
- @node Precedence, Contextual Precedence, Shift/Reduce, Algorithm
- @section Operator Precedence
- @cindex operator precedence
- @cindex precedence of operators
- Another situation where shift/reduce conflicts appear is in arithmetic
- expressions. Here shifting is not always the preferred resolution; the
- Bison declarations for operator precedence allow you to specify when to
- shift and when to reduce.
- @menu
- * Why Precedence:: An example showing why precedence is needed.
- * Using Precedence:: How to specify precedence in Bison grammars.
- * Precedence Examples:: How these features are used in the previous example.
- * How Precedence:: How they work.
- @end menu
- @node Why Precedence, Using Precedence, , Precedence
- @subsection When Precedence is Needed
- Consider the following ambiguous grammar fragment (ambiguous because the
- input @w{@samp{1 - 2 * 3}} can be parsed in two different ways):
- @example
- @group
- expr: expr '-' expr
- | expr '*' expr
- | expr '<' expr
- | '(' expr ')'
- @dots{}
- ;
- @end group
- @end example
- @noindent
- Suppose the parser has seen the tokens @samp{1}, @samp{-} and @samp{2};
- should it reduce them via the rule for the addition operator? It depends
- on the next token. Of course, if the next token is @samp{)}, we must
- reduce; shifting is invalid because no single rule can reduce the token
- sequence @w{@samp{- 2 )}} or anything starting with that. But if the next
- token is @samp{*} or @samp{<}, we have a choice: either shifting or
- reduction would allow the parse to complete, but with different
- results.
- To decide which one Bison should do, we must consider the
- results. If the next operator token @var{op} is shifted, then it
- must be reduced first in order to permit another opportunity to
- reduce the sum. The result is (in effect) @w{@samp{1 - (2
- @var{op} 3)}}. On the other hand, if the subtraction is reduced
- before shifting @var{op}, the result is @w{@samp{(1 - 2) @var{op}
- 3}}. Clearly, then, the choice of shift or reduce should depend
- on the relative precedence of the operators @samp{-} and
- @var{op}: @samp{*} should be shifted first, but not @samp{<}.
- @cindex associativity
- What about input such as @w{@samp{1 - 2 - 5}}; should this be
- @w{@samp{(1 - 2) - 5}} or should it be @w{@samp{1 - (2 - 5)}}? For
- most operators we prefer the former, which is called @dfn{left
- association}. The latter alternative, @dfn{right association}, is
- desirable for assignment operators. The choice of left or right
- association is a matter of whether the parser chooses to shift or
- reduce when the stack contains @w{@samp{1 - 2}} and the look-ahead
- token is @samp{-}: shifting makes right-associativity.
- @node Using Precedence, Precedence Examples, Why Precedence, Precedence
- @subsection Specifying Operator Precedence
- @findex %left
- @findex %right
- @findex %nonassoc
- Bison allows you to specify these choices with the operator precedence
- declarations @code{%left} and @code{%right}. Each such declaration
- contains a list of tokens, which are operators whose precedence and
- associativity is being declared. The @code{%left} declaration makes all
- those operators left-associative and the @code{%right} declaration makes
- them right-associative. A third alternative is @code{%nonassoc}, which
- declares that it is a syntax error to find the same operator twice ``in a
- row''.
- The relative precedence of different operators is controlled by the
- order in which they are declared. The first @code{%left} or
- @code{%right} declaration in the file declares the operators whose
- precedence is lowest, the next such declaration declares the operators
- whose precedence is a little higher, and so on.
- @node Precedence Examples, How Precedence, Using Precedence, Precedence
- @subsection Precedence Examples
- In our example, we would want the following declarations:
- @example
- %left '<'
- %left '-'
- %left '*'
- @end example
- In a more complete example, which supports other operators as well, we
- would declare them in groups of equal precedence. For example, @code{'+'} is
- declared with @code{'-'}:
- @example
- %left '<' '>' '=' NE LE GE
- %left '+' '-'
- %left '*' '/'
- @end example
- @noindent
- (Here @code{NE} and so on stand for the operators for ``not equal''
- and so on. We assume that these tokens are more than one character long
- and therefore are represented by names, not character literals.)
- @node How Precedence, , Precedence Examples, Precedence
- @subsection How Precedence Works
- The first effect of the precedence declarations is to assign precedence
- levels to the terminal symbols declared. The second effect is to assign
- precedence levels to certain rules: each rule gets its precedence from the
- last terminal symbol mentioned in the components. (You can also specify
- explicitly the precedence of a rule. @xref{Contextual Precedence, ,Context-Dependent Precedence}.)
- Finally, the resolution of conflicts works by comparing the
- precedence of the rule being considered with that of the
- look-ahead token. If the token's precedence is higher, the
- choice is to shift. If the rule's precedence is higher, the
- choice is to reduce. If they have equal precedence, the choice
- is made based on the associativity of that precedence level. The
- verbose output file made by @samp{-v} (@pxref{Invocation, ,Invoking Bison}) says
- how each conflict was resolved.
- Not all rules and not all tokens have precedence. If either the rule or
- the look-ahead token has no precedence, then the default is to shift.
- @node Contextual Precedence, Parser States, Precedence, Algorithm
- @section Context-Dependent Precedence
- @cindex context-dependent precedence
- @cindex unary operator precedence
- @cindex precedence, context-dependent
- @cindex precedence, unary operator
- @findex %prec
- Often the precedence of an operator depends on the context. This sounds
- outlandish at first, but it is really very common. For example, a minus
- sign typically has a very high precedence as a unary operator, and a
- somewhat lower precedence (lower than multiplication) as a binary operator.
- The Bison precedence declarations, @code{%left}, @code{%right} and
- @code{%nonassoc}, can only be used once for a given token; so a token has
- only one precedence declared in this way. For context-dependent
- precedence, you need to use an additional mechanism: the @code{%prec}
- modifier for rules.@refill
- The @code{%prec} modifier declares the precedence of a particular rule by
- specifying a terminal symbol whose precedence should be used for that rule.
- It's not necessary for that symbol to appear otherwise in the rule. The
- modifier's syntax is:
- @example
- %prec @var{terminal-symbol}
- @end example
- @noindent
- and it is written after the components of the rule. Its effect is to
- assign the rule the precedence of @var{terminal-symbol}, overriding
- the precedence that would be deduced for it in the ordinary way. The
- altered rule precedence then affects how conflicts involving that rule
- are resolved (@pxref{Precedence, ,Operator Precedence}).
- Here is how @code{%prec} solves the problem of unary minus. First, declare
- a precedence for a fictitious terminal symbol named @code{UMINUS}. There
- are no tokens of this type, but the symbol serves to stand for its
- precedence:
- @example
- @dots{}
- %left '+' '-'
- %left '*'
- %left UMINUS
- @end example
- Now the precedence of @code{UMINUS} can be used in specific rules:
- @example
- @group
- exp: @dots{}
- | exp '-' exp
- @dots{}
- | '-' exp %prec UMINUS
- @end group
- @end example
- @node Parser States, Reduce/Reduce, Contextual Precedence, Algorithm
- @section Parser States
- @cindex finite-state machine
- @cindex parser state
- @cindex state (of parser)
- The function @code{yyparse} is implemented using a finite-state machine.
- The values pushed on the parser stack are not simply token type codes; they
- represent the entire sequence of terminal and nonterminal symbols at or
- near the top of the stack. The current state collects all the information
- about previous input which is relevant to deciding what to do next.
- Each time a look-ahead token is read, the current parser state together
- with the type of look-ahead token are looked up in a table. This table
- entry can say, ``Shift the look-ahead token.'' In this case, it also
- specifies the new parser state, which is pushed onto the top of the
- parser stack. Or it can say, ``Reduce using rule number @var{n}.''
- This means that a certain number of tokens or groupings are taken off
- the top of the stack, and replaced by one grouping. In other words,
- that number of states are popped from the stack, and one new state is
- pushed.
- There is one other alternative: the table can say that the look-ahead token
- is erroneous in the current state. This causes error processing to begin
- (@pxref{Error Recovery}).
- @node Reduce/Reduce, Mystery Conflicts, Parser States, Algorithm
- @section Reduce/Reduce Conflicts
- @cindex reduce/reduce conflict
- @cindex conflicts, reduce/reduce
- A reduce/reduce conflict occurs if there are two or more rules that apply
- to the same sequence of input. This usually indicates a serious error
- in the grammar.
- For example, here is an erroneous attempt to define a sequence
- of zero or more @code{word} groupings.
- @example
- sequence: /* empty */
- @{ printf ("empty sequence\n"); @}
- | maybeword
- | sequence word
- @{ printf ("added word %s\n", $2); @}
- ;
- maybeword: /* empty */
- @{ printf ("empty maybeword\n"); @}
- | word
- @{ printf ("single word %s\n", $1); @}
- ;
- @end example
- @noindent
- The error is an ambiguity: there is more than one way to parse a single
- @code{word} into a @code{sequence}. It could be reduced to a
- @code{maybeword} and then into a @code{sequence} via the second rule.
- Alternatively, nothing-at-all could be reduced into a @code{sequence}
- via the first rule, and this could be combined with the @code{word}
- using the third rule for @code{sequence}.
- There is also more than one way to reduce nothing-at-all into a
- @code{sequence}. This can be done directly via the first rule,
- or indirectly via @code{maybeword} and then the second rule.
- You might think that this is a distinction without a difference, because it
- does not change whether any particular input is valid or not. But it does
- affect which actions are run. One parsing order runs the second rule's
- action; the other runs the first rule's action and the third rule's action.
- In this example, the output of the program changes.
- Bison resolves a reduce/reduce conflict by choosing to use the rule that
- appears first in the grammar, but it is very risky to rely on this. Every
- reduce/reduce conflict must be studied and usually eliminated. Here is the
- proper way to define @code{sequence}:
- @example
- sequence: /* empty */
- @{ printf ("empty sequence\n"); @}
- | sequence word
- @{ printf ("added word %s\n", $2); @}
- ;
- @end example
- Here is another common error that yields a reduce/reduce conflict:
- @example
- sequence: /* empty */
- | sequence words
- | sequence redirects
- ;
- words: /* empty */
- | words word
- ;
- redirects:/* empty */
- | redirects redirect
- ;
- @end example
- @noindent
- The intention here is to define a sequence which can contain either
- @code{word} or @code{redirect} groupings. The individual definitions of
- @code{sequence}, @code{words} and @code{redirects} are error-free, but the
- three together make a subtle ambiguity: even an empty input can be parsed
- in infinitely many ways!
- Consider: nothing-at-all could be a @code{words}. Or it could be two
- @code{words} in a row, or three, or any number. It could equally well be a
- @code{redirects}, or two, or any number. Or it could be a @code{words}
- followed by three @code{redirects} and another @code{words}. And so on.
- Here are two ways to correct these rules. First, to make it a single level
- of sequence:
- @example
- sequence: /* empty */
- | sequence word
- | sequence redirect
- ;
- @end example
- Second, to prevent either a @code{words} or a @code{redirects}
- from being empty:
- @example
- sequence: /* empty */
- | sequence words
- | sequence redirects
- ;
- words: word
- | words word
- ;
- redirects:redirect
- | redirects redirect
- ;
- @end example
- @node Mystery Conflicts, Stack Overflow, Reduce/Reduce, Algorithm
- @section Mysterious Reduce/Reduce Conflicts
- Sometimes reduce/reduce conflicts can occur that don't look warranted.
- Here is an example:
- @example
- @group
- %token ID
- %%
- def: param_spec return_spec ','
- ;
- param_spec:
- type
- | name_list ':' type
- ;
- @end group
- @group
- return_spec:
- type
- | name ':' type
- ;
- @end group
- @group
- type: ID
- ;
- @end group
- @group
- name: ID
- ;
- name_list:
- name
- | name ',' name_list
- ;
- @end group
- @end example
- It would seem that this grammar can be parsed with only a single token
- of look-ahead: when a @code{param_spec} is being read, an @code{ID} is
- a @code{name} if a comma or colon follows, or a @code{type} if another
- @code{ID} follows. In other words, this grammar is LR(1).
- @cindex LR(1)
- @cindex LALR(1)
- However, Bison, like most parser generators, cannot actually handle all
- LR(1) grammars. In this grammar, two contexts, that after an @code{ID}
- at the beginning of a @code{param_spec} and likewise at the beginning of
- a @code{return_spec}, are similar enough that Bison assumes they are the
- same. They appear similar because the same set of rules would be
- active---the rule for reducing to a @code{name} and that for reducing to
- a @code{type}. Bison is unable to determine at that stage of processing
- that the rules would require different look-ahead tokens in the two
- contexts, so it makes a single parser state for them both. Combining
- the two contexts causes a conflict later. In parser terminology, this
- occurrence means that the grammar is not LALR(1).
- In general, it is better to fix deficiencies than to document them. But
- this particular deficiency is intrinsically hard to fix; parser
- generators that can handle LR(1) grammars are hard to write and tend to
- produce parsers that are very large. In practice, Bison is more useful
- as it is now.
- When the problem arises, you can often fix it by identifying the two
- parser states that are being confused, and adding something to make them
- look distinct. In the above example, adding one rule to
- @code{return_spec} as follows makes the problem go away:
- @example
- @group
- %token BOGUS
- @dots{}
- %%
- @dots{}
- return_spec:
- type
- | name ':' type
- /* This rule is never used. */
- | ID BOGUS
- ;
- @end group
- @end example
- This corrects the problem because it introduces the possibility of an
- additional active rule in the context after the @code{ID} at the beginning of
- @code{return_spec}. This rule is not active in the corresponding context
- in a @code{param_spec}, so the two contexts receive distinct parser states.
- As long as the token @code{BOGUS} is never generated by @code{yylex},
- the added rule cannot alter the way actual input is parsed.
- In this particular example, there is another way to solve the problem:
- rewrite the rule for @code{return_spec} to use @code{ID} directly
- instead of via @code{name}. This also causes the two confusing
- contexts to have different sets of active rules, because the one for
- @code{return_spec} activates the altered rule for @code{return_spec}
- rather than the one for @code{name}.
- @example
- param_spec:
- type
- | name_list ':' type
- ;
- return_spec:
- type
- | ID ':' type
- ;
- @end example
- @node Stack Overflow, , Mystery Conflicts, Algorithm
- @section Stack Overflow, and How to Avoid It
- @cindex stack overflow
- @cindex parser stack overflow
- @cindex overflow of parser stack
- The Bison parser stack can overflow if too many tokens are shifted and
- not reduced. When this happens, the parser function @code{yyparse}
- returns a nonzero value, pausing only to call @code{yyerror} to report
- the overflow.
- @vindex YYMAXDEPTH
- By defining the macro @code{YYMAXDEPTH}, you can control how deep the
- parser stack can become before a stack overflow occurs. Define the
- macro with a value that is an integer. This value is the maximum number
- of tokens that can be shifted (and not reduced) before overflow.
- It must be a constant expression whose value is known at compile time.
- The stack space allowed is not necessarily allocated. If you specify a
- large value for @code{YYMAXDEPTH}, the parser actually allocates a small
- stack at first, and then makes it bigger by stages as needed. This
- increasing allocation happens automatically and silently. Therefore,
- you do not need to make @code{YYMAXDEPTH} painfully small merely to save
- space for ordinary inputs that do not need much stack.
- @cindex default stack limit
- The default value of @code{YYMAXDEPTH}, if you do not define it, is
- 10000.
- @vindex YYINITDEPTH
- You can control how much stack is allocated initially by defining the
- macro @code{YYINITDEPTH}. This value too must be a compile-time
- constant integer. The default is 200.
- @node Error Recovery, Context Dependency, Algorithm, Top
- @chapter Error Recovery
- @cindex error recovery
- @cindex recovery from errors
- It is not usually acceptable to have a program terminate on a parse
- error. For example, a compiler should recover sufficiently to parse the
- rest of the input file and check it for errors; a calculator should accept
- another expression.
- In a simple interactive command parser where each input is one line, it may
- be sufficient to allow @code{yyparse} to return 1 on error and have the
- caller ignore the rest of the input line when that happens (and then call
- @code{yyparse} again). But this is inadequate for a compiler, because it
- forgets all the syntactic context leading up to the error. A syntax error
- deep within a function in the compiler input should not cause the compiler
- to treat the following line like the beginning of a source file.
- @findex error
- You can define how to recover from a syntax error by writing rules to
- recognize the special token @code{error}. This is a terminal symbol that
- is always defined (you need not declare it) and reserved for error
- handling. The Bison parser generates an @code{error} token whenever a
- syntax error happens; if you have provided a rule to recognize this token
- in the current context, the parse can continue.
- For example:
- @example
- stmnts: /* empty string */
- | stmnts '\n'
- | stmnts exp '\n'
- | stmnts error '\n'
- @end example
- The fourth rule in this example says that an error followed by a newline
- makes a valid addition to any @code{stmnts}.
- What happens if a syntax error occurs in the middle of an @code{exp}? The
- error recovery rule, interpreted strictly, applies to the precise sequence
- of a @code{stmnts}, an @code{error} and a newline. If an error occurs in
- the middle of an @code{exp}, there will probably be some additional tokens
- and subexpressions on the stack after the last @code{stmnts}, and there
- will be tokens to read before the next newline. So the rule is not
- applicable in the ordinary way.
- But Bison can force the situation to fit the rule, by discarding part of
- the semantic context and part of the input. First it discards states and
- objects from the stack until it gets back to a state in which the
- @code{error} token is acceptable. (This means that the subexpressions
- already parsed are discarded, back to the last complete @code{stmnts}.) At
- this point the @code{error} token can be shifted. Then, if the old
- look-ahead token is not acceptable to be shifted next, the parser reads
- tokens and discards them until it finds a token which is acceptable. In
- this example, Bison reads and discards input until the next newline
- so that the fourth rule can apply.
- The choice of error rules in the grammar is a choice of strategies for
- error recovery. A simple and useful strategy is simply to skip the rest of
- the current input line or current statement if an error is detected:
- @example
- stmnt: error ';' /* on error, skip until ';' is read */
- @end example
- It is also useful to recover to the matching close-delimiter of an
- opening-delimiter that has already been parsed. Otherwise the
- close-delimiter will probably appear to be unmatched, and generate another,
- spurious error message:
- @example
- primary: '(' expr ')'
- | '(' error ')'
- @dots{}
- ;
- @end example
- Error recovery strategies are necessarily guesses. When they guess wrong,
- one syntax error often leads to another. In the above example, the error
- recovery rule guesses that an error is due to bad input within one
- @code{stmnt}. Suppose that instead a spurious semicolon is inserted in the
- middle of a valid @code{stmnt}. After the error recovery rule recovers
- from the first error, another syntax error will be found straightaway,
- since the text following the spurious semicolon is also an invalid
- @code{stmnt}.
- To prevent an outpouring of error messages, the parser will output no error
- message for another syntax error that happens shortly after the first; only
- after three consecutive input tokens have been successfully shifted will
- error messages resume.
- Note that rules which accept the @code{error} token may have actions, just
- as any other rules can.
- @findex yyerrok
- You can make error messages resume immediately by using the macro
- @code{yyerrok} in an action. If you do this in the error rule's action, no
- error messages will be suppressed. This macro requires no arguments;
- @samp{yyerrok;} is a valid C statement.
- @findex yyclearin
- The previous look-ahead token is reanalyzed immediately after an error. If
- this is unacceptable, then the macro @code{yyclearin} may be used to clear
- this token. Write the statement @samp{yyclearin;} in the error rule's
- action.
- For example, suppose that on a parse error, an error handling routine is
- called that advances the input stream to some point where parsing should
- once again commence. The next symbol returned by the lexical scanner is
- probably correct. The previous look-ahead token ought to be discarded
- with @samp{yyclearin;}.
- @vindex YYRECOVERING
- The macro @code{YYRECOVERING} stands for an expression that has the
- value 1 when the parser is recovering from a syntax error, and 0 the
- rest of the time. A value of 1 indicates that error messages are
- currently suppressed for new syntax errors.
- @node Context Dependency, Debugging, Error Recovery, Top
- @chapter Handling Context Dependencies
- The Bison paradigm is to parse tokens first, then group them into larger
- syntactic units. In many languages, the meaning of a token is affected by
- its context. Although this violates the Bison paradigm, certain techniques
- (known as @dfn{kludges}) may enable you to write Bison parsers for such
- languages.
- @menu
- * Semantic Tokens:: Token parsing can depend on the semantic context.
- * Lexical Tie-ins:: Token parsing can depend on the syntactic context.
- * Tie-in Recovery:: Lexical tie-ins have implications for how
- error recovery rules must be written.
- @end menu
- (Actually, ``kludge'' means any technique that gets its job done but is
- neither clean nor robust.)
- @node Semantic Tokens, Lexical Tie-ins, , Context Dependency
- @section Semantic Info in Token Types
- The C language has a context dependency: the way an identifier is used
- depends on what its current meaning is. For example, consider this:
- @example
- foo (x);
- @end example
- This looks like a function call statement, but if @code{foo} is a typedef
- name, then this is actually a declaration of @code{x}. How can a Bison
- parser for C decide how to parse this input?
- The method used in GNU C is to have two different token types,
- @code{IDENTIFIER} and @code{TYPENAME}. When @code{yylex} finds an
- identifier, it looks up the current declaration of the identifier in order
- to decide which token type to return: @code{TYPENAME} if the identifier is
- declared as a typedef, @code{IDENTIFIER} otherwise.
- The grammar rules can then express the context dependency by the choice of
- token type to recognize. @code{IDENTIFIER} is accepted as an expression,
- but @code{TYPENAME} is not. @code{TYPENAME} can start a declaration, but
- @code{IDENTIFIER} cannot. In contexts where the meaning of the identifier
- is @emph{not} significant, such as in declarations that can shadow a
- typedef name, either @code{TYPENAME} or @code{IDENTIFIER} is
- accepted---there is one rule for each of the two token types.
- This technique is simple to use if the decision of which kinds of
- identifiers to allow is made at a place close to where the identifier is
- parsed. But in C this is not always so: C allows a declaration to
- redeclare a typedef name provided an explicit type has been specified
- earlier:
- @example
- typedef int foo, bar, lose;
- static foo (bar); /* @r{redeclare @code{bar} as static variable} */
- static int foo (lose); /* @r{redeclare @code{foo} as function} */
- @end example
- Unfortunately, the name being declared is separated from the declaration
- construct itself by a complicated syntactic structure---the ``declarator''.
- As a result, the part of Bison parser for C needs to be duplicated, with
- all the nonterminal names changed: once for parsing a declaration in which
- a typedef name can be redefined, and once for parsing a declaration in
- which that can't be done. Here is a part of the duplication, with actions
- omitted for brevity:
- @example
- initdcl:
- declarator maybeasm '='
- init
- | declarator maybeasm
- ;
- notype_initdcl:
- notype_declarator maybeasm '='
- init
- | notype_declarator maybeasm
- ;
- @end example
- @noindent
- Here @code{initdcl} can redeclare a typedef name, but @code{notype_initdcl}
- cannot. The distinction between @code{declarator} and
- @code{notype_declarator} is the same sort of thing.
- There is some similarity between this technique and a lexical tie-in
- (described next), in that information which alters the lexical analysis is
- changed during parsing by other parts of the program. The difference is
- here the information is global, and is used for other purposes in the
- program. A true lexical tie-in has a special-purpose flag controlled by
- the syntactic context.
- @node Lexical Tie-ins, Tie-in Recovery, Semantic Tokens, Context Dependency
- @section Lexical Tie-ins
- @cindex lexical tie-in
- One way to handle context-dependency is the @dfn{lexical tie-in}: a flag
- which is set by Bison actions, whose purpose is to alter the way tokens are
- parsed.
- For example, suppose we have a language vaguely like C, but with a special
- construct @samp{hex (@var{hex-expr})}. After the keyword @code{hex} comes
- an expression in parentheses in which all integers are hexadecimal. In
- particular, the token @samp{a1b} must be treated as an integer rather than
- as an identifier if it appears in that context. Here is how you can do it:
- @example
- @group
- %@{
- int hexflag;
- %@}
- %%
- @dots{}
- @end group
- @group
- expr: IDENTIFIER
- | constant
- | HEX '('
- @{ hexflag = 1; @}
- expr ')'
- @{ hexflag = 0;
- $$ = $4; @}
- | expr '+' expr
- @{ $$ = make_sum ($1, $3); @}
- @dots{}
- ;
- @end group
- @group
- constant:
- INTEGER
- | STRING
- ;
- @end group
- @end example
- @noindent
- Here we assume that @code{yylex} looks at the value of @code{hexflag}; when
- it is nonzero, all integers are parsed in hexadecimal, and tokens starting
- with letters are parsed as integers if possible.
- The declaration of @code{hexflag} shown in the C declarations section of
- the parser file is needed to make it accessible to the actions
- (@pxref{C Declarations, ,The C Declarations Section}). You must also write the code in @code{yylex}
- to obey the flag.
- @node Tie-in Recovery, , Lexical Tie-ins, Context Dependency
- @section Lexical Tie-ins and Error Recovery
- Lexical tie-ins make strict demands on any error recovery rules you have.
- @xref{Error Recovery}.
- The reason for this is that the purpose of an error recovery rule is to
- abort the parsing of one construct and resume in some larger construct.
- For example, in C-like languages, a typical error recovery rule is to skip
- tokens until the next semicolon, and then start a new statement, like this:
- @example
- stmt: expr ';'
- | IF '(' expr ')' stmt @{ @dots{} @}
- @dots{}
- error ';'
- @{ hexflag = 0; @}
- ;
- @end example
- If there is a syntax error in the middle of a @samp{hex (@var{expr})}
- construct, this error rule will apply, and then the action for the
- completed @samp{hex (@var{expr})} will never run. So @code{hexflag} would
- remain set for the entire rest of the input, or until the next @code{hex}
- keyword, causing identifiers to be misinterpreted as integers.
- To avoid this problem the error recovery rule itself clears @code{hexflag}.
- There may also be an error recovery rule that works within expressions.
- For example, there could be a rule which applies within parentheses
- and skips to the close-parenthesis:
- @example
- @group
- expr: @dots{}
- | '(' expr ')'
- @{ $$ = $2; @}
- | '(' error ')'
- @dots{}
- @end group
- @end example
- If this rule acts within the @code{hex} construct, it is not going to abort
- that construct (since it applies to an inner level of parentheses within
- the construct). Therefore, it should not clear the flag: the rest of
- the @code{hex} construct should be parsed with the flag still in effect.
- What if there is an error recovery rule which might abort out of the
- @code{hex} construct or might not, depending on circumstances? There is no
- way you can write the action to determine whether a @code{hex} construct is
- being aborted or not. So if you are using a lexical tie-in, you had better
- make sure your error recovery rules are not of this kind. Each rule must
- be such that you can be sure that it always will, or always won't, have to
- clear the flag.
- @node Debugging, Invocation, Context Dependency, Top
- @chapter Debugging Your Parser
- @findex YYDEBUG
- @findex yydebug
- @cindex debugging
- @cindex tracing the parser
- If a Bison grammar compiles properly but doesn't do what you want when it
- runs, the @code{yydebug} parser-trace feature can help you figure out why.
- To enable compilation of trace facilities, you must define the macro
- @code{YYDEBUG} when you compile the parser. You could use
- @samp{-DYYDEBUG=1} as a compiler option or you could put @samp{#define
- YYDEBUG 1} in the C declarations section of the grammar file
- (@pxref{C Declarations, ,The C Declarations Section}). Alternatively, use the @samp{-t} option when
- you run Bison (@pxref{Invocation, ,Invoking Bison}). We always define @code{YYDEBUG} so that
- debugging is always possible.
- The trace facility uses @code{stderr}, so you must add @w{@code{#include
- <stdio.h>}} to the C declarations section unless it is already there.
- Once you have compiled the program with trace facilities, the way to
- request a trace is to store a nonzero value in the variable @code{yydebug}.
- You can do this by making the C code do it (in @code{main}, perhaps), or
- you can alter the value with a C debugger.
- Each step taken by the parser when @code{yydebug} is nonzero produces a
- line or two of trace information, written on @code{stderr}. The trace
- messages tell you these things:
- @itemize @bullet
- @item
- Each time the parser calls @code{yylex}, what kind of token was read.
- @item
- Each time a token is shifted, the depth and complete contents of the
- state stack (@pxref{Parser States}).
- @item
- Each time a rule is reduced, which rule it is, and the complete contents
- of the state stack afterward.
- @end itemize
- To make sense of this information, it helps to refer to the listing file
- produced by the Bison @samp{-v} option (@pxref{Invocation, ,Invoking Bison}). This file
- shows the meaning of each state in terms of positions in various rules, and
- also what each state will do with each possible input token. As you read
- the successive trace messages, you can see that the parser is functioning
- according to its specification in the listing file. Eventually you will
- arrive at the place where something undesirable happens, and you will see
- which parts of the grammar are to blame.
- The parser file is a C program and you can use C debuggers on it, but it's
- not easy to interpret what it is doing. The parser function is a
- finite-state machine interpreter, and aside from the actions it executes
- the same code over and over. Only the values of variables show where in
- the grammar it is working.
- @findex YYPRINT
- The debugging information normally gives the token type of each token
- read, but not its semantic value. You can optionally define a macro
- named @code{YYPRINT} to provide a way to print the value. If you define
- @code{YYPRINT}, it should take three arguments. The parser will pass a
- standard I/O stream, the numeric code for the token type, and the token
- value (from @code{yylval}).
- Here is an example of @code{YYPRINT} suitable for the multi-function
- calculator (@pxref{Mfcalc Decl, ,Declarations for @code{mfcalc}}):
- @smallexample
- #define YYPRINT(file, type, value) yyprint (file, type, value)
- static void
- yyprint (file, type, value)
- FILE *file;
- int type;
- YYSTYPE value;
- @{
- if (type == VAR)
- fprintf (file, " %s", value.tptr->name);
- else if (type == NUM)
- fprintf (file, " %d", value.val);
- @}
- @end smallexample
- @node Invocation, Table of Symbols, Debugging, Top
- @chapter Invoking Bison
- @cindex invoking Bison
- @cindex Bison invocation
- @cindex options for invoking Bison
- The usual way to invoke Bison is as follows:
- @example
- bison @var{infile}
- @end example
- Here @var{infile} is the grammar file name, which usually ends in
- @samp{.y}. The parser file's name is made by replacing the @samp{.y}
- with @samp{.tab.c}. Thus, the @samp{bison foo.y} filename yields
- @file{foo.tab.c}, and the @samp{bison hack/foo.y} filename yields
- @file{hack/foo.tab.c}.@refill
- @menu
- * Bison Options:: All the options described in detail,
- in alphabetical order by short options.
- * Option Cross Key:: Alphabetical list of long options.
- * VMS Invocation:: Bison command syntax on VMS.
- @end menu
- @node Bison Options, Option Cross Key, , Invocation
- @section Bison Options
- Bison supports both traditional single-letter options and mnemonic long
- option names. Long option names are indicated with @samp{--} instead of
- @samp{-}. Abbreviations for option names are allowed as long as they
- are unique. When a long option takes an argument, like
- @samp{--file-prefix}, connect the option name and the argument with
- @samp{=}.
- Here is a list of options that can be used with Bison, alphabetized by
- short option. It is followed by a cross key alphabetized by long
- option.
- @table @samp
- @item -b @var{file-prefix}
- @itemx --file-prefix=@var{prefix}
- Specify a prefix to use for all Bison output file names. The names are
- chosen as if the input file were named @file{@var{prefix}.c}.
- @item -d
- @itemx --defines
- Write an extra output file containing macro definitions for the token
- type names defined in the grammar and the semantic value type
- @code{YYSTYPE}, as well as a few @code{extern} variable declarations.
- If the parser output file is named @file{@var{name}.c} then this file
- is named @file{@var{name}.h}.@refill
- This output file is essential if you wish to put the definition of
- @code{yylex} in a separate source file, because @code{yylex} needs to
- be able to refer to token type codes and the variable
- @code{yylval}. @xref{Token Values, ,Semantic Values of Tokens}.@refill
- @item -l
- @itemx --no-lines
- Don't put any @code{#line} preprocessor commands in the parser file.
- Ordinarily Bison puts them in the parser file so that the C compiler
- and debuggers will associate errors with your source file, the
- grammar file. This option causes them to associate errors with the
- parser file, treating it an independent source file in its own right.
- @item -o @var{outfile}
- @itemx --output-file=@var{outfile}
- Specify the name @var{outfile} for the parser file.
- The other output files' names are constructed from @var{outfile}
- as described under the @samp{-v} and @samp{-d} switches.
- @item -p @var{prefix}
- @itemx --name-prefix=@var{prefix}
- Rename the external symbols used in the parser so that they start with
- @var{prefix} instead of @samp{yy}. The precise list of symbols renamed
- is @code{yyparse}, @code{yylex}, @code{yyerror}, @code{yynerrs},
- @code{yylval}, @code{yychar} and @code{yydebug}.
- For example, if you use @samp{-p c}, the names become @code{cparse},
- @code{clex}, and so on.
- @xref{Multiple Parsers, ,Multiple Parsers in the Same Program}.
- @item -t
- @itemx --debug
- Output a definition of the macro @code{YYDEBUG} into the parser file,
- so that the debugging facilities are compiled. @xref{Debugging, ,Debugging Your Parser}.
- @item -v
- @itemx --verbose
- Write an extra output file containing verbose descriptions of the
- parser states and what is done for each type of look-ahead token in
- that state.
- This file also describes all the conflicts, both those resolved by
- operator precedence and the unresolved ones.
- The file's name is made by removing @samp{.tab.c} or @samp{.c} from
- the parser output file name, and adding @samp{.output} instead.@refill
- Therefore, if the input file is @file{foo.y}, then the parser file is
- called @file{foo.tab.c} by default. As a consequence, the verbose
- output file is called @file{foo.output}.@refill
- @item -V
- @itemx --version
- Print the version number of Bison and exit.
- @item -h
- @itemx --help
- Print a summary of the command-line options to Bison and exit.
- @need 1750
- @item -y
- @itemx --yacc
- @itemx --fixed-output-files
- Equivalent to @samp{-o y.tab.c}; the parser output file is called
- @file{y.tab.c}, and the other outputs are called @file{y.output} and
- @file{y.tab.h}. The purpose of this switch is to imitate Yacc's output
- file name conventions. Thus, the following shell script can substitute
- for Yacc:@refill
- @example
- bison -y $*
- @end example
- @end table
- @node Option Cross Key, VMS Invocation, Bison Options, Invocation
- @section Option Cross Key
- Here is a list of options, alphabetized by long option, to help you find
- the corresponding short option.
- @tex
- \def\leaderfill{\leaders\hbox to 1em{\hss.\hss}\hfill}
- {\tt
- \line{ --debug \leaderfill -t}
- \line{ --defines \leaderfill -d}
- \line{ --file-prefix \leaderfill -b}
- \line{ --fixed-output-files \leaderfill -y}
- \line{ --help \leaderfill -h}
- \line{ --name-prefix \leaderfill -p}
- \line{ --no-lines \leaderfill -l}
- \line{ --output-file \leaderfill -o}
- \line{ --verbose \leaderfill -v}
- \line{ --version \leaderfill -V}
- \line{ --yacc \leaderfill -y}
- }
- @end tex
- @ifinfo
- @example
- --debug -t
- --defines -d
- --file-prefix=@var{prefix} -b @var{file-prefix}
- --fixed-output-files --yacc -y
- --help -h
- --name-prefix -p
- --no-lines -l
- --output-file=@var{outfile} -o @var{outfile}
- --verbose -v
- --version -V
- @end example
- @end ifinfo
- @node VMS Invocation, , Option Cross Key, Invocation
- @section Invoking Bison under VMS
- @cindex invoking Bison under VMS
- @cindex VMS
- The command line syntax for Bison on VMS is a variant of the usual
- Bison command syntax---adapted to fit VMS conventions.
- To find the VMS equivalent for any Bison option, start with the long
- option, and substitute a @samp{/} for the leading @samp{--}, and
- substitute a @samp{_} for each @samp{-} in the name of the long option.
- For example, the following invocation under VMS:
- @example
- bison /debug/name_prefix=bar foo.y
- @end example
- @noindent
- is equivalent to the following command under POSIX.
- @example
- bison --debug --name-prefix=bar foo.y
- @end example
- The VMS file system does not permit filenames such as
- @file{foo.tab.c}. In the above example, the output file
- would instead be named @file{foo_tab.c}.
- @node Table of Symbols, Glossary, Invocation, Top
- @appendix Bison Symbols
- @cindex Bison symbols, table of
- @cindex symbols in Bison, table of
- @table @code
- @item error
- A token name reserved for error recovery. This token may be used in
- grammar rules so as to allow the Bison parser to recognize an error in
- the grammar without halting the process. In effect, a sentence
- containing an error may be recognized as valid. On a parse error, the
- token @code{error} becomes the current look-ahead token. Actions
- corresponding to @code{error} are then executed, and the look-ahead
- token is reset to the token that originally caused the violation.
- @xref{Error Recovery}.
- @item YYABORT
- Macro to pretend that an unrecoverable syntax error has occurred, by
- making @code{yyparse} return 1 immediately. The error reporting
- function @code{yyerror} is not called. @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYACCEPT
- Macro to pretend that a complete utterance of the language has been
- read, by making @code{yyparse} return 0 immediately.
- @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item YYBACKUP
- Macro to discard a value from the parser stack and fake a look-ahead
- token. @xref{Action Features, ,Special Features for Use in Actions}.
- @item YYERROR
- Macro to pretend that a syntax error has just been detected: call
- @code{yyerror} and then perform normal error recovery if possible
- (@pxref{Error Recovery}), or (if recovery is impossible) make
- @code{yyparse} return 1. @xref{Error Recovery}.
- @item YYERROR_VERBOSE
- Macro that you define with @code{#define} in the Bison declarations
- section to request verbose, specific error message strings when
- @code{yyerror} is called.
- @item YYINITDEPTH
- Macro for specifying the initial size of the parser stack.
- @xref{Stack Overflow}.
- @item YYLEX_PARAM
- Macro for specifying an extra argument (or list of extra arguments) for
- @code{yyparse} to pass to @code{yylex}. @xref{Pure Calling,, Calling
- Conventions for Pure Parsers}.
- @item YYLTYPE
- Macro for the data type of @code{yylloc}; a structure with four
- members. @xref{Token Positions, ,Textual Positions of Tokens}.
- @item YYMAXDEPTH
- Macro for specifying the maximum size of the parser stack.
- @xref{Stack Overflow}.
- @item YYPARSE_PARAM
- Macro for specifying the name of a parameter that @code{yyparse} should
- accept. @xref{Pure Calling,, Calling Conventions for Pure Parsers}.
- @item YYRECOVERING
- Macro whose value indicates whether the parser is recovering from a
- syntax error. @xref{Action Features, ,Special Features for Use in Actions}.
- @item YYSTYPE
- Macro for the data type of semantic values; @code{int} by default.
- @xref{Value Type, ,Data Types of Semantic Values}.
- @item yychar
- External integer variable that contains the integer value of the
- current look-ahead token. (In a pure parser, it is a local variable
- within @code{yyparse}.) Error-recovery rule actions may examine this
- variable. @xref{Action Features, ,Special Features for Use in Actions}.
- @item yyclearin
- Macro used in error-recovery rule actions. It clears the previous
- look-ahead token. @xref{Error Recovery}.
- @item yydebug
- External integer variable set to zero by default. If @code{yydebug}
- is given a nonzero value, the parser will output information on input
- symbols and parser action. @xref{Debugging, ,Debugging Your Parser}.
- @item yyerrok
- Macro to cause parser to recover immediately to its normal mode
- after a parse error. @xref{Error Recovery}.
- @item yyerror
- User-supplied function to be called by @code{yyparse} on error. The
- function receives one argument, a pointer to a character string
- containing an error message. @xref{Error Reporting, ,The Error Reporting Function @code{yyerror}}.
- @item yylex
- User-supplied lexical analyzer function, called with no arguments
- to get the next token. @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- @item yylval
- External variable in which @code{yylex} should place the semantic
- value associated with a token. (In a pure parser, it is a local
- variable within @code{yyparse}, and its address is passed to
- @code{yylex}.) @xref{Token Values, ,Semantic Values of Tokens}.
- @item yylloc
- External variable in which @code{yylex} should place the line and
- column numbers associated with a token. (In a pure parser, it is a
- local variable within @code{yyparse}, and its address is passed to
- @code{yylex}.) You can ignore this variable if you don't use the
- @samp{@@} feature in the grammar actions. @xref{Token Positions, ,Textual Positions of Tokens}.
- @item yynerrs
- Global variable which Bison increments each time there is a parse
- error. (In a pure parser, it is a local variable within
- @code{yyparse}.) @xref{Error Reporting, ,The Error Reporting Function @code{yyerror}}.
- @item yyparse
- The parser function produced by Bison; call this function to start
- parsing. @xref{Parser Function, ,The Parser Function @code{yyparse}}.
- @item %left
- Bison declaration to assign left associativity to token(s).
- @xref{Precedence Decl, ,Operator Precedence}.
- @item %nonassoc
- Bison declaration to assign nonassociativity to token(s).
- @xref{Precedence Decl, ,Operator Precedence}.
- @item %prec
- Bison declaration to assign a precedence to a specific rule.
- @xref{Contextual Precedence, ,Context-Dependent Precedence}.
- @item %pure_parser
- Bison declaration to request a pure (reentrant) parser.
- @xref{Pure Decl, ,A Pure (Reentrant) Parser}.
- @item %right
- Bison declaration to assign right associativity to token(s).
- @xref{Precedence Decl, ,Operator Precedence}.
- @item %start
- Bison declaration to specify the start symbol. @xref{Start Decl, ,The Start-Symbol}.
- @item %token
- Bison declaration to declare token(s) without specifying precedence.
- @xref{Token Decl, ,Token Type Names}.
- @item %type
- Bison declaration to declare nonterminals. @xref{Type Decl, ,Nonterminal Symbols}.
- @item %union
- Bison declaration to specify several possible data types for semantic
- values. @xref{Union Decl, ,The Collection of Value Types}.
- @end table
- These are the punctuation and delimiters used in Bison input:
- @table @samp
- @item %%
- Delimiter used to separate the grammar rule section from the
- Bison declarations section or the additional C code section.
- @xref{Grammar Layout, ,The Overall Layout of a Bison Grammar}.
- @item %@{ %@}
- All code listed between @samp{%@{} and @samp{%@}} is copied directly
- to the output file uninterpreted. Such code forms the ``C
- declarations'' section of the input file. @xref{Grammar Outline, ,Outline of a Bison Grammar}.
- @item /*@dots{}*/
- Comment delimiters, as in C.
- @item :
- Separates a rule's result from its components. @xref{Rules, ,Syntax of Grammar Rules}.
- @item ;
- Terminates a rule. @xref{Rules, ,Syntax of Grammar Rules}.
- @item |
- Separates alternate rules for the same result nonterminal.
- @xref{Rules, ,Syntax of Grammar Rules}.
- @end table
- @node Glossary, Index, Table of Symbols, Top
- @appendix Glossary
- @cindex glossary
- @table @asis
- @item Backus-Naur Form (BNF)
- Formal method of specifying context-free grammars. BNF was first used
- in the @cite{ALGOL-60} report, 1963. @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Context-free grammars
- Grammars specified as rules that can be applied regardless of context.
- Thus, if there is a rule which says that an integer can be used as an
- expression, integers are allowed @emph{anywhere} an expression is
- permitted. @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Dynamic allocation
- Allocation of memory that occurs during execution, rather than at
- compile time or on entry to a function.
- @item Empty string
- Analogous to the empty set in set theory, the empty string is a
- character string of length zero.
- @item Finite-state stack machine
- A ``machine'' that has discrete states in which it is said to exist at
- each instant in time. As input to the machine is processed, the
- machine moves from state to state as specified by the logic of the
- machine. In the case of the parser, the input is the language being
- parsed, and the states correspond to various stages in the grammar
- rules. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Grouping
- A language construct that is (in general) grammatically divisible;
- for example, `expression' or `declaration' in C.
- @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Infix operator
- An arithmetic operator that is placed between the operands on which it
- performs some operation.
- @item Input stream
- A continuous flow of data between devices or programs.
- @item Language construct
- One of the typical usage schemas of the language. For example, one of
- the constructs of the C language is the @code{if} statement.
- @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @item Left associativity
- Operators having left associativity are analyzed from left to right:
- @samp{a+b+c} first computes @samp{a+b} and then combines with
- @samp{c}. @xref{Precedence, ,Operator Precedence}.
- @item Left recursion
- A rule whose result symbol is also its first component symbol;
- for example, @samp{expseq1 : expseq1 ',' exp;}. @xref{Recursion, ,Recursive Rules}.
- @item Left-to-right parsing
- Parsing a sentence of a language by analyzing it token by token from
- left to right. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Lexical analyzer (scanner)
- A function that reads an input stream and returns tokens one by one.
- @xref{Lexical, ,The Lexical Analyzer Function @code{yylex}}.
- @item Lexical tie-in
- A flag, set by actions in the grammar rules, which alters the way
- tokens are parsed. @xref{Lexical Tie-ins}.
- @item Look-ahead token
- A token already read but not yet shifted. @xref{Look-Ahead, ,Look-Ahead Tokens}.
- @item LALR(1)
- The class of context-free grammars that Bison (like most other parser
- generators) can handle; a subset of LR(1). @xref{Mystery Conflicts, ,
- Mysterious Reduce/Reduce Conflicts}.
- @item LR(1)
- The class of context-free grammars in which at most one token of
- look-ahead is needed to disambiguate the parsing of any piece of input.
- @item Nonterminal symbol
- A grammar symbol standing for a grammatical construct that can
- be expressed through rules in terms of smaller constructs; in other
- words, a construct that is not a token. @xref{Symbols}.
- @item Parse error
- An error encountered during parsing of an input stream due to invalid
- syntax. @xref{Error Recovery}.
- @item Parser
- A function that recognizes valid sentences of a language by analyzing
- the syntax structure of a set of tokens passed to it from a lexical
- analyzer.
- @item Postfix operator
- An arithmetic operator that is placed after the operands upon which it
- performs some operation.
- @item Reduction
- Replacing a string of nonterminals and/or terminals with a single
- nonterminal, according to a grammar rule. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Reentrant
- A reentrant subprogram is a subprogram which can be in invoked any
- number of times in parallel, without interference between the various
- invocations. @xref{Pure Decl, ,A Pure (Reentrant) Parser}.
- @item Reverse polish notation
- A language in which all operators are postfix operators.
- @item Right recursion
- A rule whose result symbol is also its last component symbol;
- for example, @samp{expseq1: exp ',' expseq1;}. @xref{Recursion, ,Recursive Rules}.
- @item Semantics
- In computer languages, the semantics are specified by the actions
- taken for each instance of the language, i.e., the meaning of
- each statement. @xref{Semantics, ,Defining Language Semantics}.
- @item Shift
- A parser is said to shift when it makes the choice of analyzing
- further input from the stream rather than reducing immediately some
- already-recognized rule. @xref{Algorithm, ,The Bison Parser Algorithm }.
- @item Single-character literal
- A single character that is recognized and interpreted as is.
- @xref{Grammar in Bison, ,From Formal Rules to Bison Input}.
- @item Start symbol
- The nonterminal symbol that stands for a complete valid utterance in
- the language being parsed. The start symbol is usually listed as the
- first nonterminal symbol in a language specification.
- @xref{Start Decl, ,The Start-Symbol}.
- @item Symbol table
- A data structure where symbol names and associated data are stored
- during parsing to allow for recognition and use of existing
- information in repeated uses of a symbol. @xref{Multi-function Calc}.
- @item Token
- A basic, grammatically indivisible unit of a language. The symbol
- that describes a token in the grammar is a terminal symbol.
- The input of the Bison parser is a stream of tokens which comes from
- the lexical analyzer. @xref{Symbols}.
- @item Terminal symbol
- A grammar symbol that has no rules in the grammar and therefore
- is grammatically indivisible. The piece of text it represents
- is a token. @xref{Language and Grammar, ,Languages and Context-Free Grammars}.
- @end table
- @node Index, , Glossary, Top
- @unnumbered Index
- @printindex cp
- @contents
- @bye
- @c old menu
- * Introduction::
- * Conditions::
- * Copying:: The GNU General Public License says
- how you can copy and share Bison
- Tutorial sections:
- * Concepts:: Basic concepts for understanding Bison.
- * Examples:: Three simple explained examples of using Bison.
- Reference sections:
- * Grammar File:: Writing Bison declarations and rules.
- * Interface:: C-language interface to the parser function @code{yyparse}.
- * Algorithm:: How the Bison parser works at run-time.
- * Error Recovery:: Writing rules for error recovery.
- * Context Dependency::What to do if your language syntax is too
- messy for Bison to handle straightforwardly.
- * Debugging:: Debugging Bison parsers that parse wrong.
- * Invocation:: How to run Bison (to produce the parser source file).
- * Table of Symbols:: All the keywords of the Bison language are explained.
- * Glossary:: Basic concepts are explained.
- * Index:: Cross-references to the text.
|