reset.rt 142 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250
  ### Useful constants
  ## Zero pointer: marks unset heap bounds, the end of a list, and the
  ## "just report the current limit" argument of brk
  NULL => 0;
  ## NOTE(review): presumably the end-of-input marker - no use of it is
  ## visible in this part of the file
  EOF => -1;
  ## Booleans
  FALSE => 0;
  TRUE => 1;
  ### System calls
  ## Call numbers passed as the first argument of syscall()
  # NOTE(review): the values coincide with the 32-bit x86 Linux system call
  # table - confirm the target ABI before porting
  SYS_exit => 1;
  SYS_read => 3;
  SYS_write => 4;
  SYS_open => 5;
  SYS_close => 6;
  SYS_brk => 45;
  SYS_munmap => 91;
  SYS_getcwd => 183;
  SYS_mmap2 => 192;
  ### System call functions
  ## Issue the SYS_exit system call
  # a0 - exit code
  # Nothing is returned to the caller.
  exit : (a0) {
      syscall(SYS_exit, a0);
  }
  ## Read from file (SYS_read)
  # a0 - file
  # a1 - buffer that receives the data
  # a2 - maximum number of bytes to read
  # @return result of the system call (the read count)
  read : (a0, a1, a2) {
      return(syscall(SYS_read, a0, a1, a2));
  }
  ## Write to file (SYS_write)
  # a0 - file
  # a1 - buffer holding the data
  # a2 - number of bytes to write
  # @return result of the system call (the write count)
  write : (a0, a1, a2) {
      return(syscall(SYS_write, a0, a1, a2));
  }
  ## Open file (SYS_open)
  # a0 - path
  # a1 - flags
  # a2 - mode
  # @return result of the system call: fd or error
  open : (a0, a1, a2) {
      return(syscall(SYS_open, a0, a1, a2));
  }
  ## Close file (SYS_close)
  # a0 - fd
  # @return result code of the system call
  close : (a0) {
      return(syscall(SYS_close, a0));
  }
  ## Set or query the heap segment limit (SYS_brk)
  # a0 - new limit address, or NULL to only query the current one
  # @return heap limit reported by the system call
  brk : (a0) {
      return(syscall(SYS_brk, a0));
  }
  ## Unmap memory page (SYS_munmap)
  # a0 - address
  # a1 - size
  # @return zero on success, negative on error
  munmap : (a0, a1) {
      return(syscall(SYS_munmap, a0, a1));
  }
  ## Map memory page (SYS_mmap2)
  # a0 - address
  # a1 - size
  # a2 - protection flags
  # a3 - map flags
  # a4 - fd
  # a5 - offset
  # NOTE(review): the Linux mmap2 call counts the offset in 4096-byte units,
  # not bytes - confirm what callers pass
  # @return result of the system call
  mmap2 : (a0, a1, a2, a3, a4, a5) {
      return(syscall(SYS_mmap2, a0, a1, a2, a3, a4, a5));
  }
  ## Get current work directory (SYS_getcwd)
  # a0 - buffer that receives the path
  # a1 - buffer size
  # @return result of the system call
  getcwd : (a0, a1) {
      return(syscall(SYS_getcwd, a0, a1));
  }
  ### Useful functions
  ## Round a number up to a multiple of another number (a power of 2)
  # a0 - number to be aligned
  # a1 - alignment; must be a power of 2 so that a1 - 1 is a low-bit mask
  # @return aligned number
  align : (a0, a1) {
      # x0 - mask of the bits below the alignment
      allocate(1);
      x0 = a1 - 1;
      # Stepping past the next boundary, then clearing the low bits
      return((a0 + x0) & ~x0);
  }
  ## Round a number down to a multiple of another number (a power of 2)
  # a0 - number to be aligned
  # a1 - alignment; must be a power of 2 so that a1 - 1 is a low-bit mask
  # @return aligned number
  align_down : (a0, a1) {
      # Clearing the bits below the alignment is the same as subtracting them
      return(a0 & ~(a1 - 1));
  }
  ### Conversions
  ## Digit characters indexed by digit value (36 entries)
  utoa_digits : "0123456789abcdefghijklmnopqrstuvwxyz";
  ## Unsigned int to string
  # a0 - value
  # a1 - result buffer; needs room for every digit plus the terminator
  # a2 - base; digits are looked up in utoa_digits, which has 36 entries
  # @return result buffer (a1)
  utoa : (a0, a1, a2) {
      # x0 - write position, later the right end of the span to reverse
      # x1 - left end of the span to reverse
      # x2 - char held during a swap
      allocate(3);
      # The lowest digit is written unconditionally, so zero becomes "0"
      writechar(a1, 0, readchar(utoa_digits, a0 % a2));
      a0 = a0 / a2;
      x0 = 1;
      # Remaining digits, still least significant first
      while (a0 != 0) {
          writechar(a1, x0, readchar(utoa_digits, a0 % a2));
          a0 = a0 / a2;
          x0 = x0 + 1;
      }
      # Terminating the string
      writechar(a1, x0, '\0');
      # The digits are in reverse order: swapping inwards from both ends
      x1 = 0;
      while (x1 < x0) {
          x0 = x0 - 1;
          x2 = readchar(a1, x0);
          writechar(a1, x0, readchar(a1, x1));
          writechar(a1, x1, x2);
          x1 = x1 + 1;
      }
      return(a1);
  }
  ### Memory Management
  ## Memory page size in bytes (0x1000): the step by which
  ## allocate_new_page grows the heap
  PAGE_SIZE => 4096;
  ## Bounds of the unused heap area: malloc hands out memory from free_start
  ## upwards and free_end is the current heap limit.
  ## Both stay NULL until allocate_new_page runs for the first time.
  free_start : NULL;
  free_end : NULL;
  ## Grow the heap by one memory page
  # On first use the current heap limit is queried with brk(NULL) and becomes
  # both the start and the end of the free area.
  allocate_new_page : () {
      if (free_end == NULL) {
          free_start = brk(NULL);
          free_end = free_start;
      }
      # Requesting one more page and recording the limit reported back
      free_end = brk(free_end + PAGE_SIZE);
  }
  ## Allocate memory
  # Hands out consecutive chunks starting at free_start and grows the heap
  # page by page while the requested chunk does not fit below free_end.
  # a0 - size
  # @return allocated memory, or NULL when the heap limit cannot be moved
  malloc : (a0) {
      # x0 - heap limit before growing, then address of the new chunk
      allocate(1);
      # Checking if we need to allocate more memory
      while (free_start + a0 > free_end) {
          x0 = free_end;
          allocate_new_page();
          # The Linux brk system call reports the unchanged limit when it
          # refuses a request; without this check the loop would spin
          # forever once memory runs out
          if (free_end == x0) { return(NULL); }
      }
      # Setting address
      x0 = free_start;
      # Moving free segment pointer
      free_start = x0 + a0;
      # Returning memory address
      return(x0);
  }
  ## Free memory
  # a0 - address
  # NOTE: deliberately does nothing. malloc only ever moves free_start
  # forward and keeps no record of individual chunks, so a chunk cannot be
  # given back in this implementation of the memory allocator.
  free : (a0) {
  }
  ## Allocate cleared memory
  # a0 - size
  # @return allocated memory with every byte set to zero
  calloc : (a0) {
      # x0 - address of the new chunk
      allocate(1);
      x0 = malloc(a0);
      # memset takes the fill value first, then the address and the size
      memset(0, x0, a0);
      return(x0);
  }
  ## Reallocate memory with new size
  # The old size has to be supplied by the caller. The new chunk is cleared,
  # then filled with as much of the old data as fits.
  # a0 - allocated memory
  # a1 - old size
  # a2 - new size
  # @return new allocated memory
  realloc : (a0, a1, a2) {
      # x0 - new allocated memory space
      allocate(1);
      x0 = calloc(a2);
      # Copying the smaller of the two sizes
      if (a2 < a1) { a1 = a2; }
      memcpy(a0, x0, a1);
      # Releasing the old memory space
      free(a0);
      return(x0);
  }
  ### Memory/String operations
  ## Fill memory block with value (char)
  # Note the argument order: the value comes first.
  # a0 - value
  # a1 - address
  # a2 - size
  memset : (a0, a1, a2) {
      # a1 is used as the write cursor and a2 as the count of bytes left
      while (a2 > 0) {
          writechar(a1, 0, a0);
          a1 = a1 + 1;
          a2 = a2 - 1;
      }
  }
  ## Copy data from one memory block to another
  # Bytes are copied one at a time from the lowest address upwards.
  # Note the argument order: the source comes first.
  # a0 - src
  # a1 - dest
  # a2 - size
  memcpy : (a0, a1, a2) {
      # a0 and a1 are used as cursors and a2 as the count of bytes left
      while (a2 > 0) {
          writechar(a1, 0, readchar(a0, 0));
          a0 = a0 + 1;
          a1 = a1 + 1;
          a2 = a2 - 1;
      }
  }
  ## Get length of string
  # a0 - string
  # @return number of chars before the terminating '\0'
  strlen : (a0) {
      # x0 - cursor walking over the string
      allocate(1);
      x0 = a0;
      # Advancing the cursor up to the terminator
      while (readchar(x0, 0) != '\0') { x0 = x0 + 1; }
      # The distance walked is the length
      return(x0 - a0);
  }
  ## Get length of string counting escaped characters as one
  # A backslash and the char after it are counted together as one char.
  # A backslash that is the last char of the string counts as one char.
  # a0 - string
  # @return string length
  stresclen : (a0) {
      # x0 - string length
      allocate(1);
      x0 = 0;
      # a0 itself is advanced, so offset 0 is always the current char
      while (readchar(a0, 0) != '\0') {
          if (readchar(a0, 0) == '\\') {
              # Skipping the escaped char - but never the terminator:
              # stepping over it would run the scan past the end of the
              # string
              if (readchar(a0, 1) != '\0') { a0 = a0 + 1; }
          }
          a0 = a0 + 1;
          x0 = x0 + 1;
      }
      return(x0);
  }
  ## Copy string
  # Note the argument order: the source comes first.
  # a0 - src
  # a1 - dest; needs room for the chars plus the terminator
  # @return char count (terminator not included)
  strcpy : (a0, a1) {
      # x0 - pos
      allocate(1);
      x0 = 0;
      # Copying char by char up to the terminator of the source
      while (readchar(a0, x0) != '\0') {
          writechar(a1, x0, readchar(a0, x0));
          x0 = x0 + 1;
      }
      # Terminating the destination
      writechar(a1, x0, '\0');
      return(x0);
  }
  ## Compare string
  # a0 - first string
  # a1 - second string
  # @return difference between the first pair of differing chars
  #         (0 if strings are equal)
  strcmp : (a0, a1) {
      # x0 - pos
      allocate(1);
      x0 = 0;
      # Walking over the part both strings have in common
      while (readchar(a0, x0) == readchar(a1, x0)) {
          # Both strings ended at the same position: they are equal
          if (readchar(a0, x0) == '\0') { return(0); }
          x0 = x0 + 1;
      }
      # First position where the strings differ (possibly a terminator)
      return(readchar(a0, x0) - readchar(a1, x0));
  }
  ## Duplicate string
  # a0 - string
  # @return newly allocated copy of the string
  strdup : (a0) {
      # x0 - new string
      allocate(1);
      # Room for every char plus the terminator, cleared by calloc
      x0 = calloc(strlen(a0) + 1);
      # Copying the chars; strcpy also writes the terminator
      strcpy(a0, x0);
      return(x0);
  }
  ## Duplicate string literal (without double quotes)
  # The first and the last char of the input are dropped. An input shorter
  # than two chars yields an empty string.
  # a0 - string
  # @return new string
  strlitdup : (a0) {
      # x0 - string length
      # x1 - new string
      allocate(2);
      # Calculating length of the part between the quotes
      x0 = strlen(a0);
      # Without this clamp a short input gives a negative length, and a
      # negative size reaching malloc moves the free pointer backwards into
      # memory that was already handed out
      if (x0 < 2) { x0 = 2; }
      x0 = x0 - 2;
      # Allocating string; calloc clears it, which provides the terminator
      x1 = calloc(x0 + 1);
      # Copying data to new string, starting after the opening quote
      memcpy(a0 + 1, x1, x0);
      # Return new string
      return(x1);
  }
  ### Tuples
  ## Allocate tuple
  # Each slot takes 4 bytes. The slots are not cleared.
  # a0 - size (number of slots)
  # @return allocated tuple
  allocate_tuple : (a0) {
      return(malloc(a0 * 4));
  }
  ## Make tuple with 1 value
  # a0 - value stored in slot 0
  # @return newly allocated tuple
  tuple1 : (a0) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(1);
      x0[0] = a0;
      return(x0);
  }
  ## Make tuple with 2 values
  # a0, a1 - values stored in slots 0 and 1
  # @return newly allocated tuple
  tuple2 : (a0, a1) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(2);
      x0[0] = a0;
      x0[1] = a1;
      return(x0);
  }
  ## Make tuple with 3 values
  # a0, a1, a2 - values stored in slots 0 to 2
  # @return newly allocated tuple
  tuple3 : (a0, a1, a2) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(3);
      x0[0] = a0;
      x0[1] = a1;
      x0[2] = a2;
      return(x0);
  }
  ## Make tuple with 4 values
  # a0, a1, a2, a3 - values stored in slots 0 to 3
  # @return newly allocated tuple
  tuple4 : (a0, a1, a2, a3) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(4);
      x0[0] = a0;
      x0[1] = a1;
      x0[2] = a2;
      x0[3] = a3;
      return(x0);
  }
  ## Make tuple with 5 values
  # a0, a1, a2, a3, a4 - values stored in slots 0 to 4
  # @return newly allocated tuple
  tuple5 : (a0, a1, a2, a3, a4) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(5);
      x0[0] = a0;
      x0[1] = a1;
      x0[2] = a2;
      x0[3] = a3;
      x0[4] = a4;
      return(x0);
  }
  ## Make tuple with 6 values
  # a0, a1, a2, a3, a4, a5 - values stored in slots 0 to 5
  # @return newly allocated tuple
  tuple6 : (a0, a1, a2, a3, a4, a5) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(6);
      x0[0] = a0;
      x0[1] = a1;
      x0[2] = a2;
      x0[3] = a3;
      x0[4] = a4;
      x0[5] = a5;
      return(x0);
  }
  ## Make tuple with 7 values
  # a0, a1, a2, a3, a4, a5, a6 - values stored in slots 0 to 6
  # @return newly allocated tuple
  tuple7 : (a0, a1, a2, a3, a4, a5, a6) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(7);
      x0[0] = a0;
      x0[1] = a1;
      x0[2] = a2;
      x0[3] = a3;
      x0[4] = a4;
      x0[5] = a5;
      x0[6] = a6;
      return(x0);
  }
  ## Make tuple with 8 values
  # a0, a1, a2, a3, a4, a5, a6, a7 - values stored in slots 0 to 7
  # @return newly allocated tuple
  tuple8 : (a0, a1, a2, a3, a4, a5, a6, a7) {
      # x0 - tuple
      allocate(1);
      x0 = allocate_tuple(8);
      x0[0] = a0;
      x0[1] = a1;
      x0[2] = a2;
      x0[3] = a3;
      x0[4] = a4;
      x0[5] = a5;
      x0[6] = a6;
      x0[7] = a7;
      return(x0);
  }
  ### Linked List
  ## List parts
  # Slot indices of a list node; nodes are created by list_insert via tuple2
  LIST_NEXT => 0; # slot read by list_next
  LIST_VALUE => 1; # slot read by list_value
  ## Start a new linked list that holds one value
  # a0 - value
  # @return result of inserting a0 in front of the NULL (empty) list
  list : (a0) {
    return(list_insert(NULL, a0));
  }
  ## Put a value in front of a linked list
  # a0 - list (may be NULL, see list)
  # a1 - value
  # @return new node built as tuple2(a0, a1)
  # NOTE(review): assumes tuple2 fills slots in argument order, so that a0
  # lands in LIST_NEXT and a1 in LIST_VALUE - tuple2 is defined outside this view
  list_insert : (a0, a1) {
    return(tuple2(a0, a1));
  }
  ## Step to the following node of a linked list
  # a0 - list node
  # @return content of the LIST_NEXT slot of a0
  list_next : (a0) {
    return(a0[LIST_NEXT]);
  }
  ## Read the value carried by a linked list node
  # a0 - list node
  # @return content of the LIST_VALUE slot of a0
  list_value : (a0) {
    return(a0[LIST_VALUE]);
  }
  ## Drop the first node of a linked list
  # a0 - list node to remove; it is freed and must not be used afterwards
  # @return the node that followed a0
  list_pop : (a0) {
    # x0 - rest of the list
    allocate(1);
    # The link has to be saved before the node is released
    x0 = a0[LIST_NEXT];
    free(a0);
    return(x0);
  }
  ### Vector
  ## Vector parts
  # Slot indices of a vector tuple (built by char_vector / vector via tuple3)
  VEC_SIZE => 0; # number of elements in use
  VEC_CAP => 1; # number of elements the buffer was allocated for
  VEC_BUF => 2; # element buffer
  ## Make a vector of chars (one byte per element)
  # a0 - initial size
  # a1 - initial capacity; calloc is asked for a1 bytes
  # @return vector tuple (size, capacity, buffer)
  char_vector : (a0, a1) {
    # x0 - byte buffer
    allocate(1);
    x0 = calloc(a1);
    return(tuple3(a0, a1, x0));
  }
  ## Make a vector of values
  # a0 - initial size
  # a1 - initial capacity; calloc is asked for 4 * a1 bytes
  #      (presumably 4 bytes per value - confirm against the target word size)
  # @return vector tuple (size, capacity, buffer)
  vector : (a0, a1) {
    # x0 - value buffer
    allocate(1);
    x0 = calloc(4 * a1);
    return(tuple3(a0, a1, x0));
  }
  ## Access the raw buffer of a vector
  # a0 - vector
  # @return content of the VEC_BUF slot
  vector_buffer : (a0) {
    return(a0[VEC_BUF]);
  }
  ## Number of elements stored in a vector
  # a0 - vector
  # @return content of the VEC_SIZE slot
  vector_size : (a0) {
    return(a0[VEC_SIZE]);
  }
  ## Make sure a char vector can hold at least the given number of chars
  # a0 - char vector
  # a1 - wanted capacity; the vector never shrinks
  char_vector_reserve : (a0, a1) {
    # Current capacity is already enough
    if (a0[VEC_CAP] >= a1) { return; }
    # realloc receives the buffer plus the current and the wanted capacity
    a0[VEC_BUF] = realloc(a0[VEC_BUF], a0[VEC_CAP], a1);
    a0[VEC_CAP] = a1;
  }
  ## Make sure a vector can hold at least the given number of values
  # a0 - vector
  # a1 - wanted capacity; the vector never shrinks
  vector_reserve : (a0, a1) {
    # Current capacity is already enough
    if (a0[VEC_CAP] >= a1) { return; }
    # Capacities are multiplied by 4 here, matching the 4 * capacity
    # allocation made by vector
    a0[VEC_BUF] = realloc(a0[VEC_BUF], 4 * a0[VEC_CAP], 4 * a1);
    a0[VEC_CAP] = a1;
  }
  ## Change the size of a char vector
  # a0 - char vector
  # a1 - new size
  char_vector_resize : (a0, a1) {
    # Growing: the buffer must be able to hold a1 chars
    if (a1 > a0[VEC_SIZE]) {
      char_vector_reserve(a0, a1);
    }
    a0[VEC_SIZE] = a1;
  }
  ## Change the size of a vector
  # a0 - vector
  # a1 - new size
  vector_resize : (a0, a1) {
    # Growing: the buffer must be able to hold a1 values
    if (a1 > a0[VEC_SIZE]) {
      vector_reserve(a0, a1);
    }
    a0[VEC_SIZE] = a1;
  }
  ## Read one char of a char vector
  # a0 - char vector
  # a1 - index (not checked against the size)
  # @return char read from the buffer at a1
  char_vector_get : (a0, a1) {
    return(readchar(a0[VEC_BUF], a1));
  }
  ## Read one value of a vector
  # a0 - vector
  # a1 - index (not checked against the size)
  # @return value stored in the buffer at a1
  vector_get : (a0, a1) {
    return(a0[VEC_BUF][a1]);
  }
  ## Overwrite one char of a char vector
  # a0 - char vector
  # a1 - index (not checked against size or capacity)
  # a2 - char to store
  char_vector_set : (a0, a1, a2) {
    writechar(a0[VEC_BUF], a1, a2);
  }
  ## Overwrite one value of a vector
  # a0 - vector
  # a1 - index (not checked against size or capacity)
  # a2 - value to store
  vector_set : (a0, a1, a2) {
    # x0 - value buffer, fetched first so the store is a plain indexed write
    allocate(1);
    x0 = a0[VEC_BUF];
    x0[a1] = a2;
  }
  ## Append a char to the end of a char vector
  # a0 - char vector
  # a1 - char
  # The buffer is doubled when it is full. A vector created with capacity 0
  # grows to capacity 1 first, because doubling 0 would leave it at 0 and the
  # write below would land outside the buffer.
  char_vector_push : (a0, a1) {
    # If vector buffer is full, reserving more
    if (a0[VEC_SIZE] == a0[VEC_CAP]) {
      if (a0[VEC_CAP] == 0) {
        char_vector_reserve(a0, 1);
      } else {
        char_vector_reserve(a0, a0[VEC_CAP] * 2);
      }
    }
    # Putting new value
    char_vector_set(a0, a0[VEC_SIZE], a1);
    # Incrementing size
    a0[VEC_SIZE] = a0[VEC_SIZE] + 1;
  }
  ## Append a value to the end of a vector
  # a0 - vector
  # a1 - value
  # The buffer is doubled when it is full. A vector created with capacity 0
  # grows to capacity 1 first, because doubling 0 would leave it at 0 and the
  # write below would land outside the buffer.
  vector_push : (a0, a1) {
    # If vector buffer is full, reserving more
    if (a0[VEC_SIZE] == a0[VEC_CAP]) {
      if (a0[VEC_CAP] == 0) {
        vector_reserve(a0, 1);
      } else {
        vector_reserve(a0, a0[VEC_CAP] * 2);
      }
    }
    # Putting new value
    vector_set(a0, a0[VEC_SIZE], a1);
    # Incrementing size
    a0[VEC_SIZE] = a0[VEC_SIZE] + 1;
  }
  ## Take the last char off a char vector
  # a0 - char vector (emptiness is not checked)
  # @return the removed char
  char_vector_pop : (a0) {
    # x0 - index of the last char, which is also the new size
    allocate(1);
    x0 = a0[VEC_SIZE] - 1;
    a0[VEC_SIZE] = x0;
    return(char_vector_get(a0, x0));
  }
  ## Take the last value off a vector
  # a0 - vector (emptiness is not checked)
  # @return the removed value
  vector_pop : (a0) {
    # x0 - index of the last value, which is also the new size
    allocate(1);
    x0 = a0[VEC_SIZE] - 1;
    a0[VEC_SIZE] = x0;
    return(vector_get(a0, x0));
  }
  ## Release a vector together with its buffer
  # a0 - vector; lexer_destroy also passes a char vector here
  vector_destroy : (a0) {
    # x0 - element buffer
    allocate(1);
    x0 = a0[VEC_BUF];
    # Buffer first, then the tuple that pointed at it
    free(x0);
    free(a0);
  }
  ### I/O
  ## Open flags
  # Passed to open() as its second argument.
  # NOTE(review): values look like the Linux open(2) flag bits - confirm target
  O_RDONLY => 0;
  O_WRONLY => 1;
  O_RDWR => 2;
  O_CREAT => 64;
  O_TRUNC => 512;
  O_APPEND => 1024;
  ## Parts of I/O channels
  # Slot indices of channel tuples. Input channels are built with 5 slots
  # (input_chan), output channels with the first 3 only (output_chan)
  CHAN_FD => 0; # file descriptor
  CHAN_BUF => 1; # byte buffer
  CHAN_IDX => 2; # position of the next byte to read / to write in the buffer
  ICHAN_END => 3; # input only: count returned by the last read()
  ICHAN_EOF => 4; # input only: set to TRUE by fill when read() returns 0
  ## Buffer sizes
  # Number of bytes allocated for channel buffers
  IBUFFER_SIZE => 512;
  OBUFFER_SIZE => 512;
  ## Standard channels
  # NULL until initialize_io assigns them
  stdin : NULL;
  stdout : NULL;
  stderr : NULL;
  ## Wrap a file descriptor in a buffered input channel
  # a0 - fd
  # @return channel tuple: fd, buffer of IBUFFER_SIZE bytes, index 0, end 0,
  #         EOF flag FALSE
  input_chan : (a0) {
    # The buffer is allocated while the arguments are evaluated, i.e. before
    # the tuple itself
    return(tuple5(a0, malloc(IBUFFER_SIZE), 0, 0, FALSE));
  }
  ## Open a file for reading and wrap it in an input channel
  # a0 - file name
  # @return channel; the program stops through assert when open() does not
  #         return a positive descriptor
  input_chan_file : (a0) {
    # x0 - file descriptor
    allocate(1);
    x0 = open(a0, O_RDONLY, 0);
    # NOTE(review): descriptor 0 is rejected too - fine while stdin stays open
    assert(x0 > 0, "Couldn't open file input channel");
    return(input_chan(x0));
  }
  ## Close an input channel and release its memory
  # a0 - channel; must not be used afterwards
  input_chan_close : (a0) {
    # x0 - channel buffer
    allocate(1);
    x0 = a0[CHAN_BUF];
    close(a0[CHAN_FD]);
    # Buffer first, then the tuple that pointed at it
    free(x0);
    free(a0);
  }
  ## Wrap a file descriptor in a buffered output channel
  # a0 - fd
  # @return channel tuple: fd, buffer of OBUFFER_SIZE bytes, index 0
  output_chan : (a0) {
    # The buffer is allocated while the arguments are evaluated, i.e. before
    # the tuple itself
    return(tuple3(a0, malloc(OBUFFER_SIZE), 0));
  }
  ## Open a file for writing and wrap it in an output channel
  # a0 - file name
  # @return channel; the program stops through assert when open() does not
  #         return a positive descriptor
  output_chan_file : (a0) {
    # x0 - file descriptor
    allocate(1);
    # Write-only; the file is created when missing and truncated otherwise.
    # Mode 420 is 0644 in octal
    x0 = open(a0, O_WRONLY|O_CREAT|O_TRUNC, 420);
    assert(x0 > 0, "Couldn't open file output channel");
    return(output_chan(x0));
  }
  ## Close an output channel and release its memory
  # a0 - channel; must not be used afterwards
  output_chan_close : (a0) {
    # x0 - channel buffer
    allocate(1);
    # Pending bytes have to reach the descriptor before it is closed
    flush(a0);
    x0 = a0[CHAN_BUF];
    close(a0[CHAN_FD]);
    free(x0);
    free(a0);
  }
  ## Create the three standard channels
  # Assigns the globals stdin, stdout and stderr
  initialize_io : () {
    # Descriptor 0 is read from, descriptors 1 and 2 are written to
    stdin = input_chan(0);
    stdout = output_chan(1);
    stderr = output_chan(2);
  }
  ## Fill input channel
  # a0 - channel
  # Does nothing when the EOF flag is set or when unread bytes remain.
  # Otherwise reads up to IBUFFER_SIZE bytes into the buffer. A read() result
  # that is not positive (0 = end of file, negative = failure) marks the
  # channel as ended; storing a negative count as ICHAN_END would make
  # CHAN_IDX never equal ICHAN_END and fgetc would hand out garbage.
  fill : (a0) {
    # x0 - read count
    allocate(1);
    # Checking if file is ended
    if (a0[ICHAN_EOF]) { return; }
    # Checking if buffer is exhausted
    if (a0[CHAN_IDX] == a0[ICHAN_END]) {
      # Input buffer is empty, refilling
      x0 = read(a0[CHAN_FD], a0[CHAN_BUF], IBUFFER_SIZE);
      # Resetting index
      a0[CHAN_IDX] = 0;
      if (x0 > 0) {
        # Setting number of valid bytes
        a0[ICHAN_END] = x0;
      } else {
        # End of file or read failure: empty buffer, EOF flag set
        a0[ICHAN_END] = 0;
        a0[ICHAN_EOF] = TRUE;
      }
    }
  }
  ## Take the next char out of an input channel
  # a0 - channel
  # @return char, or EOF when no byte is left after refilling
  fgetc : (a0) {
    # x0 - position of the char inside the buffer
    allocate(1);
    # Refills the buffer when it is used up
    fill(a0);
    x0 = a0[CHAN_IDX];
    # Still nothing to read: input is over
    if (x0 == a0[ICHAN_END]) { return(EOF); }
    # Advancing, then handing out the char at the old position
    a0[CHAN_IDX] = x0 + 1;
    return(readchar(a0[CHAN_BUF], x0));
  }
  ## Take the next char out of the stdin channel
  # @return char or EOF, see fgetc
  getc : () {
    return(fgetc(stdin));
  }
  ## Look ahead next char
  # a0 - channel
  # a1 - offset from the next unread char (0 = the char fgetc would return)
  # @return char, or EOF when the input ends before that offset
  # Nothing is consumed. When the wanted char lies past the bytes currently
  # buffered, the unread bytes are moved to the start of the buffer and more
  # data is read behind them, so a lookahead across a buffer boundary no longer
  # reports EOF in the middle of a file. Offsets of IBUFFER_SIZE or more cannot
  # be served and yield EOF.
  fnextc : (a0, a1) {
    # x0 - number of unread bytes in the buffer
    # x1 - copy index, later read count
    # x2 - byte being moved
    allocate(3);
    # Filling buffer, if needed
    fill(a0);
    # Pulling in more data while the wanted char is not buffered
    while (a0[CHAN_IDX] + a1 >= a0[ICHAN_END]) {
      # Nothing more will arrive after end of input
      if (a0[ICHAN_EOF]) { return(EOF); }
      # Offset does not fit in one buffer
      if (a1 >= IBUFFER_SIZE) { return(EOF); }
      x0 = a0[ICHAN_END] - a0[CHAN_IDX];
      # A negative count means the buffer state is invalid (failed read)
      if (0 > x0) { return(EOF); }
      # Moving unread bytes to the start; ascending copy is safe because the
      # destination never lies behind the source
      x1 = 0;
      while (x0 > x1) {
        x2 = readchar(a0[CHAN_BUF], a0[CHAN_IDX] + x1);
        writechar(a0[CHAN_BUF], x1, x2);
        x1 = x1 + 1;
      }
      a0[CHAN_IDX] = 0;
      a0[ICHAN_END] = x0;
      # Reading new data right behind the unread bytes
      x1 = read(a0[CHAN_FD], a0[CHAN_BUF] + x0, IBUFFER_SIZE - x0);
      if (x1 > 0) {
        a0[ICHAN_END] = x0 + x1;
      } else {
        # End of file or read failure; buffered bytes stay readable by fgetc
        a0[ICHAN_EOF] = TRUE;
      }
    }
    # Fetching char
    return(readchar(a0[CHAN_BUF], a0[CHAN_IDX] + a1));
  }
  ## Peek at a char of the stdin channel without consuming it
  # a0 - offset
  # @return char or EOF, see fnextc
  nextc : (a0) {
    return(fnextc(stdin, a0));
  }
  ## Write out the buffered bytes of an output channel
  # a0 - channel
  flush : (a0) {
    # Nothing buffered, nothing to write
    if (0 >= a0[CHAN_IDX]) { return; }
    write(a0[CHAN_FD], a0[CHAN_BUF], a0[CHAN_IDX]);
    # Buffer is empty again
    a0[CHAN_IDX] = 0;
  }
  ## Append a char to an output channel
  # a0 - channel
  # a1 - char
  # The channel is flushed when its buffer becomes full and after every '\n'
  fputc : (a0, a1) {
    writechar(a0[CHAN_BUF], a0[CHAN_IDX], a1);
    a0[CHAN_IDX] = a0[CHAN_IDX] + 1;
    if (a0[CHAN_IDX] == OBUFFER_SIZE || a1 == '\n') {
      flush(a0);
    }
  }
  ## Append a char to the stdout channel
  # a0 - char
  putc : (a0) {
    fputc(stdout, a0);
  }
  ## Append a char to the stderr channel
  # a0 - char
  eputc : (a0) {
    fputc(stderr, a0);
  }
  ## Append a '\0'-terminated string to an output channel
  # a0 - channel
  # a1 - string; the terminator itself is not written
  fputs : (a0, a1) {
    # x0 - position inside the string
    # x1 - char at that position
    allocate(2);
    x0 = 0;
    x1 = readchar(a1, 0);
    while (x1 != '\0') {
      fputc(a0, x1);
      x0 = x0 + 1;
      x1 = readchar(a1, x0);
    }
  }
  ## Append a string to the stdout channel
  # a0 - string
  puts : (a0) {
    fputs(stdout, a0);
  }
  ## Append a string to the stderr channel
  # a0 - string
  eputs : (a0) {
    fputs(stderr, a0);
  }
  ## Scratch buffer of fputn
  # Allocated by the first fputn call and reused by every later one
  fputn_buffer : NULL;
  ## Append a number, written in the given base, to an output channel
  # a0 - channel
  # a1 - number
  # a2 - base, handed to utoa
  fputn : (a0, a1, a2) {
    # 33 bytes; presumably 32 binary digits plus the terminator - TODO confirm
    if (fputn_buffer == NULL) {
      fputn_buffer = malloc(33);
    }
    # Wiping what the previous call left behind
    # NOTE(review): argument order follows this file's memset, defined
    # outside this view
    memset(0, fputn_buffer, 33);
    utoa(a1, fputn_buffer, a2);
    fputs(a0, fputn_buffer);
  }
  ## Append a number in base 10 to an output channel
  # a0 - channel
  # a1 - number
  fputd : (a0, a1) {
    fputn(a0, a1, 10);
  }
  ## Append a number in base 16 to an output channel
  # a0 - channel
  # a1 - number
  fputx : (a0, a1) {
    fputn(a0, a1, 16);
  }
  ## Append a number in base 10 to the stdout channel
  # a0 - number
  putd : (a0) {
    fputd(stdout, a0);
  }
  ## Append a number in base 16 to the stdout channel
  # a0 - number
  putx : (a0) {
    fputx(stdout, a0);
  }
  ## Append a number in base 10 to the stderr channel
  # a0 - number
  eputd : (a0) {
    fputd(stderr, a0);
  }
  ## Append a number in base 16 to the stderr channel
  # a0 - number
  eputx : (a0) {
    fputx(stderr, a0);
  }
  ## Stop the program unless a condition holds
  # a0 - condition
  # a1 - error string, or NULL for the default message
  # On failure prints "ASSERT: <message>" to stderr and calls exit(1)
  assert : (a0, a1) {
    # x0 - message to print
    allocate(1);
    # Condition holds, nothing to report
    if (a0) { return; }
    # Choosing between the caller's message and the default one
    x0 = "assertion failed";
    if (a1) { x0 = a1; }
    eputs("ASSERT: ");
    eputs(x0);
    # The newline also flushes stderr (see fputc)
    eputc('\n');
    exit(1);
  }
  ### Lexer
  ## Character types
  # Values stored in char_table; they are also the indices into the lexer
  # trigger tables (lx00n and friends), so the numbering must stay in step
  CHAR_NULL => 0; # \0
  CHAR_INVALID => 1; # invalid characters
  CHAR_SPACES => 2; # [\t\r ]
  CHAR_NEWLINE => 3; # \n
  CHAR_ZERO => 4; # 0
  CHAR_OCTAL => 5; # [1-7]
  CHAR_DECIMAL => 6; # [89]
  CHAR_HEX => 7; # [A-Fa-f] except a
  CHAR_ALPHA => 8; # [G-Zg-z_] except x
  CHAR_A => 9; # a
  CHAR_X => 10; # x
  CHAR_SQUOTE => 11; # \'
  CHAR_DQUOTE => 12; # \"
  CHAR_BACKSLASH => 13; # \\
  CHAR_SYMBOL => 14; # other characters
  ## Lexer parts
  # Slot indices of the lexer tuple built by lexer() with tuple8
  LEX_ICHAN => 0; # input channel
  LEX_FILE => 1; # file name
  LEX_LINE => 2; # current line, starts at 1
  LEX_COLUMN => 3; # current column, starts at 1
  LEX_HOLD => 4; # TRUE when the next lexer_lex must return the current token again
  LEX_TID => 5; # current token ID
  LEX_TVALUE => 6; # current token value (numbers, char literals)
  LEX_TTEXT => 7; # char vector with the current token text
  ## Token location parts
  # Slot indices of the tuple returned by lexer_token_location
  LOC_FILE => 0;
  LOC_LINE => 1;
  LOC_COLUMN => 2;
  ### Token IDs
  # Start at 256; single-char symbol tokens use the char value itself as ID
  # (see the symbol trigger of lexer_lex)
  TOKEN_END => 256;
  TOKEN_INTEGER => 257;
  TOKEN_IDENTIFIER => 258;
  TOKEN_STRING => 259;
  TOKEN_ARGUMENT => 260;
  TOKEN_VARIABLE => 261;
  ## Keywords
  # lexer_lex computes a keyword ID as TOKEN_IF + index in reserved_keywords,
  # so these values must stay consecutive and in the order of that table
  # Conditionals
  TOKEN_IF => 270;
  TOKEN_ELSE => 271;
  # Loops
  TOKEN_BREAK => 272;
  TOKEN_CONTINUE => 273;
  TOKEN_DO => 274;
  TOKEN_FOR => 275;
  TOKEN_WHILE => 276;
  # Functions
  TOKEN_ALLOCATE => 277;
  TOKEN_ASM => 278;
  TOKEN_RETURN => 279;
  TOKEN_SYSCALL => 280;
  # Labels
  TOKEN_GLOBAL => 281;
  TOKEN_GOTO => 282;
  TOKEN_LABEL => 283;
  # Byte operations
  TOKEN_READCHAR => 284;
  TOKEN_WRITECHAR => 285;
  # Array types
  TOKEN_TYPE_CHAR => 286;
  TOKEN_TYPE_INT => 287;
  # File
  TOKEN_INCLUDE => 288;
  TOKEN_INCLUDE_ONCE => 289;
  ## Symbols
  # Multi-char symbols recognised by the symbol trigger of lexer_lex
  # Equality
  TOKEN_EQ => 300;
  TOKEN_NE => 301;
  # Compare
  TOKEN_LE => 302;
  TOKEN_GE => 303;
  # Arrow
  TOKEN_ARROW => 304;
  # Shift
  TOKEN_SHL => 305;
  TOKEN_SHR => 306;
  # Logic
  TOKEN_LAND => 307;
  TOKEN_LOR => 308;
  # Assignment
  TOKEN_AADD => 309;
  TOKEN_ASUB => 310;
  TOKEN_AMUL => 311;
  TOKEN_ADIV => 312;
  TOKEN_AMOD => 313;
  TOKEN_AAND => 314;
  TOKEN_AXOR => 315;
  TOKEN_AOR => 316;
  TOKEN_ASHL => 317;
  TOKEN_ASHR => 318;
  # Increment/decrement
  TOKEN_INC => 319;
  TOKEN_DEC => 320;
  # Ellipsis
  TOKEN_ELLIPSIS => 321;
  ## Char table
  # Maps a byte value (0..255) to its CHAR_* type. One row per high nibble,
  # one column per low nibble.
  # '_' (0x5f) is CHAR_ALPHA (8) as documented for that class. It used to be
  # CHAR_HEX (7), which let the hexadecimal triggers accept input like 0x_
  # as a digit.
  char_table : [
    # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 Xa Xb Xc Xd Xe Xf
    0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 2, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 14, 12, 14, 14, 14, 14, 11, 14, 14, 14, 14, 14, 14, 14, 14,
    4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 14, 14, 14, 14, 14, 14,
    14, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 14, 13, 14, 14, 8,
    14, 9, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 10, 8, 8, 14, 14, 14, 14, 1,
    # We're not using Extended-ASCII - marking as invalid
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  ];
  ## Char to escape table (only first 128 ASCII chars)
  # Indexed by the char that follows a backslash in a character literal; the
  # entry is the value of the escape. Non-zero entries: " -> 34, ' -> 39,
  # ? -> 63, \ -> 92, a -> 7, b -> 8, e -> 27, f -> 12, n -> 10, r -> 13,
  # t -> 9, v -> 11. Every other char maps to 0.
  # The table has 128 entries only, so indices must stay below 128
  char_to_escape : [
    # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 Xa Xb Xc Xd Xe Xf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 34, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 0, 0, 0,
    0, 7, 8, 0, 0, 27, 12, 0, 0, 0, 0, 0, 0, 0, 10, 0,
    0, 0, 13, 0, 9, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0
  ];
  ## Reserved keywords
  # NULL-terminated list scanned by lexer_lex. The keyword at index i gets the
  # token ID TOKEN_IF + i, so the order here must match the keyword token IDs
  reserved_keywords : [
    "if", "else", # Conditionals
    "break", "continue", "do", "for", "while", # Loops
    "allocate", "asm", "return", "syscall", # Functions
    "global", "goto", "label", # Labels
    "readchar", "writechar", # Byte operations
    "char", "int", # Array types
    "include", "include_once", # File operations
    NULL
  ];
  ## Lexer triggers
  # Jump tables of lexer_lex. Table lxNNn is consulted after trigger lxNN has
  # run: it is indexed by the CHAR_* type of the next char (15 entries, in
  # the order CHAR_NULL .. CHAR_SYMBOL) and names the label to jump to.
  # lxNN labels continue lexing, lx99 finishes the token, leNN labels report
  # an error.
  lx00n:[lx01,le00,lx00,lx00,lx02,lx06,lx06,lx07,lx07,lx09,lx09,lx11,lx15,le00,lx18];
  lx02n:[lx99,le00,lx99,lx99,le01,lx03,le02,le03,le00,le03,lx04,le00,le00,le00,lx99];
  lx03n:[lx99,le00,lx99,lx99,lx03,lx03,le02,le03,le00,le03,le00,le00,le00,le00,lx99];
  lx04n:[le04,le00,le04,le04,lx05,lx05,lx05,lx05,le00,lx05,le00,le00,le00,le00,le00];
  lx05n:[lx99,le00,lx99,lx99,lx05,lx05,lx05,lx05,le00,lx05,le00,le00,le00,le00,lx99];
  lx06n:[lx99,le00,lx99,lx99,lx06,lx06,lx06,le05,le00,le05,le00,le00,le00,le00,lx99];
  lx07n:[lx08,le00,lx08,lx08,lx07,lx07,lx07,lx07,lx07,lx07,lx07,le00,le00,le00,lx08];
  lx09n:[lx08,le00,lx08,lx08,lx10,lx10,lx10,lx07,lx07,lx07,lx07,le00,le00,le00,lx08];
  lx10n:[lx99,le00,lx99,lx99,lx10,lx10,lx10,lx07,lx07,lx07,lx07,le00,le00,le00,lx99];
  lx11n:[le06,le00,lx12,le06,lx12,lx12,lx12,lx12,lx12,lx12,lx12,lx12,lx12,lx13,lx12];
  lx15n:[le07,le00,lx15,le07,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx17,lx16,lx15];
  lx16n:[le07,le00,lx15,le07,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx17,lx16,lx15];
  ## Build a lexer on top of an input channel
  # a0 - input channel
  # a1 - filename, kept for token locations
  # @return lexer tuple positioned at line 1, column 1, with no held token
  lexer : (a0, a1) {
    # x0 - token text buffer
    allocate(1);
    # The token text always ends with '\0', even when the token is empty
    x0 = char_vector(0, 1);
    char_vector_push(x0, '\0');
    return(tuple8(a0, a1, 1, 1, FALSE, 0, 0, x0));
  }
  ## Build a lexer that reads the named file
  # a0 - filename
  # @return lexer over an input channel opened with input_chan_file
  lexer_file : (a0) {
    # x0 - input channel
    allocate(1);
    x0 = input_chan_file(a0);
    return(lexer(x0, a0));
  }
  ## Release a lexer and everything it refers to
  # a0 - lexer; must not be used afterwards
  # NOTE(review): the filename given to lexer()/lexer_file() is freed here as
  # well, so the caller must hand over a heap-allocated string - confirm
  lexer_destroy : (a0) {
    # Token text buffer
    vector_destroy(a0[LEX_TTEXT]);
    # Input channel (this also closes its descriptor)
    input_chan_close(a0[LEX_ICHAN]);
    # Filename string
    free(a0[LEX_FILE]);
    # The lexer tuple itself
    free(a0);
  }
  ## Classify the next char without consuming it
  # a0 - lexer
  # @return CHAR_* type of the next char; CHAR_NULL at end of input
  lexer_look : (a0) {
    # x0 - next char
    allocate(1);
    x0 = fnextc(a0[LEX_ICHAN], 0);
    # A real char is looked up in the table, EOF is reported as CHAR_NULL
    if (x0 != EOF) { return(char_table[x0]); }
    return(CHAR_NULL);
  }
  ## Move the next char from the input into the token text
  # a0 - lexer
  # @return the consumed char (whatever fgetc returned)
  lexer_consume : (a0) {
    # x0 - char
    allocate(1);
    x0 = fgetc(a0[LEX_ICHAN]);
    # Keeping the location up to date
    if (x0 == '\n') {
      # A newline starts the next line at column 1
      a0[LEX_LINE] = a0[LEX_LINE] + 1;
      a0[LEX_COLUMN] = 1;
    } else {
      a0[LEX_COLUMN] = a0[LEX_COLUMN] + 1;
    }
    # The char overwrites the '\0' that ends the token text ...
    char_vector_set(a0[LEX_TTEXT], vector_size(a0[LEX_TTEXT]) - 1, x0);
    # ... and a new terminator is appended behind it
    char_vector_push(a0[LEX_TTEXT], '\0');
    return(x0);
  }
  ## Store the ID of the current token
  # a0 - lexer
  # a1 - token ID
  lexer_token_set : (a0, a1) {
    a0[LEX_TID] = a1;
  }
  ## Read the ID of the current token
  # a0 - lexer
  # @return content of the LEX_TID slot
  lexer_token_get : (a0) {
    return(a0[LEX_TID]);
  }
  ## Read the text of the current token
  # a0 - lexer
  # @return buffer of the token text char vector
  lexer_token_text : (a0) {
    return(vector_buffer(a0[LEX_TTEXT]));
  }
  ## Read the length of the current token text
  # a0 - lexer
  # @return size of the token text char vector; the '\0' pushed at its end
  #         is counted
  lexer_token_length : (a0) {
    return(vector_size(a0[LEX_TTEXT]));
  }
  ## Read the value of the current token
  # a0 - lexer
  # @return content of the LEX_TVALUE slot
  lexer_token_value : (a0) {
    return(a0[LEX_TVALUE]);
  }
  ## Capture where the lexer currently stands
  # a0 - lexer
  # @return newly allocated tuple (file, line, column), laid out as LOC_*
  lexer_token_location : (a0) {
    # x0 - location tuple
    allocate(1);
    x0 = tuple3(a0[LEX_FILE], a0[LEX_LINE], a0[LEX_COLUMN]);
    return(x0);
  }
  ## Make the next lexer_lex call return the current token again
  # a0 - lexer
  # Only one token can be held; a second hold stops the program through assert
  lexer_hold : (a0) {
    assert(a0[LEX_HOLD] == FALSE, "tried to hold two or more tokens");
    a0[LEX_HOLD] = TRUE;
  }
  ## Forget the current token
  # a0 - lexer
  lexer_reset_token : (a0) {
    # Token text becomes the empty string: no chars, then the terminator
    char_vector_resize(a0[LEX_TTEXT], 0);
    char_vector_push(a0[LEX_TTEXT], '\0');
    # ID and value go back to zero
    a0[LEX_TVALUE] = 0;
    a0[LEX_TID] = 0;
  }
  ## Write a token location as file:line:column
  # a0 - token location
  # a1 - output channel
  fputloc : (a0, a1) {
    fputs(a1, a0[LOC_FILE]);
    fputc(a1, ':');
    # Line and column are written in decimal
    fputd(a1, a0[LOC_LINE]);
    fputc(a1, ':');
    fputd(a1, a0[LOC_COLUMN]);
  }
  ## Report a lexing error and stop the program
  # a0 - lexer
  # a1 - error message
  # Prints "file:line:column: message" to stderr and calls exit(1)
  lexer_error : (a0, a1) {
    # x0 - current location
    allocate(1);
    x0 = lexer_token_location(a0);
    fputloc(x0, stderr);
    eputs(": ");
    eputs(a1);
    # The newline also flushes stderr (see fputc)
    eputc('\n');
    exit(1);
  }
  ## Fetch token
  # a0 - lexer
  # @return token type
  # The lexer is a state machine made of labels ("triggers"). After a trigger
  # has handled its char it jumps through its lxNNn table, indexed by the type
  # of the next char, to the following trigger, to lx99 (token finished) or to
  # an leNN label (error; lexer_error does not return).
  # Token ID, value and text are left in the lexer for the lexer_token_*
  # accessors. A token held with lexer_hold is returned once more first.
  # At end of input TOKEN_END is stored as the current token ID too, so that a
  # held end token and lexer_token_get agree with the value returned here.
  lexer_lex : (a0) {
    # x0 - current char
    # x1, x2 - used variables by triggers
    allocate(3);
    # Checking if we are holding token
    if (a0[LEX_HOLD]) {
      a0[LEX_HOLD] = FALSE;
      return(a0[LEX_TID]);
    }
  label lx00;
    ## Entry point of lexer
    # If token is space/tab/newline, skip it
    if (lexer_look(a0) == CHAR_SPACES || lexer_look(a0) == CHAR_NEWLINE) {
      # Consuming char
      lexer_consume(a0);
      # Retrying
      goto &lx00;
    }
    # Resetting token buffer
    lexer_reset_token(a0);
    # Moving to next trigger
    goto lx00n[lexer_look(a0)];
  label lx01;
    ## We got NULL char
    # Recording the end token, so hold and lexer_token_get see it as well
    lexer_token_set(a0, TOKEN_END);
    return(TOKEN_END);
  label lx02;
    ## We got zero. It means we have either zero number, octal (e.g. 0664) or
    ## hex number (e.g. 0xffff)
    # Consuming zero char
    lexer_consume(a0);
    # Setting integer token ID
    lexer_token_set(a0, TOKEN_INTEGER);
    # Moving to next trigger
    goto lx02n[lexer_look(a0)];
  label lx03;
    ## We have to parse octal number
    # Fetching digit
    x0 = lexer_consume(a0);
    # Adding digit to number
    a0[LEX_TVALUE] = a0[LEX_TVALUE] * 8 + (x0 - '0');
    # Moving to next trigger
    goto lx03n[lexer_look(a0)];
  label lx04;
    ## Trying to parse hexadecimal number
    # We have 'x' symbol. Consuming it
    lexer_consume(a0);
    # Moving to next trigger
    goto lx04n[lexer_look(a0)];
  label lx05;
    ## Parsing hexadecimal number
    # Fetching digit
    x0 = lexer_consume(a0);
    # Checking if it is lower case hexadecimal digit
    if (x0 >= 'a') {
      x0 = x0 - 'a' + 10;
    } else {
      # Checking if it is higher case hexadecimal digit
      if (x0 >= 'A') {
        x0 = x0 - 'A' + 10;
      } else {
        # It is decimal digit
        x0 = x0 - '0';
      }
    }
    # Adding digit to number
    a0[LEX_TVALUE] = a0[LEX_TVALUE] * 16 + x0;
    # Moving to next trigger
    goto lx05n[lexer_look(a0)];
  label lx06;
    ## We have to parse decimal number
    # We have to set integer token ID
    lexer_token_set(a0, TOKEN_INTEGER);
    # Fetching digit
    x0 = lexer_consume(a0);
    # Adding digit to number
    a0[LEX_TVALUE] = a0[LEX_TVALUE] * 10 + (x0 - '0');
    # Moving to next trigger
    goto lx06n[lexer_look(a0)];
  label lx07;
    ## We have to parse identifier
    # We have to set identifier token ID
    lexer_token_set(a0, TOKEN_IDENTIFIER);
    # Adding char to token text
    lexer_consume(a0);
    # Moving to next trigger
    goto lx07n[lexer_look(a0)];
  label lx08;
    ## Checking if identifier is keyword
    # Resetting reserved keyword index
    x1 = 0;
    # Iterating through all reserved keywords
    while (reserved_keywords[x1] != NULL) {
      # If strings are equal
      if (strcmp(reserved_keywords[x1], lexer_token_text(a0)) == NULL) {
        # Setting keyword token ID (keyword IDs follow the table order)
        lexer_token_set(a0, TOKEN_IF + x1);
        goto &lx99;
      }
      # Checking next keyword
      x1 = x1 + 1;
    }
    # We are here because identifier is not a keyword. Exitting
    goto &lx99;
  label lx09;
    ## We have to parse argument or variable (or identifier)
    # Fetching char
    x0 = lexer_consume(a0);
    # Checking if we have argument or variable
    x1 = TOKEN_VARIABLE;
    if (x0 == 'a') { x1 = TOKEN_ARGUMENT; }
    # Set token type
    lexer_token_set(a0, x1);
    # Moving to next trigger
    goto lx09n[lexer_look(a0)];
  label lx10;
    ## Parsing number of argument/variable
    # Fetching char
    x0 = lexer_consume(a0);
    # Adding digit to value
    a0[LEX_TVALUE] = a0[LEX_TVALUE] * 10 + (x0 - '0');
    # Moving to next trigger
    goto lx10n[lexer_look(a0)];
  label lx11;
    ## Parsing single quote (character literal)
    # Setting integer token ID
    lexer_token_set(a0, TOKEN_INTEGER);
    # Consuming char
    lexer_consume(a0);
    # Moving to next trigger
    goto lx11n[lexer_look(a0)];
  label lx12;
    ## Parsing single quote 'simple' content (without backslash)
    # Fetching char
    x0 = lexer_consume(a0);
    # Setting value
    a0[LEX_TVALUE] = x0;
    goto &lx14;
  label lx13;
    ## Parsing single quote content with backslash
    # Consuming backslash
    lexer_consume(a0);
    # Fetching char
    x0 = lexer_consume(a0);
    # The escape table covers chars 0..127 only: end of input and bytes above
    # that range must not be used as an index
    if (x0 == EOF) { goto &le06; }
    if (x0 >= 128) { goto &le00; }
    # Setting escape value
    a0[LEX_TVALUE] = char_to_escape[x0];
  label lx14;
    # Checking if quotes are closed
    if (lexer_look(a0) != CHAR_SQUOTE) { goto &le06; }
    # Consuming char
    lexer_consume(a0);
    goto &lx99;
  label lx15;
    ## Parsing double quote (string literal)
    # Consuming char
    lexer_consume(a0);
    # Moving to next trigger
    goto lx15n[lexer_look(a0)];
  label lx16;
    ## Parsing double quote content with backslash
    # Consuming backslash
    lexer_consume(a0);
    # Consuming next character
    lexer_consume(a0);
    # Moving to next trigger
    goto lx16n[lexer_look(a0)];
  label lx17;
    ## Ending string
    # Consuming char
    lexer_consume(a0);
    # Setting string token ID
    lexer_token_set(a0, TOKEN_STRING);
    # Ending
    goto &lx99;
  label lx18;
    ## Parsing symbol
    # Fetching char
    x0 = lexer_consume(a0);
    # Setting token ID to char value
    lexer_token_set(a0, x0);
    # Fetching next chars
    x1 = fnextc(a0[LEX_ICHAN], 0);
    x2 = fnextc(a0[LEX_ICHAN], 1);
    # Checking for double-char or triple-char symbols
    if (x0 == '=') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_EQ); }
      else if (x1 == '>') { lexer_consume(a0); lexer_token_set(a0, TOKEN_ARROW); }
    } else if (x0 == '!') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_NE); }
    } else if (x0 == '+') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AADD); }
      else if (x1 == '+') { lexer_consume(a0); lexer_token_set(a0, TOKEN_INC); }
    } else if (x0 == '-') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_ASUB); }
      else if (x1 == '-') { lexer_consume(a0); lexer_token_set(a0, TOKEN_DEC); }
    } else if (x0 == '*') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AMUL); }
    } else if (x0 == '/') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_ADIV); }
    } else if (x0 == '%') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AMOD); }
    } else if (x0 == '<') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_LE); }
      else if (x1 == '<') {
        if (x2 == '=') {
          lexer_consume(a0); lexer_consume(a0); lexer_token_set(a0, TOKEN_ASHL);
        } else {
          lexer_consume(a0); lexer_token_set(a0, TOKEN_SHL);
        }
      }
    } else if (x0 == '>') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_GE); }
      else if (x1 == '>') {
        if (x2 == '=') {
          lexer_consume(a0); lexer_consume(a0); lexer_token_set(a0, TOKEN_ASHR);
        } else {
          lexer_consume(a0); lexer_token_set(a0, TOKEN_SHR);
        }
      }
    } else if (x0 == '^') {
      if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AXOR); }
    } else if (x0 == '&') {
      if (x1 == '&') { lexer_consume(a0); lexer_token_set(a0, TOKEN_LAND); }
      else if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AAND); }
    } else if (x0 == '|') {
      if (x1 == '|') { lexer_consume(a0); lexer_token_set(a0, TOKEN_LOR); }
      else if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AOR); }
    } else if (x0 == '#') {
      # Checking for comment
      ## Skipping line
      # The loop also has to stop at end of input, otherwise a comment on a
      # last line without newline would never end
      while (x0 != '\n' && x0 != '\0' && x0 != EOF) { x0 = fgetc(a0[LEX_ICHAN]); }
      # Resetting line and column
      a0[LEX_LINE] = a0[LEX_LINE] + 1;
      a0[LEX_COLUMN] = 1;
      # Starting lexing from the start
      goto &lx00;
    } else if (x0 == '.') {
      if (x1 == '.') {
        if (x2 == '.') { lexer_consume(a0); lexer_consume(a0); lexer_token_set(a0, TOKEN_ELLIPSIS); }
      }
    }
  label lx99;
    ## Finishing lexing. Returning current token ID
    return(a0[LEX_TID]);
    ## Error triggers. lexer_error exits, so none of them falls through
  label le00;
    lexer_error(a0, "Invalid character");
  label le01;
    lexer_error(a0, "Unexpected second 0 char");
  label le02;
    lexer_error(a0, "Unexpected decimal digit (expected octal)");
  label le03;
    lexer_error(a0, "Unexpected hexadecimal digit (expected octal)");
  label le04;
    lexer_error(a0, "Unfinished hexadecimal number");
  label le05;
    lexer_error(a0, "Unexpected hexadecimal digit (expected decimal)");
  label le06;
    lexer_error(a0, "Unterminated character literal");
  label le07;
    lexer_error(a0, "Unterminated string literal");
  }
  ## Fetch assembly line
  # Skips leading whitespace, consumes chars up to (not including) the
  # semicolon that ends the line, then peeks past it to see whether the
  # assembly block closes next.
  # a0 - lexer
  # @return is this assembly line last
  lexer_lex_asm : (a0) {
      # x0 - char
      # x1 - char type
      # x2 - lookahead offset
      allocate(3);
      label lax00;
      ## Entry point of assembly line lexer
      # If next char is space/tab/newline, skip it
      if (lexer_look(a0) == CHAR_SPACES || lexer_look(a0) == CHAR_NEWLINE) {
          # Consuming char
          lexer_consume(a0);
          # Retrying
          goto &lax00;
      }
      # Resetting token buffer
      lexer_reset_token(a0);
      # Fetching first char of the line
      x0 = lexer_consume(a0);
      label lax01;
      ## A line must not start with the block close
      if (x0 == '}') { goto &lae00; }
      label lax02;
      ## Reading chars until semicolon
      # Peeking at next char without consuming it
      x0 = fnextc(a0[LEX_ICHAN], 0);
      # Erroring, if input ended before the semicolon, instead of
      # consuming past the end of input
      if (x0 == EOF) { goto &lae01; }
      # Checking for block not being closed
      if (x0 == '}') { goto &lae00; }
      # Exiting loop, if next char is semicolon (it is left unconsumed)
      if (x0 == ';') { goto &lax03; }
      # Consuming char
      lexer_consume(a0);
      # Making next iteration
      goto &lax02;
      label lax03;
      ## Looking for next significant char to check if this is last line
      # Offset 0 is the semicolon itself, so lookahead starts at 1
      x2 = 1;
      do {
          # Fetching char
          ## FIXME: I guess this can get EOF when lexing at the end of buffer
          x0 = fnextc(a0[LEX_ICHAN], x2++);
          # Erroring, if we got EOF
          if (x0 == EOF) { goto &lae01; }
          # Fetching char type
          x1 = char_table[x0];
      } while (x1 == CHAR_SPACES || x1 == CHAR_NEWLINE);
      # If we have } char, we return TRUE, otherwise FALSE
      return(x0 == '}');
      # NOTE(review): lexer_error is assumed not to return, otherwise
      # lae00 would fall through into lae01 - confirm
      label lae00;
      lexer_error(a0, "Unexpected assembly block close");
      label lae01;
      lexer_error(a0, "Unexpected end of file");
  }
  ### Parser
  ## Parser parts (slot indices of the tuple built by parser())
  # List of lexers; parser_lexer() reads the lexer in use from it
  PARSER_LEX => 0;
  # String buffer passed to parser()
  PARSER_BUFFER => 1;
  # Flag: FALSE on creation, set to TRUE by include_statement()
  PARSER_SWITCH => 2;
  # Vector of filenames that were added through include-once
  PARSER_INCLIST => 3;
  ## Node parts (slot indices of an AST node tuple)
  # Statement ID, one of the STMT_* values
  NODE_TYPE => 0;
  # Token location the node was parsed at
  NODE_LOC => 1;
  # 2, 3, ... are used as operands
  ## Statement IDs
  # Values stored in NODE_TYPE.
  # The numeric order matters: block_statement() treats every node whose
  # type is below STMT_IF as an expression that must be followed by a
  # semicolon, so all STMT_EXPR_* IDs have to stay below STMT_IF.
  # Constant
  STMT_EXPR_INTEGER => 0;
  STMT_EXPR_STRING => 1;
  # Simple
  STMT_EXPR_IDENTIFIER => 2;
  STMT_EXPR_ARGUMENT => 3;
  STMT_EXPR_VARIABLE => 4;
  STMT_EXPR_SYSCALL => 5;
  STMT_EXPR_READCHAR => 6;
  # Postfix
  STMT_EXPR_CALL => 7;
  STMT_EXPR_INDEX => 8;
  # Prefix
  STMT_EXPR_PLUS => 9;
  STMT_EXPR_MINUS => 10;
  STMT_EXPR_LOGICAL_NOT => 11;
  STMT_EXPR_NOT => 12;
  STMT_EXPR_DEREF => 13;
  STMT_EXPR_ADDROF => 14;
  # Multiplicative
  STMT_EXPR_MUL => 15;
  STMT_EXPR_DIV => 16;
  STMT_EXPR_MOD => 17;
  # Additive
  STMT_EXPR_ADD => 18;
  STMT_EXPR_SUB => 19;
  # Bitwise shift
  STMT_EXPR_SHL => 20;
  STMT_EXPR_SHR => 21;
  # Bitwise and
  STMT_EXPR_AND => 22;
  # Bitwise xor
  STMT_EXPR_XOR => 23;
  # Bitwise or
  STMT_EXPR_OR => 24;
  # Relational
  STMT_EXPR_LESS => 25;
  STMT_EXPR_GREATER => 26;
  STMT_EXPR_LE => 27;
  STMT_EXPR_GE => 28;
  # Equality
  STMT_EXPR_EQ => 29;
  STMT_EXPR_NE => 30;
  # Logical and
  STMT_EXPR_LOGICAL_AND => 31;
  # Logical or
  STMT_EXPR_LOGICAL_OR => 32;
  # Assignment
  STMT_EXPR_ASSIGN => 33;
  STMT_EXPR_MUL_ASSIGN => 34;
  STMT_EXPR_DIV_ASSIGN => 35;
  STMT_EXPR_MOD_ASSIGN => 36;
  STMT_EXPR_ADD_ASSIGN => 37;
  STMT_EXPR_SUB_ASSIGN => 38;
  STMT_EXPR_SHL_ASSIGN => 39;
  STMT_EXPR_SHR_ASSIGN => 40;
  STMT_EXPR_AND_ASSIGN => 41;
  STMT_EXPR_XOR_ASSIGN => 42;
  STMT_EXPR_OR_ASSIGN => 43;
  # Increment / decrement
  STMT_EXPR_PREDEC => 44;
  STMT_EXPR_PREINC => 45;
  STMT_EXPR_POSTDEC => 46;
  STMT_EXPR_POSTINC => 47;
  # Ternary
  STMT_EXPR_TERNARY => 48;
  # Statements (first non-expression ID; see the ordering note above)
  STMT_IF => 49;
  STMT_FOR => 50;
  STMT_DO_WHILE => 51;
  STMT_WHILE => 52;
  STMT_ALLOCATE => 53;
  STMT_ASM => 54;
  STMT_RETURN => 55;
  STMT_BREAK => 56;
  STMT_CONTINUE => 57;
  STMT_WRITECHAR => 58;
  STMT_LABEL => 59;
  STMT_GOTO => 60;
  # Block statement
  STMT_BLOCK => 61;
  # Top level statements
  STMT_GLOBAL => 62;
  STMT_MACRO => 63;
  STMT_FUN_DECL => 64;
  STMT_ARRAY_DECL => 65;
  STMT_CHAR_ARRAY_DECL => 66;
  STMT_INT_ARRAY_DECL => 67;
  STMT_VAR_DECL => 68;
  ## Create new parser
  # The result is a 4-slot tuple laid out as PARSER_LEX, PARSER_BUFFER,
  # PARSER_SWITCH, PARSER_INCLIST.
  # a0 - lexer of main file
  # a1 - string buffer
  # @return parser
  parser : (a0, a1) {
      # x0 - lexer list holding only the main file lexer
      # x1 - empty vector of include-once filenames
      allocate(2);
      x0 = list(a0);
      x1 = vector(0, 1);
      # Switch flag starts as FALSE
      return(tuple4(x0, a1, FALSE, x1));
  }
  ## Get current lexer in parser
  # a0 - parser
  # @return lexer
  parser_lexer : (a0) {
      # x0 - lexer list
      allocate(1);
      x0 = a0[PARSER_LEX];
      # Current lexer is the value of the list node stored in the parser
      return(list_value(x0));
  }
  ## Get string buffer in parser
  # a0 - parser
  # @return string buffer
  parser_buffer : (a0) {
      # x0 - string buffer
      allocate(1);
      x0 = a0[PARSER_BUFFER];
      return(x0);
  }
  ## Get current token location
  # Takes the location reported by the current lexer and moves its column
  # back by (token length - 1).
  # a0 - parser
  # @return token location
  parser_token_location : (a0) {
      # x0 - token location
      # x1 - current lexer
      allocate(2);
      # Looking the lexer up once
      x1 = parser_lexer(a0);
      # Fetching token location
      x0 = lexer_token_location(x1);
      # Adjusting column by token length
      x0[LOC_COLUMN] = x0[LOC_COLUMN] - lexer_token_length(x1) + 1;
      return(x0);
  }
  ## Search if file was included before
  # Only filenames recorded in PARSER_INCLIST are considered.
  # a0 - parser
  # a1 - filename
  # @return TRUE if the filename is recorded, FALSE otherwise
  parser_include_search : (a0, a1) {
      # x0 - include list
      # x1 - index
      allocate(2);
      x0 = a0[PARSER_INCLIST];
      x1 = 0;
      # Walking the vector front to back
      while (x1 < vector_size(x0)) {
          # strcmp gives 0 for equal strings
          if (strcmp(vector_get(x0, x1), a1) == 0) { return(TRUE); }
          x1++;
      }
      # No entry matched
      return(FALSE);
  }
  ## Add file to parser, if it is needed
  # A plain include is always added. An include-once file is added only
  # when it is not recorded yet, and is then recorded.
  # a0 - parser
  # a1 - filename
  # a2 - include once
  parser_include_add : (a0, a1, a2) {
      # x0 - lexer for the new file
      allocate(1);
      # The search is only performed for include-once requests
      if (!a2 || !parser_include_search(a0, a1)) {
          # Opening the file and making its lexer part of the lexer list
          x0 = lexer_file(a1);
          a0[PARSER_LEX] = list_insert(a0[PARSER_LEX], x0);
          # Remembering the filename for later include-once checks
          if (a2) { vector_push(a0[PARSER_INCLIST], a1); }
      }
  }
  ## Hold current lexer in parser
  # a0 - parser
  parser_lexer_hold : (a0) {
      # x0 - current lexer
      allocate(1);
      x0 = parser_lexer(a0);
      lexer_hold(x0);
  }
  ## Lex using current lexer in parser
  # a0 - parser
  # @return token ID
  parser_lex : (a0) {
      # x0 - current lexer
      allocate(1);
      x0 = parser_lexer(a0);
      return(lexer_lex(x0));
  }
  ## Lex assembly line using current lexer in parser
  # a0 - parser
  # @return is last assembly line
  parser_lex_asm : (a0) {
      # x0 - current lexer
      allocate(1);
      x0 = parser_lexer(a0);
      return(lexer_lex_asm(x0));
  }
  ## Print parser error and terminate
  # Output order: location, ": ", message, newline; then exit with status 1.
  # Control never comes back to the caller.
  # a0 - parser (not used by the body)
  # a1 - token location
  # a2 - error message
  parser_error : (a0, a1, a2) {
  # Printing location to stderr
  fputloc(a1, stderr);
  eputs(": ");
  # Printing message
  eputs(a2);
  # Ending the line (the original note says this also flushes)
  eputc('\n');
  # Exiting with failure status
  exit(1);
  }
  ## Parse file
  # Parses with the current lexer until it reports TOKEN_END, destroys that
  # lexer, moves to the next one in the lexer list and repeats until the
  # list is exhausted.
  # a0 - parser
  # @return AST (vector of top level statements)
  parser_parse : (a0) {
      # x0 - top level statements vector
      # x1 - token
      # x2 - statement
      allocate(3);
      x0 = vector(0, 1);
      do {
          ## Parsing everything the current lexer has to offer
          x1 = parser_lex(a0);
          while (x1 != TOKEN_END) {
              # Parsing statement that starts at the current token
              x2 = top_level_statement(a0, x1, parser_token_location(a0));
              # Some statements yield no node (include returns NULL)
              if (x2) { vector_push(x0, x2); }
              x1 = parser_lex(a0);
          }
          # This lexer is finished: destroying it
          lexer_destroy(parser_lexer(a0));
          # Switching to previous lexer
          a0[PARSER_LEX] = list_next(a0[PARSER_LEX]);
      } while (a0[PARSER_LEX] != NULL);
      # Returning statements vector
      return(x0);
  }
  ## Parse top level statement
  # Dispatches on the current token. Anything that matches none of the
  # known forms ends in a parser error (which exits).
  # a0 - parser
  # a1 - current token
  # a2 - token location
  # @return statement
  top_level_statement : (a0, a1, a2) {
      # x0 - next token
      # x1 - identifier
      allocate(2);
      if (a1 == TOKEN_GLOBAL) {
          # global ...
          return(global_statement(a0, a2));
      } else if (a1 == TOKEN_INCLUDE) {
          # include ...
          return(include_statement(a0, a2, FALSE));
      } else if (a1 == TOKEN_INCLUDE_ONCE) {
          # include_once ...
          return(include_statement(a0, a2, TRUE));
      } else if (a1 == TOKEN_IDENTIFIER) {
          # Copying identifier before the next token is lexed
          x1 = strdup(lexer_token_text(parser_lexer(a0)));
          x0 = parser_lex(a0);
          # identifier => ... is a macro definition
          if (x0 == TOKEN_ARROW) { return(macro_declaration(a0, x1, a2)); }
          # identifier : ... is a declaration; the token after : picks the kind
          if (x0 == ':') {
              x0 = parser_lex(a0);
              if (x0 == '(') {
                  # Function
                  return(function_declaration(a0, x1, a2));
              } else if (x0 == '[') {
                  # Array with listed elements
                  return(array_declaration(a0, x1, a2));
              } else if (x0 == TOKEN_TYPE_CHAR) {
                  # Reserved char array
                  return(type_array_declaration(a0, x1, STMT_CHAR_ARRAY_DECL, a2));
              } else if (x0 == TOKEN_TYPE_INT) {
                  # Reserved int array
                  return(type_array_declaration(a0, x1, STMT_INT_ARRAY_DECL, a2));
              } else {
                  # Simple variable; x0 is the first token of its expression
                  return(variable_declaration(a0, x1, x0, a2));
              }
          }
      }
      parser_error(a0, a2, "Syntax error - none of available top level statements matched");
  }
  ## Parse global statement
  # Accepts a single identifier or a parenthesised, comma separated
  # list of identifiers, followed by a semicolon.
  # a0 - parser
  # a1 - token location
  # @return statement
  global_statement : (a0, a1) {
      # x0 - ops vector (copies of the identifiers)
      # x1 - next token
      allocate(2);
      x0 = vector(0, 1);
      x1 = parser_lex(a0);
      if (x1 == TOKEN_IDENTIFIER) {
          # Single identifier form
          vector_push(x0, strdup(lexer_token_text(parser_lexer(a0))));
      } else {
          # List form has to open with (
          if (x1 != '(') { goto &global_statement_invalid; }
          do {
              # Each element must be an identifier
              x1 = parser_lex(a0);
              if (x1 != TOKEN_IDENTIFIER) { goto &global_statement_invalid; }
              vector_push(x0, strdup(lexer_token_text(parser_lexer(a0))));
              # A comma announces one more element
              x1 = parser_lex(a0);
          } while (x1 == ',');
          # List has to close with )
          if (x1 != ')') { goto &global_statement_invalid; }
      }
      # Statement ends with semicolon
      x1 = parser_lex(a0);
      if (x1 != ';') { goto &global_statement_end_error; }
      return(tuple3(STMT_GLOBAL, a1, x0));
      # parser_error exits, so the labels below do not fall through
      label global_statement_invalid;
      parser_error(a0, a1, "Invalid global statement");
      label global_statement_end_error;
      parser_error(a0, a1, "Global statement is not ended with semicolon");
  }
  ## Parse include statement
  # Accepts a single string or a parenthesised, comma separated list of
  # strings, followed by a semicolon. Every named file is handed to
  # parser_include_add() and PARSER_SWITCH is set.
  # a0 - parser
  # a1 - token location
  # a2 - include once
  # @return NULL
  include_statement : (a0, a1, a2) {
      # x0 - file vector
      # x1 - next token/i
      allocate(2);
      x0 = vector(0, 1);
      x1 = parser_lex(a0);
      if (x1 == TOKEN_STRING) {
          # Single file form
          vector_push(x0, strlitdup(lexer_token_text(parser_lexer(a0))));
      } else {
          # List form has to open with (
          if (x1 != '(') { goto &include_statement_invalid; }
          do {
              # Each element must be a string
              x1 = parser_lex(a0);
              if (x1 != TOKEN_STRING) { goto &include_statement_invalid; }
              vector_push(x0, strlitdup(lexer_token_text(parser_lexer(a0))));
              # A comma announces one more element
              x1 = parser_lex(a0);
          } while (x1 == ',');
          # List has to close with )
          if (x1 != ')') { goto &include_statement_invalid; }
      }
      # Statement ends with semicolon
      x1 = parser_lex(a0);
      if (x1 != ';') { goto &include_statement_end_error; }
      # Registering the files in the order they were written
      x1 = 0;
      while (x1 < vector_size(x0)) {
          parser_include_add(a0, vector_get(x0, x1), a2);
          x1++;
      }
      # Marking we switched file
      a0[PARSER_SWITCH] = TRUE;
      return(NULL);
      # parser_error exits, so the labels below do not fall through
      label include_statement_invalid;
      parser_error(a0, a1, "Invalid include statement");
      label include_statement_end_error;
      parser_error(a0, a1, "Include statement is not ended with semicolon");
  }
  ## Parse macro declaration
  # Called after "identifier =>" was consumed; parses the expression and
  # the closing semicolon.
  # a0 - parser
  # a1 - identifier
  # a2 - token location
  # @return statement
  macro_declaration : (a0, a1, a2) {
      # x0 - token
      # x1 - expression
      allocate(2);
      # Fetching first token of the expression
      x0 = parser_lex(a0);
      # Fetching expression
      x1 = expression(a0, x0);
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for semicolon
      if (x0 != ';') { goto &macro_declaration_end_error; }
      # Returning statement
      return(tuple4(STMT_MACRO, a2, a1, x1));
      label macro_declaration_end_error;
      # parser_error expects the token location (a2), not the identifier (a1)
      parser_error(a0, a2, "Macro declaration is not ended with semicolon");
  }
  ## Parse function declaration
  # Called after "identifier : (" was consumed. Arguments must appear in
  # order (their token values count up from 0); an ellipsis may end the list.
  # a0 - parser
  # a1 - identifier
  # a2 - token location
  # @return statement
  function_declaration : (a0, a1, a2) {
      # x0 - arg count
      # x1 - token
      allocate(2);
      x0 = 0;
      ## Parsing arguments
      do {
          x1 = parser_lex(a0);
          # Closing parenthesis ends the list
          if (x1 == ')') { goto &function_declaration_arg_end; }
          # Ellipsis must be the last element: fetching what follows it
          if (x1 == TOKEN_ELLIPSIS) { x1 = parser_lex(a0); goto &function_declaration_arg_end; }
          # Expecting the argument whose value equals the current count
          if (x1 != TOKEN_ARGUMENT || x0 != lexer_token_value(parser_lexer(a0)))
          { goto &function_declaration_arg_invalid; }
          x0++;
          # A comma announces one more argument
          x1 = parser_lex(a0);
      } while (x1 == ',');
      label function_declaration_arg_end;
      # Whatever path led here, the list must have been closed by )
      if (x1 != ')') { goto &function_declaration_arg_invalid; }
      # Function body is a strict block (braces required)
      x1 = parser_lex(a0);
      return(tuple5(STMT_FUN_DECL, a2, a1, x0, block_statement(a0, x1, TRUE)));
      label function_declaration_arg_invalid;
      parser_error(a0, a2, "Invalid function arguments declaration syntax");
  }
  ## Parse array declaration
  # Called after "identifier : [" was consumed; parses a comma separated
  # list of expressions, the closing ] and the semicolon.
  # a0 - parser
  # a1 - identifier
  # a2 - token location
  # @return statement
  array_declaration : (a0, a1, a2) {
      # x0 - ops vector
      # x1 - next token
      # x2 - expression
      allocate(3);
      # Allocating operand vector
      x0 = vector(0, 1);
      label array_declaration_loop;
      # Fetching first token of the element
      x1 = parser_lex(a0);
      # Fetching element expression
      x2 = expression(a0, x1);
      # Pushing expression to operand vector
      vector_push(x0, x2);
      # Fetching next token
      x1 = parser_lex(a0);
      # Checking if there is another element
      if (x1 == ',') { goto &array_declaration_loop; }
      # Checking for correct syntax
      if (x1 != ']') { goto &array_declaration_invalid; }
      # Fetching next token
      x1 = parser_lex(a0);
      # Checking for semicolon
      if (x1 != ';') { goto &array_declaration_end_error; }
      # Returning statement
      return(tuple4(STMT_ARRAY_DECL, a2, a1, x0));
      # parser_error exits, so the labels below do not fall through
      label array_declaration_invalid;
      parser_error(a0, a2, "Invalid array declaration syntax");
      label array_declaration_end_error;
      # parser_error expects the token location (a2), not the identifier (a1)
      parser_error(a0, a2, "Array statement is not ended with semicolon");
  }
  ## Parse type array declaration
  # Called after "identifier : <type>" was consumed; parses "[ expression ]"
  # and the semicolon.
  # a0 - parser
  # a1 - identifier
  # a2 - type (statement ID stored in the resulting node)
  # a3 - token location
  # @return statement
  type_array_declaration : (a0, a1, a2, a3) {
      # x0 - next token
      # x1 - expression
      allocate(2);
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for correct syntax
      if (x0 != '[') { goto &type_array_declaration_invalid; }
      # Fetching first token of the expression
      x0 = parser_lex(a0);
      # Fetching expression
      x1 = expression(a0, x0);
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for correct syntax
      if (x0 != ']') { goto &type_array_declaration_invalid; }
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for semicolon
      if (x0 != ';') { goto &type_array_declaration_end_error; }
      # Returning statement
      return(tuple4(a2, a3, a1, x1));
      # parser_error exits, so the labels below do not fall through
      label type_array_declaration_invalid;
      parser_error(a0, a3, "Invalid type array declaration syntax");
      label type_array_declaration_end_error;
      # parser_error expects the token location (a3), not the identifier (a1)
      parser_error(a0, a3, "Type array statement is not ended with semicolon");
  }
  ## Parse variable declaration
  # Called after "identifier :" with the first token of the value
  # expression already fetched; parses the expression and the semicolon.
  # a0 - parser
  # a1 - identifier
  # a2 - current token
  # a3 - token location
  # @return statement
  variable_declaration : (a0, a1, a2, a3) {
      # x0 - expression
      # x1 - next token
      allocate(2);
      # Fetching value expression
      x0 = expression(a0, a2);
      # Fetching next token
      x1 = parser_lex(a0);
      # Checking for semicolon
      if (x1 != ';') { goto &variable_declaration_end_error; }
      # Returning statement
      return(tuple4(STMT_VAR_DECL, a3, a1, x0));
      label variable_declaration_end_error;
      # parser_error expects the token location (a3), not the identifier (a1)
      parser_error(a0, a3, "Variable statement is not ended with semicolon");
  }
  ## Parse block of statements
  # With a { as current token, parses statements up to the matching }.
  # Otherwise, when not strict, parses one statement and wraps it in a block.
  # Expression nodes (type below STMT_IF) must be followed by a semicolon.
  # a0 - parser
  # a1 - current token
  # a2 - strict (TRUE: the block must start with {)
  # @return statement
  block_statement : (a0, a1, a2) {
      # x0 - ops vector
      # x1 - next token
      # x2 - expression
      # x3 - token location
      # x4 - statement token location
      allocate(5);
      # Allocating operand (statements) vector
      x0 = vector(0, 1);
      # Fetching token location
      x3 = parser_token_location(a0);
      # The single-statement path reports errors through x4 as well,
      # so it must hold a valid location before the loop sets it
      x4 = x3;
      # Checking for correct syntax
      if (a1 != '{') {
          # Without braces only a non-strict block is acceptable
          if (a2) { goto &block_statement_invalid; }
          # Fetching statement
          x2 = statement(a0, a1, x3);
          # Checking if expression is ended with semicolon
          if (x2[NODE_TYPE] < STMT_IF) {
              # Fetching next token
              x1 = parser_lex(a0);
              # Checking for semicolon
              if (x1 != ';') { goto &block_statement_content_invalid; }
          }
          # Pushing statement to vector
          vector_push(x0, x2);
          # Ending block statement
          goto &block_statement_end;
      }
      label block_statement_loop;
      # Fetching next token
      x1 = parser_lex(a0);
      # Fetching statement token location
      x4 = parser_token_location(a0);
      # Checking if we need to exit the loop
      if (x1 == '}') { goto &block_statement_end; }
      # Checking if we have next block inside this
      if (x1 == '{') { x2 = block_statement(a0, x1, TRUE); }
      # Else fetching statement
      else { x2 = statement(a0, x1, x4); }
      # Pushing statement to vector
      vector_push(x0, x2);
      # Checking if expression is ended with semicolon
      if (x2[NODE_TYPE] < STMT_IF) {
          # Fetching next token
          x1 = parser_lex(a0);
          # Checking for semicolon
          if (x1 != ';') { goto &block_statement_content_invalid; }
      }
      # So, making next iteration
      goto &block_statement_loop;
      label block_statement_end;
      # Returning block statement
      return(tuple3(STMT_BLOCK, x3, x0));
      # parser_error exits, so the labels below do not fall through
      label block_statement_invalid;
      parser_error(a0, x3, "Invalid block statement syntax");
      label block_statement_content_invalid;
      parser_error(a0, x4, "Expression inside block is not ended with semicolon");
  }
  ## Parse statement
  # Dispatches on the current token; anything that is not a statement
  # keyword is parsed as an expression.
  # a0 - parser
  # a1 - current token
  # a2 - token location
  # @return statement
  statement : (a0, a1, a2) {
      # Every branch returns, so plain guards are enough
      if (a1 == TOKEN_IF) { return(if_statement(a0, a2)); }
      if (a1 == TOKEN_FOR) { return(for_statement(a0, a2)); }
      if (a1 == TOKEN_DO) { return(do_while_statement(a0, a2)); }
      if (a1 == TOKEN_WHILE) { return(while_statement(a0, a2)); }
      if (a1 == TOKEN_ALLOCATE) { return(allocate_statement(a0, a2)); }
      if (a1 == TOKEN_ASM) { return(assembly_statement(a0, a2)); }
      if (a1 == TOKEN_RETURN) { return(return_statement(a0, a2)); }
      # break and continue share one parser, told apart by the token
      if (a1 == TOKEN_BREAK || a1 == TOKEN_CONTINUE) { return(loop_jump_statement(a0, a2, a1)); }
      if (a1 == TOKEN_WRITECHAR) { return(writechar_statement(a0, a2)); }
      if (a1 == TOKEN_LABEL) { return(label_statement(a0, a2)); }
      if (a1 == TOKEN_GOTO) { return(goto_statement(a0, a2)); }
      # Fallback: expression
      return(expression(a0, a1));
  }
  ## Parse if statement
  # Called after the if keyword. The operand vector receives a
  # (condition, block) pair for the if and for every "else if", followed
  # by one extra block when a final else is present.
  # a0 - parser
  # a1 - token location
  # @return statement
  if_statement : (a0, a1) {
      # x0 - ops vector
      # x1 - next token
      allocate(2);
      x0 = vector(0, 3);
      do {
          ## One "( condition ) block" branch
          if (parser_lex(a0) != '(') { goto &if_statement_invalid; }
          x1 = parser_lex(a0);
          vector_push(x0, expression(a0, x1));
          if (parser_lex(a0) != ')') { goto &if_statement_invalid; }
          # Branch body; a single statement is allowed
          x1 = parser_lex(a0);
          vector_push(x0, block_statement(a0, x1, FALSE));
          # Without else the statement is complete; giving the token back
          x1 = parser_lex(a0);
          if (x1 != TOKEN_ELSE) { parser_lexer_hold(a0); goto &if_statement_end; }
          # "else if" starts another conditional branch
          x1 = parser_lex(a0);
      } while (x1 == TOKEN_IF);
      ## Plain else branch
      vector_push(x0, block_statement(a0, x1, FALSE));
      # Nothing may follow the unconditional else branch
      x1 = parser_lex(a0);
      if (x1 == TOKEN_ELSE) { goto &if_statement_else_invalid; }
      # Giving the token back
      parser_lexer_hold(a0);
      label if_statement_end;
      return(tuple3(STMT_IF, a1, x0));
      # parser_error exits, so the labels below do not fall through
      label if_statement_invalid;
      parser_error(a0, a1, "Invalid if statement syntax");
      label if_statement_else_invalid;
      parser_error(a0, a1, "Put else branches with condition before last else branch.");
  }
  ## Parse for statement
  # Called after the for keyword. Init and condition may be empty (NULL in
  # the node); the iteration part is a possibly empty comma separated list.
  # a0 - parser
  # a1 - token location
  # @return statement
  for_statement : (a0, a1) {
      # x0 - next token
      # x1 - init expression
      # x2 - cond expression
      # x3 - iteration expressions vector
      # x4 - block statement
      allocate(5);
      x3 = vector(0, 1);
      if (parser_lex(a0) != '(') { goto &for_statement_invalid; }
      ## Init part
      x0 = parser_lex(a0);
      if (x0 == ';') {
          # Empty init
          x1 = NULL;
      } else {
          x1 = expression(a0, x0);
          if (parser_lex(a0) != ';') { goto &for_statement_invalid; }
      }
      ## Condition part
      x0 = parser_lex(a0);
      if (x0 == ';') {
          # Empty condition
          x2 = NULL;
      } else {
          x2 = expression(a0, x0);
          if (parser_lex(a0) != ';') { goto &for_statement_invalid; }
      }
      ## Iteration part
      x0 = parser_lex(a0);
      if (x0 != ')') {
          # Giving the token back; the loop fetches it again
          parser_lexer_hold(a0);
          do {
              x0 = parser_lex(a0);
              # A ) here means an expression is missing after a comma
              if (x0 == ')') { goto &for_statement_invalid; }
              vector_push(x3, expression(a0, x0));
              # A comma announces one more expression
              x0 = parser_lex(a0);
          } while (x0 == ',');
          # List has to close with )
          if (x0 != ')') { goto &for_statement_invalid; }
      }
      ## Body; a single statement is allowed
      x0 = parser_lex(a0);
      x4 = block_statement(a0, x0, FALSE);
      return(tuple6(STMT_FOR, a1, x1, x2, x3, x4));
      label for_statement_invalid;
      parser_error(a0, a1, "Invalid for statement syntax");
  }
  ## Parse do-while statement
  # Called after the do keyword; parses "block while ( condition ) ;".
  # a0 - parser
  # a1 - token location
  # @return statement
  do_while_statement : (a0, a1) {
      # x0 - next token
      # x1 - cond expression
      # x2 - block statement
      allocate(3);
      # Body comes first; a single statement is allowed
      x0 = parser_lex(a0);
      x2 = block_statement(a0, x0, FALSE);
      # Then the while keyword and the opening parenthesis
      if (parser_lex(a0) != TOKEN_WHILE) { goto &do_while_statement_invalid; }
      if (parser_lex(a0) != '(') { goto &do_while_statement_invalid; }
      # Condition
      x0 = parser_lex(a0);
      x1 = expression(a0, x0);
      if (parser_lex(a0) != ')') { goto &do_while_statement_invalid; }
      # Statement ends with semicolon
      if (parser_lex(a0) != ';') { goto &do_while_statement_end_error; }
      return(tuple4(STMT_DO_WHILE, a1, x1, x2));
      # parser_error exits, so the labels below do not fall through
      label do_while_statement_invalid;
      parser_error(a0, a1, "Invalid do-while statement syntax");
      label do_while_statement_end_error;
      parser_error(a0, a1, "Do-while statement is not ended with semicolon");
  }
  ## Parse while statement
  # Called after the while keyword; parses "( condition ) block".
  # a0 - parser
  # a1 - token location
  # @return statement
  while_statement : (a0, a1) {
      # x0 - next token
      # x1 - cond expression
      # x2 - block statement
      allocate(3);
      # Opening parenthesis
      if (parser_lex(a0) != '(') { goto &while_statement_invalid; }
      # Condition
      x0 = parser_lex(a0);
      x1 = expression(a0, x0);
      # Closing parenthesis
      if (parser_lex(a0) != ')') { goto &while_statement_invalid; }
      # Body; a single statement is allowed
      x0 = parser_lex(a0);
      x2 = block_statement(a0, x0, FALSE);
      return(tuple4(STMT_WHILE, a1, x1, x2));
      label while_statement_invalid;
      parser_error(a0, a1, "Invalid while statement syntax");
  }
  ## Parse allocate statement
  # Called after the allocate keyword; parses "( expression ) ;".
  # a0 - parser
  # a1 - token location
  # @return statement
  allocate_statement : (a0, a1) {
      # x0 - next token
      # x1 - expression
      allocate(2);
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for correct syntax
      if (x0 != '(') { goto &allocate_statement_invalid; }
      # Fetching first token of the expression
      x0 = parser_lex(a0);
      # Fetching size expression
      x1 = expression(a0, x0);
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for correct syntax
      if (x0 != ')') { goto &allocate_statement_invalid; }
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for semicolon; the target must be this function's own
      # error label, not the one belonging to loop_jump_statement
      if (x0 != ';') { goto &allocate_statement_end_error; }
      # Return statement
      return(tuple3(STMT_ALLOCATE, a1, x1));
      # parser_error exits, so the labels below do not fall through
      label allocate_statement_invalid;
      parser_error(a0, a1, "Invalid allocate statement syntax");
      label allocate_statement_end_error;
      parser_error(a0, a1, "Allocate statement is not ended with semicolon");
  }
  ## Parse assembly statement
  # Collects the text of every assembly line between the braces; each line
  # has to be followed by a semicolon.
  # a0 - parser
  # a1 - token location
  # @return STMT_ASM tuple (type, location, vector of duplicated line strings)
  assembly_statement : (a0, a1) {
      # x0 - asm vector
      # x1 - result of parser_lex_asm (non-zero once the last line was read)
      allocate(2);
      x0 = vector(0, 1);
      # Assembly body has to be opened with a brace
      if (parser_lex(a0) != '{') { goto &assembly_statement_invalid; }
      label assembly_statement_loop;
      # Lexing one assembly line and remembering whether it was the last one
      x1 = parser_lex_asm(a0);
      # Storing a private copy of the line text
      vector_push(x0, strdup(lexer_token_text(parser_lexer(a0))));
      # Every line is terminated by a semicolon
      if (parser_lex(a0) != ';') { goto &assembly_statement_invalid; }
      # More lines follow, keep going
      if (!x1) { goto &assembly_statement_loop; }
      # Assembly body has to be closed with a brace
      if (parser_lex(a0) != '}') { goto &assembly_statement_invalid; }
      return(tuple3(STMT_ASM, a1, x0));
      label assembly_statement_invalid;
      parser_error(a0, a1, "Invalid assembly statement syntax");
  }
  ## Parse return statement
  # The return value is optional; the operand vector is empty without one.
  # a0 - parser
  # a1 - token location
  # @return STMT_RETURN tuple (type, location, operand vector)
  return_statement : (a0, a1) {
      # x0 - ops vector
      # x1 - next token
      allocate(2);
      x0 = vector(0, 1);
      x1 = parser_lex(a0);
      if (x1 == ';') {
          # No result: give the semicolon back so the common check below sees it
          parser_lexer_hold(a0);
          goto &return_statement_end;
      }
      # Parsing the returned expression and storing it as the only operand
      vector_push(x0, expression(a0, x1));
      label return_statement_end;
      # Statement has to be terminated by a semicolon
      if (parser_lex(a0) != ';') { goto &return_statement_end_error; }
      return(tuple3(STMT_RETURN, a1, x0));
      label return_statement_end_error;
      parser_error(a0, a1, "Return statement is not ended with semicolon");
  }
  ## Parse break/continue statement
  # a0 - parser
  # a1 - token location
  # a2 - statement token
  # @return tuple (statement type, location)
  loop_jump_statement : (a0, a1, a2) {
      # x0 - statement type
      allocate(1);
      # Keyword has to be directly followed by a semicolon
      if (parser_lex(a0) != ';') { goto &loop_jump_statement_end_error; }
      # Statement type is derived from the token's offset relative to TOKEN_BREAK
      x0 = STMT_BREAK + a2 - TOKEN_BREAK;
      return(tuple2(x0, a1));
      label loop_jump_statement_end_error;
      parser_error(a0, a1, "Break/continue statement is not ended with semicolon");
  }
  ## Parse writechar statement
  # Expected form after the keyword: ( array , index , value ) ;
  # a0 - parser
  # a1 - token location
  # @return STMT_WRITECHAR tuple (type, location, array, index, value)
  writechar_statement : (a0, a1) {
      # x0 - array expression
      # x1 - index expression
      # x2 - value expression
      allocate(3);
      # Argument list has to be opened with a parenthesis
      if (parser_lex(a0) != '(') { goto &writechar_statement_invalid; }
      # First argument: array
      x0 = expression(a0, parser_lex(a0));
      if (parser_lex(a0) != ',') { goto &writechar_statement_invalid; }
      # Second argument: index
      x1 = expression(a0, parser_lex(a0));
      if (parser_lex(a0) != ',') { goto &writechar_statement_invalid; }
      # Third argument: value
      x2 = expression(a0, parser_lex(a0));
      # Argument list has to be closed
      if (parser_lex(a0) != ')') { goto &writechar_statement_invalid; }
      # Statement has to be terminated by a semicolon
      if (parser_lex(a0) != ';') { goto &writechar_statement_end_error; }
      return(tuple5(STMT_WRITECHAR, a1, x0, x1, x2));
      label writechar_statement_invalid;
      parser_error(a0, a1, "Invalid writechar statement");
      label writechar_statement_end_error;
      parser_error(a0, a1, "Writechar statement is not ended with semicolon");
  }
  ## Parse label statement
  # Expected form after the keyword: identifier ;
  # a0 - parser
  # a1 - token location
  # @return STMT_LABEL tuple (type, location, duplicated identifier string)
  label_statement : (a0, a1) {
      # x0 - next token
      # x1 - duplicated identifier
      allocate(2);
      # Fetching next token
      x0 = parser_lex(a0);
      # Label name has to be an identifier
      if (x0 != TOKEN_IDENTIFIER) { goto &label_statement_invalid; }
      # Duplicating identifier before the next lex call
      x1 = strdup(lexer_token_text(parser_lexer(a0)));
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for semicolon
      if (x0 != ';') { goto &label_statement_end_error; }
      # Returning statement
      return(tuple3(STMT_LABEL, a1, x1));
      # NOTE(review): parser_error presumably does not return; if it did,
      # control would fall through into the next error label
      label label_statement_invalid;
      parser_error(a0, a1, "Invalid label statement syntax");
      label label_statement_end_error;
      parser_error(a0, a1, "Label statement is not ended with semicolon");
  }
  ## Parse goto statement
  # Expected form after the keyword: target-expression ;
  # a0 - parser
  # a1 - token location
  # @return STMT_GOTO tuple (type, location, target expression)
  goto_statement : (a0, a1) {
      # x0 - next token
      # x1 - expression
      allocate(2);
      # Fetching next token
      x0 = parser_lex(a0);
      # Fetching goto expression
      x1 = expression(a0, x0);
      # Fetching next token
      x0 = parser_lex(a0);
      # Checking for semicolon; jump to this function's own label so that
      # the reported message names the goto statement
      if (x0 != ';') { goto &goto_statement_end_error; }
      # Returning statement
      return(tuple3(STMT_GOTO, a1, x1));
      label goto_statement_end_error;
      parser_error(a0, a1, "Goto statement is not ended with semicolon");
  }
  ## Parse expression
  # Entry point of the expression parser.
  # a0 - parser
  # a1 - current token
  # @return expression
  expression : (a0, a1) {
      # Location of the current token is passed down as the expression location
      return(assignment_expression(a0, a1, parser_token_location(a0)));
  }
  ## Try to parse assignment expression
  # Parses a ternary-level operand and, when an assignment operator follows,
  # hands over to assignment_expr with the matching expression type.
  # a0 - parser
  # a1 - current token
  # a2 - token location
  # @return expression
  assignment_expression : (a0, a1, a2) {
      # x0 - op1
      # x1 - next token
      allocate(2);
      x0 = ternary_expression(a0, a1, a2);
      x1 = parser_lex(a0);
      # Mapping assignment operator token to expression type
      if (x1 == '=') { return(assignment_expr(a0, x0, STMT_EXPR_ASSIGN, a2)); }
      else if (x1 == TOKEN_AADD) { return(assignment_expr(a0, x0, STMT_EXPR_ADD_ASSIGN, a2)); }
      else if (x1 == TOKEN_ASUB) { return(assignment_expr(a0, x0, STMT_EXPR_SUB_ASSIGN, a2)); }
      else if (x1 == TOKEN_AMUL) { return(assignment_expr(a0, x0, STMT_EXPR_MUL_ASSIGN, a2)); }
      else if (x1 == TOKEN_ADIV) { return(assignment_expr(a0, x0, STMT_EXPR_DIV_ASSIGN, a2)); }
      else if (x1 == TOKEN_AMOD) { return(assignment_expr(a0, x0, STMT_EXPR_MOD_ASSIGN, a2)); }
      else if (x1 == TOKEN_AAND) { return(assignment_expr(a0, x0, STMT_EXPR_AND_ASSIGN, a2)); }
      else if (x1 == TOKEN_AXOR) { return(assignment_expr(a0, x0, STMT_EXPR_XOR_ASSIGN, a2)); }
      else if (x1 == TOKEN_AOR) { return(assignment_expr(a0, x0, STMT_EXPR_OR_ASSIGN, a2)); }
      else if (x1 == TOKEN_ASHL) { return(assignment_expr(a0, x0, STMT_EXPR_SHL_ASSIGN, a2)); }
      else if (x1 == TOKEN_ASHR) { return(assignment_expr(a0, x0, STMT_EXPR_SHR_ASSIGN, a2)); }
      else {
          # Not an assignment: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(x0);
      }
  }
  ## Parse assignment expression
  # Called after the assignment operator was consumed; parses the right side.
  # a0 - parser
  # a1 - op1
  # a2 - assign type
  # a3 - token location
  # @return tuple (assign type, location, op1, op2)
  assignment_expr : (a0, a1, a2, a3) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards op2 itself
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      # Right side is parsed at ternary level
      x1 = ternary_expression(a0, x0, x1);
      return(tuple4(a2, a3, a1, x1));
  }
  ## Try to parse ternary expression
  # Parses a logical-or level operand and, when a question mark follows,
  # hands over to ternary_expr.
  # a0 - parser
  # a1 - current token
  # a2 - token location
  # @return expression
  ternary_expression : (a0, a1, a2) {
      # x0 - op1
      # x1 - next token
      allocate(2);
      # Fetching first operand
      x0 = logical_or_expression(a0, a1, NULL, a2);
      # Fetching next token
      x1 = parser_lex(a0);
      # Parsing ternary; ternary_expr takes exactly (parser, op1, location),
      # and only x0 and x1 are allocated in this function
      if (x1 == '?') { return(ternary_expr(a0, x0, a2)); }
      # Holding token
      parser_lexer_hold(a0);
      # Returning operand
      return(x0);
  }
  ## Parse ternary expression
  # Called after the question mark was consumed; parses both branches,
  # which have to be separated by a colon.
  # a0 - parser
  # a1 - op1
  # a2 - token location
  # @return STMT_EXPR_TERNARY tuple (type, location, op1, op2, op3)
  ternary_expr : (a0, a1, a2) {
      # x0 - next token
      # x1 - op2
      # x2 - token location of the branch being parsed, finally op3
      allocate(3);
      # Second operand
      x0 = parser_lex(a0);
      x2 = parser_token_location(a0);
      x1 = ternary_expression(a0, x0, x2);
      # Branches have to be separated by a colon
      if (parser_lex(a0) != ':') { goto &ternary_expr_error; }
      # Third operand
      x0 = parser_lex(a0);
      x2 = parser_token_location(a0);
      x2 = ternary_expression(a0, x0, x2);
      return(tuple5(STMT_EXPR_TERNARY, a2, a1, x1, x2));
      label ternary_expr_error;
      parser_error(a0, a2, "Incorrect ternary expression syntax");
  }
  ## Try to parse logical or expression
  # Parses the left operand when none is supplied, then checks whether a
  # logical or operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  logical_or_expression : (a0, a1, a2, a3) {
      if (a2 == NULL) { a2 = logical_and_expression(a0, a1, NULL, a3); }
      if (parser_lex(a0) != TOKEN_LOR) {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
      return(logical_or_expr(a0, a2, a3));
  }
  ## Parse logical or expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - token location
  # @return expression
  logical_or_expr : (a0, a1, a2) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(STMT_EXPR_LOGICAL_OR, a2, a1, logical_and_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(logical_or_expression(a0, NULL, x1, a2));
  }
  ## Try to parse logical and expression
  # Parses the left operand when none is supplied, then checks whether a
  # logical and operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  logical_and_expression : (a0, a1, a2, a3) {
      # x0 - next token
      allocate(1);
      # Fetching first operand
      if (a2 == NULL) { a2 = equality_expression(a0, a1, NULL, a3); }
      # Fetching next token
      x0 = parser_lex(a0);
      # Parsing logical and
      if (x0 == TOKEN_LAND) { return(logical_and_expr(a0, a2, a3)); }
      # Holding token
      parser_lexer_hold(a0);
      # Returning operand
      return(a2);
  }
  ## Parse logical and expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - token location
  # @return expression
  logical_and_expr : (a0, a1, a2) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(STMT_EXPR_LOGICAL_AND, a2, a1, equality_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(logical_and_expression(a0, NULL, x1, a2));
  }
  ## Try to parse equality expression
  # Parses the left operand when none is supplied, then checks whether an
  # equal or not-equal operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  equality_expression : (a0, a1, a2, a3) {
      # x0 - next token
      allocate(1);
      if (a2 == NULL) { a2 = relational_expression(a0, a1, NULL, a3); }
      x0 = parser_lex(a0);
      if (x0 == TOKEN_EQ) { return(equality_expr(a0, a2, STMT_EXPR_EQ, a3)); }
      else if (x0 == TOKEN_NE) { return(equality_expr(a0, a2, STMT_EXPR_NE, a3)); }
      else {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
  }
  ## Parse equality expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - operation
  # a3 - token location
  # @return expression
  equality_expr : (a0, a1, a2, a3) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(a2, a3, a1, relational_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(equality_expression(a0, NULL, x1, a3));
  }
  ## Try to parse relational expression
  # Parses the left operand when none is supplied, then checks whether a
  # comparison operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  relational_expression : (a0, a1, a2, a3) {
      # x0 - next token
      allocate(1);
      if (a2 == NULL) { a2 = bitwise_or_expression(a0, a1, NULL, a3); }
      x0 = parser_lex(a0);
      if (x0 == '<') { return(relational_expr(a0, a2, STMT_EXPR_LESS, a3)); }
      else if (x0 == '>') { return(relational_expr(a0, a2, STMT_EXPR_GREATER, a3)); }
      else if (x0 == TOKEN_LE) { return(relational_expr(a0, a2, STMT_EXPR_LE, a3)); }
      else if (x0 == TOKEN_GE) { return(relational_expr(a0, a2, STMT_EXPR_GE, a3)); }
      else {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
  }
  ## Parse relational expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - operation
  # a3 - token location
  # @return expression
  relational_expr : (a0, a1, a2, a3) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(a2, a3, a1, bitwise_or_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(relational_expression(a0, NULL, x1, a3));
  }
  ## Try to parse bitwise or expression
  # Parses the left operand when none is supplied, then checks whether a
  # bitwise or operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  bitwise_or_expression : (a0, a1, a2, a3) {
      if (a2 == NULL) { a2 = bitwise_xor_expression(a0, a1, NULL, a3); }
      if (parser_lex(a0) != '|') {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
      return(bitwise_or_expr(a0, a2, a3));
  }
  ## Parse bitwise or expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - token location
  # @return expression
  bitwise_or_expr : (a0, a1, a2) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(STMT_EXPR_OR, a2, a1, bitwise_xor_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(bitwise_or_expression(a0, NULL, x1, a2));
  }
  ## Try to parse bitwise xor expression
  # Parses the left operand when none is supplied, then checks whether a
  # bitwise xor operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  bitwise_xor_expression : (a0, a1, a2, a3) {
      if (a2 == NULL) { a2 = bitwise_and_expression(a0, a1, NULL, a3); }
      if (parser_lex(a0) != '^') {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
      return(bitwise_xor_expr(a0, a2, a3));
  }
  ## Parse bitwise xor expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - token location
  # @return expression
  bitwise_xor_expr : (a0, a1, a2) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(STMT_EXPR_XOR, a2, a1, bitwise_and_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(bitwise_xor_expression(a0, NULL, x1, a2));
  }
  ## Try to parse bitwise and expression
  # Parses the left operand when none is supplied, then checks whether a
  # bitwise and operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  bitwise_and_expression : (a0, a1, a2, a3) {
      if (a2 == NULL) { a2 = bitwise_shift_expression(a0, a1, NULL, a3); }
      if (parser_lex(a0) != '&') {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
      return(bitwise_and_expr(a0, a2, a3));
  }
  ## Parse bitwise and expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - token location
  # @return expression
  bitwise_and_expr : (a0, a1, a2) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(STMT_EXPR_AND, a2, a1, bitwise_shift_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(bitwise_and_expression(a0, NULL, x1, a2));
  }
  ## Try to parse bitwise shift expression
  # Parses the left operand when none is supplied, then checks whether a
  # shift operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  bitwise_shift_expression : (a0, a1, a2, a3) {
      # x0 - next token
      allocate(1);
      if (a2 == NULL) { a2 = additive_expression(a0, a1, NULL, a3); }
      x0 = parser_lex(a0);
      if (x0 == TOKEN_SHL) { return(bitwise_shift_expr(a0, a2, STMT_EXPR_SHL, a3)); }
      else if (x0 == TOKEN_SHR) { return(bitwise_shift_expr(a0, a2, STMT_EXPR_SHR, a3)); }
      else {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
  }
  ## Parse bitwise shift expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - operation
  # a3 - token location
  # @return expression
  bitwise_shift_expr : (a0, a1, a2, a3) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(a2, a3, a1, additive_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(bitwise_shift_expression(a0, NULL, x1, a3));
  }
  ## Try to parse additive expression
  # Parses the left operand when none is supplied, then checks whether an
  # addition or subtraction operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  additive_expression : (a0, a1, a2, a3) {
      # x0 - next token
      allocate(1);
      if (a2 == NULL) { a2 = multiplicative_expression(a0, a1, NULL, a3); }
      x0 = parser_lex(a0);
      if (x0 == '+') { return(additive_expr(a0, a2, STMT_EXPR_ADD, a3)); }
      else if (x0 == '-') { return(additive_expr(a0, a2, STMT_EXPR_SUB, a3)); }
      else {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
  }
  ## Parse additive expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - operation
  # a3 - token location
  # @return expression
  additive_expr : (a0, a1, a2, a3) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(a2, a3, a1, multiplicative_expression(a0, x0, NULL, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(additive_expression(a0, NULL, x1, a3));
  }
  ## Try to parse multiplicative expression
  # Parses the left operand when none is supplied, then checks whether a
  # multiplication, division or modulus operator follows.
  # a0 - parser
  # a1 - current token (only read when a2 is NULL)
  # a2 - already parsed left operand, or NULL to parse one starting at a1
  # a3 - token location
  # @return expression
  multiplicative_expression : (a0, a1, a2, a3) {
      # x0 - next token
      allocate(1);
      if (a2 == NULL) { a2 = prefix_expression(a0, a1, a3); }
      x0 = parser_lex(a0);
      if (x0 == '*') { return(multiplicative_expr(a0, a2, STMT_EXPR_MUL, a3)); }
      else if (x0 == '/') { return(multiplicative_expr(a0, a2, STMT_EXPR_DIV, a3)); }
      else if (x0 == '%') { return(multiplicative_expr(a0, a2, STMT_EXPR_MOD, a3)); }
      else {
          # No operator: give the token back and return the operand as is
          parser_lexer_hold(a0);
          return(a2);
      }
  }
  ## Parse multiplicative expression
  # Called after the operator was consumed; parses the right operand.
  # a0 - parser
  # a1 - op1
  # a2 - operation
  # a3 - token location
  # @return expression
  multiplicative_expr : (a0, a1, a2, a3) {
      # x0 - first token of op2
      # x1 - location of that token, afterwards the combined expression
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      x1 = tuple4(a2, a3, a1, prefix_expression(a0, x0, x1));
      # Combined expression becomes the left operand of a possible next operator
      return(multiplicative_expression(a0, NULL, x1, a3));
  }
  ## Try to parse prefix expression
  # Dispatches on the current token: a prefix operator goes to prefix_expr,
  # anything else is parsed as a simple expression.
  # a0 - parser
  # a1 - current token
  # a2 - token location
  # @return expression
  prefix_expression : (a0, a1, a2) {
      # Arithmetic sign
      if (a1 == '+') { return(prefix_expr(a0, STMT_EXPR_PLUS, a2)); }
      if (a1 == '-') { return(prefix_expr(a0, STMT_EXPR_MINUS, a2)); }
      # Logical and bitwise negation
      if (a1 == '!') { return(prefix_expr(a0, STMT_EXPR_LOGICAL_NOT, a2)); }
      if (a1 == '~') { return(prefix_expr(a0, STMT_EXPR_NOT, a2)); }
      # Dereference and address-of
      if (a1 == '*') { return(prefix_expr(a0, STMT_EXPR_DEREF, a2)); }
      if (a1 == '&') { return(prefix_expr(a0, STMT_EXPR_ADDROF, a2)); }
      # Pre-decrement and pre-increment
      if (a1 == TOKEN_DEC) { return(prefix_expr(a0, STMT_EXPR_PREDEC, a2)); }
      if (a1 == TOKEN_INC) { return(prefix_expr(a0, STMT_EXPR_PREINC, a2)); }
      # No prefix operator
      return(simple_expression(a0, a1, a2));
  }
  ## Parse prefix expression
  # Called after the prefix operator was consumed; parses its operand,
  # which may itself start with another prefix operator.
  # a0 - parser
  # a1 - operation
  # a2 - token location
  # @return tuple (operation, location, operand)
  prefix_expr : (a0, a1, a2) {
      # x0 - first token of the operand
      # x1 - location of that token
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      return(tuple3(a1, a2, prefix_expression(a0, x0, x1)));
  }
  ## Try to parse simple expression
  # Dispatches on the current token; anything not listed is handed to
  # constant_expression and then checked for postfixes.
  # a0 - parser
  # a1 - current token
  # a2 - token location
  # @return expression
  simple_expression : (a0, a1, a2) {
      if (a1 == TOKEN_IDENTIFIER) { return(simple_identifier_expression(a0, a2)); }
      # Argument and variable share one parser, told apart by the flag
      else if (a1 == TOKEN_ARGUMENT) { return(simple_argvar_expression(a0, TRUE, a2)); }
      else if (a1 == TOKEN_VARIABLE) { return(simple_argvar_expression(a0, FALSE, a2)); }
      else if (a1 == TOKEN_SYSCALL) { return(simple_syscall_expression(a0, a2)); }
      else if (a1 == TOKEN_READCHAR) { return(simple_readchar_expression(a0, a2)); }
      # Parentheses grouping
      else if (a1 == '(') { return(simple_parantheses_expression(a0, a2)); }
      else {
          # Constant expression
          return(simple_postfix_expression(a0, constant_expression(a0, a1, a2)));
      }
  }
  ## Parse simple identifier expression
  # Wraps the current identifier token and checks for postfixes.
  # a0 - parser
  # a1 - token location
  # @return expression
  simple_identifier_expression : (a0, a1) {
      # x0 - identifier expression
      allocate(1);
      # Token text is duplicated before any further lexing takes place
      x0 = tuple3(STMT_EXPR_IDENTIFIER, a1, strdup(lexer_token_text(parser_lexer(a0))));
      return(simple_postfix_expression(a0, x0));
  }
  ## Parse simple argument/variable expression
  # Wraps the value of the current token and checks for postfixes.
  # a0 - parser
  # a1 - is argument
  # a2 - token location
  # @return expression
  simple_argvar_expression : (a0, a1, a2) {
      # x0 - expression type, afterwards the expression itself
      allocate(1);
      if (a1) { x0 = STMT_EXPR_ARGUMENT; }
      else { x0 = STMT_EXPR_VARIABLE; }
      x0 = tuple3(x0, a2, lexer_token_value(parser_lexer(a0)));
      return(simple_postfix_expression(a0, x0));
  }
  ## Parse simple syscall expression
  # Expected form after the keyword: a parenthesized, comma separated list
  # of at least one and at most seven expressions.
  # a0 - parser
  # a1 - token location
  # @return expression
  simple_syscall_expression : (a0, a1) {
      # x0 - ops vector
      # x1 - token following an operand
      allocate(2);
      x0 = vector(0, 1);
      # Operand list has to be opened with a parenthesis
      if (parser_lex(a0) != '(') { goto &simple_syscall_expression_invalid; }
      label simple_syscall_expression_loop;
      # Parsing one operand and appending it to the vector
      vector_push(x0, expression(a0, parser_lex(a0)));
      x1 = parser_lex(a0);
      # A comma announces another operand
      if (x1 == ',') { goto &simple_syscall_expression_loop; }
      # Anything else has to be the closing parenthesis
      if (x1 != ')') { goto &simple_syscall_expression_invalid; }
      # Operand count is limited
      if (vector_size(x0) > 7) { goto &simple_syscall_expression_too_many; }
      return(simple_postfix_expression(a0, tuple3(STMT_EXPR_SYSCALL, a1, x0)));
      label simple_syscall_expression_invalid;
      parser_error(a0, a1, "Invalid syscall expression");
      label simple_syscall_expression_too_many;
      parser_error(a0, a1, "Too many arguments in syscall expression");
  }
  ## Parse simple readchar expression
  # Expected form after the keyword: ( array , index )
  # a0 - parser
  # a1 - token location
  # @return expression
  simple_readchar_expression : (a0, a1) {
      # x0 - array expression
      # x1 - index expression
      allocate(2);
      # Argument list has to be opened with a parenthesis
      if (parser_lex(a0) != '(') { goto &simple_readchar_expression_invalid; }
      # First argument: array
      x0 = expression(a0, parser_lex(a0));
      if (parser_lex(a0) != ',') { goto &simple_readchar_expression_invalid; }
      # Second argument: index
      x1 = expression(a0, parser_lex(a0));
      # Argument list has to be closed
      if (parser_lex(a0) != ')') { goto &simple_readchar_expression_invalid; }
      return(simple_postfix_expression(a0, tuple4(STMT_EXPR_READCHAR, a1, x0, x1)));
      label simple_readchar_expression_invalid;
      parser_error(a0, a1, "Invalid readchar expression");
  }
  ## Parse simple parentheses expression
  # Called after the opening parenthesis was consumed; the inner expression
  # is returned as is (no wrapping node) after checking for postfixes.
  # a0 - parser
  # a1 - token location
  # @return expression
  simple_parantheses_expression : (a0, a1) {
      # x0 - inner expression
      allocate(1);
      x0 = expression(a0, parser_lex(a0));
      # Grouping has to be closed
      if (parser_lex(a0) != ')') { goto &simple_parantheses_expression_invalid; }
      return(simple_postfix_expression(a0, x0));
      label simple_parantheses_expression_invalid;
      parser_error(a0, a1, "Parantheses expression is not closed");
  }
  ## Parse simple expression postfixes
  # Checks whether a call, an index or a post-increment/decrement follows
  # the given expression.
  # a0 - parser
  # a1 - expression
  # @return expression
  simple_postfix_expression : (a0, a1) {
      # x0 - next token, afterwards the wrapped expression
      # x1 - token location
      allocate(2);
      x0 = parser_lex(a0);
      x1 = parser_token_location(a0);
      # Call
      if (x0 == '(') { return(simple_address_call_expression(a0, a1, x1)); }
      # Indexing
      if (x0 == '[') { return(simple_address_index_expression(a0, a1, x1)); }
      # Post-decrement, may be followed by further postfixes
      if (x0 == TOKEN_DEC) {
          x0 = tuple3(STMT_EXPR_POSTDEC, x1, a1);
          return(simple_postfix_expression(a0, x0));
      }
      # Post-increment, may be followed by further postfixes
      if (x0 == TOKEN_INC) {
          x0 = tuple3(STMT_EXPR_POSTINC, x1, a1);
          return(simple_postfix_expression(a0, x0));
      }
      # No postfix: give the token back
      parser_lexer_hold(a0);
      # The fetched location is not stored anywhere, so it is released
      free(x1);
      return(a1);
  }
  3215. ## Parse simple address call expression
  3216. # a0 - parser
  3217. # a1 - expression
  3218. # a2 - token location
  3219. # @return statement
  3220. simple_address_call_expression : (a0, a1, a2) {
  3221. # x0 - ops vector
  3222. # x1 - next token
  3223. # x2 - expression
  3224. allocate(3);
  3225. # Allocating operand vector
  3226. x0 = vector(0, 1);
  3227. # Pushing expression to operand vector
  3228. vector_push(x0, a1);
  3229. label simple_address_call_expression_loop;
  3230. # Fetching next token
  3231. x1 = parser_lex(a0);
  3232. # Checking if arguments ended
  3233. if (x1 == ')') { goto &simple_address_call_expression_end; }
  3234. # Fetching expression
  3235. x2 = expression(a0, x1);
  3236. # Pushing expression to operand vector
  3237. vector_push(x0, x2);
  3238. # Fetching next token
  3239. x1 = parser_lex(a0);
  3240. # Checking for next argument
  3241. if (x1 == ',') { goto &simple_address_call_expression_loop; }
  3242. # Checking for correct syntax
  3243. if (x1 != ')') { goto &simple_address_call_expression_invalid; }
  3244. label simple_address_call_expression_end;
  3245. # Returning expression
  3246. return(simple_postfix_expression(a0, tuple3(STMT_EXPR_CALL, a2, x0)));
  3247. label simple_address_call_expression_invalid;
  3248. parser_error(a0, a2, "Call expression is not closed");
  3249. }
  3250. ## Parse simple address indexing expression
  3251. # a0 - parser
  3252. # a1 - expression
  3253. # a2 - token location
  3254. # @return statement
  3255. simple_address_index_expression : (a0, a1, a2) {
  3256. # x0 - next token
  3257. # x1 - expression
  3258. allocate(2);
  3259. # Fetching next token
  3260. x0 = parser_lex(a0);
  3261. # Fetching expression
  3262. x1 = expression(a0, x0);
  3263. # Fetching next token
  3264. x0 = parser_lex(a0);
  3265. # Checking for correct syntax
  3266. if (x0 != ']') { goto &simple_address_index_expression_invalid; }
  3267. # Returning expression
  3268. return(simple_postfix_expression(a0, tuple4(STMT_EXPR_INDEX, a2, a1, x1)));
  3269. label simple_address_index_expression_invalid;
  3270. parser_error(a0, a2, "Index expression is not closed");
  3271. }
  3272. ## Try to parse constant expression
  3273. # a0 - parser
  3274. # a1 - current token
  3275. # a2 - token location
  3276. # @return statement
  3277. constant_expression : (a0, a1, a2) {
  3278. # Parsing integer
  3279. if (a1 == TOKEN_INTEGER) { return(constant_integer_expression(a0, a2)); }
  3280. # Parsing string
  3281. if (a1 == TOKEN_STRING) { return(constant_string_expression(a0, a2)); }
  3282. parser_error(a0, a2, "Syntax error - none of available expressions matched");
  3283. }
  3284. ## Parse constant integer expression
  3285. # a0 - parser
  3286. # a1 - token location
  3287. # @return statement
  3288. constant_integer_expression : (a0, a1) {
  3289. # Returning basic integer expression
  3290. return(tuple3(STMT_EXPR_INTEGER, a1, lexer_token_value(parser_lexer(a0))));
  3291. }
  3292. ## Parse constant string expression
  3293. # a0 - parser
  3294. # a1 - token location
  3295. # @return statement
  3296. constant_string_expression : (a0, a1) {
  3297. # x0 - duplicated string
  3298. allocate(1);
  3299. # Fetching lexer text buffer address
  3300. x0 = lexer_token_text(parser_lexer(a0));
  3301. # Returning expression
  3302. return(tuple3(STMT_EXPR_STRING, a1, string_buffer_push(parser_buffer(a0), x0)));
  3303. }
### String buffer
# A string buffer is the tuple3 built by `string_buffer`; the STRBUF_* parts
# below are the indices of its fields.
## String buffer constants
# Size passed to calloc for the backing storage. `string_buffer_push` reports
# an error and exits instead of storing a string that would not fit.
STRBUF_SIZE => 16384;
## String buffer parts
# Address of the backing storage
STRBUF_BUFFER => 0;
# Offset the next pushed string will get (grows by stresclen(string) - 1 per push)
STRBUF_OFFSET => 1;
# Amount of backing storage in use (grows by strlen(string) + 1 per push)
STRBUF_LENGTH => 2;
  3311. ## Create new string buffer
  3312. # @return buffer
  3313. string_buffer : () { return(tuple3(calloc(STRBUF_SIZE), 0, 0)); }
  3314. ## Get string buffer offset
  3315. # a0 - string buffer
  3316. # @return offset
  3317. string_buffer_offset : (a0) { return(a0[STRBUF_OFFSET]); }
  3318. ## Get string buffer length
  3319. # a0 - string buffer
  3320. # @return offset
  3321. string_buffer_length : (a0) { return(a0[STRBUF_LENGTH]); }
  3322. ## Find string in string buffer
  3323. # a0 - string buffer
  3324. # a1 - string
  3325. # @return string offset
  3326. string_buffer_find : (a0, a1) {
  3327. # x0 - string buffer address
  3328. # x1 - string buffer length
  3329. # x2 - string buffer offset
  3330. # x3 - string offset
  3331. # x4 - string address
  3332. allocate(5);
  3333. # Fetching string buffer length
  3334. x1 = string_buffer_length(a0);
  3335. # If string buffer is empty, don't even try to search string
  3336. if (x1 != 0) {
  3337. # Fetching string buffer address
  3338. x0 = a0[STRBUF_BUFFER];
  3339. # Iterating through strings
  3340. x2 = 0;
  3341. x3 = 0;
  3342. while (x2 < x1) {
  3343. # Calculating string address
  3344. x4 = x0 + x2;
  3345. # Comparing string. If matched, return offset
  3346. if (strcmp(x4, a1) == 0) { return(x3); }
  3347. # String didn't match. Moving to next string
  3348. x2 = x2 + strlen(x4) + 1;
  3349. x3 = x3 + stresclen(x4) - 1;
  3350. }
  3351. }
  3352. # We didn't find the string.
  3353. return(-1);
  3354. }
  3355. ## Find or push string to string buffer
  3356. # a0 - string buffer
  3357. # a1 - string
  3358. # @return string offset
  3359. string_buffer_push : (a0, a1) {
  3360. # x0 - string offset
  3361. allocate(3);
  3362. # Trying to find string in buffer
  3363. x0 = string_buffer_find(a0, a1);
  3364. # If not -1, return offset
  3365. if (x0 != -1) { return(x0); }
  3366. ## We are here, because we didn't find string. Pushing string to buffer
  3367. # x0 - old offset
  3368. # x1 - old length
  3369. # x2 - string length
  3370. # Fetching string buffer offset
  3371. x0 = string_buffer_offset(a0);
  3372. # Fetching string buffer length
  3373. x1 = string_buffer_length(a0);
  3374. # Getting length of string
  3375. x2 = strlen(a1);
  3376. # If buffer can't hold new string, printing error.
  3377. # TODO: when `free` function will be done, reallocate buffer. For now it can
  3378. # take too much memory for our purposes.
  3379. if (x1 + x2 >= STRBUF_SIZE) {
  3380. eputs("ERROR: string buffer is full.\n");
  3381. exit(1);
  3382. }
  3383. # Copying string to buffer
  3384. strcpy(a1, a0[STRBUF_BUFFER] + x1);
  3385. # Setting new offset and length (strings are with "")
  3386. a0[STRBUF_OFFSET] = x0 + stresclen(a1) - 1;
  3387. a0[STRBUF_LENGTH] = x1 + x2 + 1;
  3388. # Returning old offset
  3389. return(x0);
  3390. }
  3391. ## Export string buffer to output channel
  3392. # a0 - string buffer
  3393. # a1 - output channel
  3394. string_buffer_export : (a0, a1) {
  3395. # x0 - string buffer address
  3396. # x1 - string buffer length
  3397. # x2 - offset
  3398. allocate(3);
  3399. # Fetching string buffer length
  3400. x1 = string_buffer_length(a0);
  3401. # Exporting if string buffer is not empty
  3402. if (x1 != 0) {
  3403. # Fetching string buffer address
  3404. x0 = a0[STRBUF_BUFFER];
  3405. # Creating `strbuf` label
  3406. fputs(a1, "strbuf:\n");
  3407. # Iterating through strings
  3408. x2 = 0;
  3409. while (x2 < x1) {
  3410. # Writing .string macro
  3411. fputs(a1, ".string ");
  3412. # Writing string
  3413. fputs(a1, x0 + x2);
  3414. fputc(a1, '\n');
  3415. # Moving to new offset
  3416. x2 = x2 + strlen(x0 + x2) + 1;
  3417. }
  3418. }
  3419. }
### Compiler
# A compiler is the tuple6 built by `compiler`; the COM_* parts below are the
# indices of its fields.
## Compiler parts
# Output channel the generated assembly is written to
COM_OCHAN => 0;
# String buffer exported at the end of `compiler_compile`
COM_STRBUF => 1;
# Next free label ID (see `compiler_new_label`)
COM_LABEL => 2;
# Segment the output is currently in (one of SEG_*)
COM_SEGMENT => 3;
# Tuple of per-register usage counters, indexed by REG_*
COM_REGFLAGS => 4;
# List of loop tuples (see `compiler_push_loop`)
COM_LOOP => 5;
## Segment flags
# Values stored in COM_SEGMENT; `compiler_set_segment` prints the matching directive
SEG_TEXT => 0;
SEG_DATA => 1;
SEG_RODATA => 2;
## Register usage flags
# Indices into both `registers` and the COM_REGFLAGS tuple
REG_EAX => 0;
REG_EBX => 1;
REG_ECX => 2;
REG_EDX => 3;
REG_ESI => 4;
REG_EDI => 5;
# Register list (%ebp used only for syscalls)
# %ebp has no REG_* constant and no slot in the six-element COM_REGFLAGS tuple.
registers : [ "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp" ];
  3441. ## Create new compiler
  3442. # a0 - output chan
  3443. # a1 - string buffer
  3444. # @return compiler
  3445. compiler : (a0, a1) {
  3446. return(tuple6(a0, a1, 0, SEG_TEXT, tuple6(0,0,0,0,0,0), list(NULL)));
  3447. }
  3448. ## Get compiler's output channel
  3449. # a0 - compiler
  3450. # @return output channel
  3451. compiler_output_chan : (a0) { return(a0[COM_OCHAN]); }
  3452. ## Get compiler's current label ID
  3453. # a0 - compiler
  3454. # @return label ID
  3455. compiler_label_id : (a0) { return(a0[COM_LABEL]); }
  3456. ## Set new segment if needed
  3457. # a0 - compiler
  3458. # a1 - segment
  3459. compiler_set_segment : (a0, a1) {
  3460. # x0 - compiler output channel
  3461. allocate(1);
  3462. # Getting compiler output channel
  3463. x0 = compiler_output_chan(a0);
  3464. # Checking if we already in this segment
  3465. if (a0[COM_SEGMENT] == a1) { return; }
  3466. # Switching segment in compiler
  3467. a0[COM_SEGMENT] = a1;
  3468. # Printing required text, to switch segment
  3469. if (a1 == SEG_TEXT) { fputs(x0, ".text\n"); }
  3470. if (a1 == SEG_DATA) { fputs(x0, ".data\n"); }
  3471. if (a1 == SEG_RODATA) { fputs(x0, ".section .rodata\n"); }
  3472. }
  3473. ## Prepare register for use
  3474. # a0 - compiler
  3475. # a1 - register
  3476. compiler_allocate_register : (a0, a1) {
  3477. # x0 - compiler output channel
  3478. # x1 - registers state
  3479. allocate(2);
  3480. # Getting output channel
  3481. x0 = compiler_output_chan(a0);
  3482. # Fetching register state
  3483. x1 = a0[COM_REGFLAGS];
  3484. # Checking if we need to push register to stack (do this, if regstate != 0)
  3485. if (x1[a1] != 0) {
  3486. # Printing push instruction
  3487. fputs(x0, "\tpushl ");
  3488. # Printing register
  3489. fputs(x0, registers[a1]);
  3490. # Printing newline
  3491. fputc(x0, '\n');
  3492. }
  3493. # Increment register state by one
  3494. x1[a1] = x1[a1] + 1;
  3495. }
  3496. ## Restore register state, if it is needed
  3497. # a0 - compiler
  3498. # a1 - register
  3499. compiler_free_register : (a0, a1) {
  3500. # x0 - compiler output channel
  3501. # x1 - register state
  3502. allocate(2);
  3503. # Getting output channel
  3504. x0 = compiler_output_chan(a0);
  3505. # Fetching register state
  3506. x1 = a0[COM_REGFLAGS];
  3507. # Checking, if register was never allocated
  3508. assert(x1[a1] != 0, "Tried to free not used register.");
  3509. # Checking if we need to pop register from stack (do this, if regstate != 1)
  3510. if (x1[a1] != 1) {
  3511. # Printing push instruction
  3512. fputs(x0, "\tpopl ");
  3513. # Printing register
  3514. fputs(x0, registers[a1]);
  3515. # Printing newline
  3516. fputc(x0, '\n');
  3517. }
  3518. # Decrement register state by one
  3519. x1[a1] = x1[a1] - 1;
  3520. }
  3521. ## Set new label ID
  3522. # a0 - compiler
  3523. # @return old label ID
  3524. compiler_new_label : (a0) {
  3525. # x0 - old label ID
  3526. allocate(1);
  3527. # Fetching label ID
  3528. x0 = compiler_label_id(a0);
  3529. # Incrementing label ID
  3530. a0[COM_LABEL] = x0 + 1;
  3531. # Returning old label ID
  3532. return(x0);
  3533. }
  3534. ## Get current loop
  3535. # a0 - compiler
  3536. # @return loop tuple
  3537. compiler_loop : (a0) { return(list_value(a0[COM_LOOP])); }
  3538. ## Push new loop to list
  3539. # a0 - compiler
  3540. # a1 - label to start of loop
  3541. # a2 - label to end of loop
  3542. compiler_push_loop : (a0, a1, a2) {
  3543. # x0 - loop tuple
  3544. allocate(1);
  3545. # Creating tuple
  3546. x0 = tuple2(a1, a2);
  3547. # Pushing loop to list
  3548. a0[COM_LOOP] = list_insert(a0[COM_LOOP], x0);
  3549. }
  3550. ## Pop loop from list
  3551. # a0 - compiler
  3552. compiler_pop_loop : (a0, a1, a2) {
  3553. # x0 - loop tuple
  3554. allocate(1);
  3555. # Fetching loop from list
  3556. x0 = list_value(a0[COM_LOOP]);
  3557. # Checking if list wasn't empty
  3558. assert(x0 != NULL, "Tried to pop loop from empty list.");
  3559. # Popping
  3560. a0[COM_LOOP] = list_pop(a0[COM_LOOP]);
  3561. }
  3562. ## Print label
  3563. # a0 - output chan
  3564. # a1 - label ID
  3565. fputlabel : (a0, a1) {
  3566. # Printing the `_label` heading
  3567. fputs(a0, "_label");
  3568. # Printing the ID of label
  3569. fputd(a0, a1);
  3570. }
  3571. ## Print label definition
  3572. # a0 - output chan
  3573. # a1 - label ID
  3574. fputlabeldef : (a0, a1) {
  3575. # Printing label
  3576. fputlabel(a0, a1);
  3577. # Printing definition
  3578. fputs(a0, ":\n");
  3579. }
  3580. ## Print compiler error
  3581. # a0 - compiler
  3582. # a1 - statement
  3583. # a2 - error message
  3584. compiler_error : (a0, a1, a2) {
  3585. eputs("ERROR: ");
  3586. fputloc(a1[1], stderr);
  3587. eputs(": ");
  3588. eputs(a2);
  3589. eputc('\n');
  3590. }
  3591. ## Compile AST
  3592. # a0 - compiler
  3593. # a1 - AST
  3594. compiler_compile : (a0, a1) {
  3595. # x0 - i
  3596. allocate(1);
  3597. # Using macros in AST instead of matching identifiers
  3598. compiler_use_macros(a0, a1);
  3599. # Resetting index
  3600. x0 = 0;
  3601. # Iterating through statements
  3602. while (x0 < vector_size(a1)) {
  3603. compile_top_level_statement(a0, vector_get(a1, x0));
  3604. x0 = x0 + 1;
  3605. }
  3606. # Changing segment to .rodata
  3607. compiler_set_segment(a0, SEG_RODATA);
  3608. # Exporting string buffer
  3609. string_buffer_export(a0[COM_STRBUF], a0[COM_OCHAN]);
  3610. }
  3611. ## Use macros instead of identifiers matching their name
  3612. # a0 - compiler
  3613. # a1 - AST
  3614. compiler_use_macros : (a0, a1) {
  3615. # x0 - i
  3616. # x1 - statement
  3617. # x2 - j
  3618. allocate(3);
  3619. # Resetting index
  3620. x0 = 0;
  3621. # Iterating through statements
  3622. while (x0 < vector_size(a1)) {
  3623. # Getting current statement
  3624. x1 = vector_get(a1, x0);
  3625. # Checking if current statement is a macro
  3626. if (x1[NODE_TYPE] == STMT_MACRO) {
  3627. # Looping through all next statements
  3628. x2 = x0 + 1;
  3629. while (x2 < vector_size(a1)) {
  3630. # Using macro in statement
  3631. compiler_use_macro(a0, vector_get(a1, x2), x1);
  3632. x2 = x2 + 1;
  3633. }
  3634. }
  3635. # Moving to next statement
  3636. x0 = x0 + 1;
  3637. }
  3638. }
  3639. ## Use macro in statement's children
  3640. # a0 - compiler
  3641. # a1 - statement
  3642. # a2 - macro statement
  3643. compiler_use_macro : (a0, a1, a2) {
  3644. # x0 - statement ID
  3645. # x1 - children pos
  3646. # x2 - children count
  3647. # x3 - child
  3648. allocate(4);
  3649. # Presetting default values
  3650. x1 = 2; # Children in tuples start from second element in most cases
  3651. # Fetching statement ID
  3652. x0 = a1[NODE_TYPE];
  3653. ### Checking if we have statement, that has children
  3654. # Vectored expressions
  3655. if (x0 == STMT_EXPR_SYSCALL) { goto &compiler_use_macro_vector; }
  3656. if (x0 == STMT_EXPR_CALL) { goto &compiler_use_macro_vector; }
  3657. # Not vectored expressions
  3658. if (x0 == STMT_EXPR_READCHAR) { x2 = 2; goto &compiler_use_macro_tuple; }
  3659. if (x0 == STMT_EXPR_INDEX) { x2 = 2; goto &compiler_use_macro_tuple; }
  3660. # Unary operators
  3661. ## From STMT_EXPR_PLUS to STMT_EXPR_ADDROF
  3662. if (x0 >= STMT_EXPR_PLUS && x0 <= STMT_EXPR_ADDROF
  3663. || x0 >= STMT_EXPR_PREDEC && x0 <= STMT_EXPR_POSTINC) {
  3664. x2 = 1;
  3665. goto &compiler_use_macro_tuple;
  3666. }
  3667. # Binary operators
  3668. ## From STMT_EXPR_MUL to STMT_EXPR_OR_ASSIGN
  3669. if (x0 >= STMT_EXPR_MUL && x0 <= STMT_EXPR_OR_ASSIGN) {
  3670. x2 = 2;
  3671. goto &compiler_use_macro_tuple;
  3672. }
  3673. # Ternary operator
  3674. if (x0 == STMT_EXPR_TERNARY) { x2 = 3; goto &compiler_use_macro_tuple; }
  3675. # Statements
  3676. ## Vectored statements
  3677. if (x0 == STMT_IF) { goto &compiler_use_macro_vector; }
  3678. if (x0 == STMT_BLOCK) { goto &compiler_use_macro_vector; }
  3679. if (x0 == STMT_RETURN) { goto &compiler_use_macro_vector; }
  3680. if (x0 == STMT_ARRAY_DECL) { x1 = 3; goto &compiler_use_macro_vector; }
  3681. ## Not vectored statements
  3682. if (x0 == STMT_ALLOCATE) { x2 = 1; goto &compiler_use_macro_tuple; }
  3683. if (x0 == STMT_FOR) { x2 = 4; goto &compiler_use_macro_tuple; }
  3684. if (x0 == STMT_DO_WHILE) { x2 = 2; goto &compiler_use_macro_tuple; }
  3685. if (x0 == STMT_WHILE) { x2 = 2; goto &compiler_use_macro_tuple; }
  3686. if (x0 == STMT_WRITECHAR) { x2 = 3; goto &compiler_use_macro_tuple; }
  3687. if (x0 == STMT_GOTO) { x2 = 1; goto &compiler_use_macro_tuple; }
  3688. if (x0 == STMT_MACRO) { x1 = 3; x2 = 1; goto &compiler_use_macro_tuple; }
  3689. if (x0 == STMT_FUN_DECL) { x1 = 4; x2 = 1; goto &compiler_use_macro_tuple; }
  3690. if (x0 == STMT_CHAR_ARRAY_DECL) { x1 = 3; x2 = 1; goto &compiler_use_macro_tuple; }
  3691. if (x0 == STMT_INT_ARRAY_DECL) { x1 = 3; x2 = 1; goto &compiler_use_macro_tuple; }
  3692. if (x0 == STMT_VAR_DECL) { x1 = 3; x2 = 1; goto &compiler_use_macro_tuple; }
  3693. # If we are here, we have expression that shouldn't apply macro. Exitting.
  3694. label compiler_use_macro_exit;
  3695. return;
  3696. label compiler_use_macro_tuple;
  3697. # x0 - index
  3698. x0 = 0; # Resetting index
  3699. while (x0 < x2) {
  3700. # Fetching child
  3701. x3 = a1[x1 + x0];
  3702. # Checking if we can apply macro
  3703. if (x3) {
  3704. if (x3[NODE_TYPE] == STMT_EXPR_IDENTIFIER) {
  3705. # Checking if identifier and macro names are equal
  3706. if (strcmp(x3[2], a2[2]) == 0) {
  3707. # Replacing child with macro
  3708. a1[x1 + x0] = a2[3];
  3709. }
  3710. } else {
  3711. # Maybe it's another statement/expression with children?
  3712. compiler_use_macro(a0, x3, a2);
  3713. }
  3714. }
  3715. # Moving to next child
  3716. x0 = x0 + 1;
  3717. }
  3718. goto &compiler_use_macro_exit;
  3719. label compiler_use_macro_vector;
  3720. # x0 - index
  3721. x0 = 0; # Resetting index
  3722. while (x0 < vector_size(a1[x1])) {
  3723. # Fetching child
  3724. x3 = vector_get(a1[x1], x0);
  3725. # Checking if we can apply macro
  3726. if (x3[NODE_TYPE] == STMT_EXPR_IDENTIFIER) {
  3727. # Checking if identifier and macro names are equal
  3728. if (strcmp(x3[2], a2[2]) == 0) {
  3729. # Replacing child with macro
  3730. vector_set(a1[x1], x0, a2[3]);
  3731. }
  3732. } else {
  3733. # Maybe it's another statement/expression with children?
  3734. compiler_use_macro(a0, x3, a2);
  3735. }
  3736. # Moving to next child
  3737. x0 = x0 + 1;
  3738. }
  3739. goto &compiler_use_macro_exit;
  3740. }
  3741. ## Compile top level statement
  3742. # a0 - compiler
  3743. # a1 - top level statement
  3744. compile_top_level_statement : (a0, a1) {
  3745. # x0 - statement ID
  3746. allocate(1);
  3747. # Getting statement ID
  3748. x0 = a1[NODE_TYPE];
  3749. if (x0 == STMT_GLOBAL) { compile_global_statement(a0, a1); }
  3750. else if (x0 == STMT_FUN_DECL) { compile_function_declaration(a0, a1); }
  3751. else if (x0 == STMT_ARRAY_DECL) { compile_array_declaration(a0, a1); }
  3752. else if (x0 == STMT_CHAR_ARRAY_DECL) { compile_type_array_declaration(a0, a1, 1); }
  3753. else if (x0 == STMT_INT_ARRAY_DECL) { compile_type_array_declaration(a0, a1, 4); }
  3754. else if (x0 == STMT_VAR_DECL) { compile_variable_declaration(a0, a1); }
  3755. }
  3756. ## Compile global statement
  3757. # a0 - compiler
  3758. # a1 - statement
  3759. compile_global_statement : (a0, a1) {
  3760. # x0 - compiler output channel
  3761. # x1 - counter
  3762. allocate(2);
  3763. # Getting output channel
  3764. x0 = compiler_output_chan(a0);
  3765. # Writing .global macro
  3766. fputs(x0, ".global ");
  3767. # Iterating through identifiers
  3768. x1 = 0;
  3769. while (x1 < vector_size(a1[2])) {
  3770. if (x1 != 0) { fputc(x0, ','); }
  3771. fputs(x0, vector_get(a1[2], x1));
  3772. x1 = x1 + 1;
  3773. }
  3774. # Writing new line to finish macro
  3775. fputc(x0, '\n');
  3776. }
  3777. ## Compile function declaration
  3778. # a0 - compiler
  3779. # a1 - statement
  3780. compile_function_declaration : (a0, a1) {
  3781. # x0 - compiler output channel
  3782. # x1 - block statement
  3783. allocate(2);
  3784. # Getting compiler output channel
  3785. x0 = compiler_output_chan(a0);
  3786. # Fetching block statement
  3787. x1 = a1[4];
  3788. # Changing segment to .text, if needed
  3789. compiler_set_segment(a0, SEG_TEXT);
  3790. # Writing function label
  3791. fputs(x0, a1[2]);
  3792. fputs(x0, ":\n");
  3793. # Backing up stack pointer
  3794. fputs(x0, "\tpushl %ebp\n");
  3795. fputs(x0, "\tmovl %esp, %ebp\n");
  3796. # Compiling our function block
  3797. compile_block_statement(a0, x1);
  3798. # `leave` and `ret` if needed
  3799. compile_function_declaration_return(x0, x1);
  3800. }
  3801. ## Compile return function instructions, if needed
  3802. # a0 - compiler output channel
  3803. # a1 - block statement
  3804. compile_function_declaration_return : (a0, a1) {
  3805. # x0 - statements
  3806. # x1 - last statement
  3807. # x2 - last statement type
  3808. allocate(3);
  3809. # Fetching statements in block
  3810. x0 = a1[2];
  3811. # Checking if statements vector is empty
  3812. if (vector_size(x0) == 0) { goto &compile_function_declaration_return_print; }
  3813. # Fetching last statement in block( we can change AST now, as we compiled it)
  3814. x1 = vector_pop(x0);
  3815. x2 = x1[NODE_TYPE];
  3816. # If last statement is return, returning :D
  3817. if (x2 == STMT_RETURN) { return; }
  3818. label compile_function_declaration_return_print;
  3819. # If we are here, we need to print return things :3
  3820. fputs(a0, "\tleave\n\tret\n");
  3821. }
  3822. ## Compile array declaration
  3823. # a0 - compiler
  3824. # a1 - statement
  3825. compile_array_declaration : (a0, a1) {
  3826. # x0 - compiler output channel
  3827. # x1 - index
  3828. allocate(2);
  3829. # Getting output channel
  3830. x0 = compiler_output_chan(a0);
  3831. # Changing segment to .data, if needed
  3832. compiler_set_segment(a0, SEG_DATA);
  3833. # Creating array label
  3834. fputs(x0, a1[2]);
  3835. fputs(x0, ":.long ");
  3836. # Compiling array values
  3837. x1 = 0;
  3838. while (x1 < vector_size(a1[3])) {
  3839. if (x1 != 0) { fputc(x0, ','); }
  3840. compile_simple_value(a0, vector_get(a1[3], x1));
  3841. x1 = x1 + 1;
  3842. }
  3843. # Writing newline to end .long macro
  3844. fputc(x0, '\n');
  3845. }
  3846. ## Compile type array declaration
  3847. # a0 - compiler
  3848. # a1 - statement
  3849. # a2 - type size
  3850. compile_type_array_declaration : (a0, a1, a2) {
  3851. # x0 - compiler output channel
  3852. # x1 - size expression
  3853. allocate(2);
  3854. # Getting output channel
  3855. x0 = compiler_output_chan(a0);
  3856. # Fetching size expression
  3857. x1 = a1[3];
  3858. # Checking we have integer in size expression
  3859. if (x1[NODE_TYPE] != STMT_EXPR_INTEGER) {
  3860. compiler_error(a0, x2, "Only integers can be used for size of array");
  3861. }
  3862. # Changing segment to .data, if needed
  3863. compiler_set_segment(a0, SEG_DATA);
  3864. # Writing .comm macro
  3865. fputs(x0, ".comm ");
  3866. # Writing array name
  3867. fputs(x0, a1[2]);
  3868. # Writing array size
  3869. fputc(x0, ',');
  3870. fputd(x0, x1[2] * a2);
  3871. # Writing new line to finish macro
  3872. fputc(x0, '\n');
  3873. }
  3874. ## Compile variable declaration
  3875. # a0 - compiler
  3876. # a1 - statement
  3877. compile_variable_declaration : (a0, a1) {
  3878. # x0 - compiler output channel
  3879. allocate(1);
  3880. # Getting output channel
  3881. x0 = compiler_output_chan(a0);
  3882. # Changing segment to .data, if needed
  3883. compiler_set_segment(a0, SEG_DATA);
  3884. # Creating variable label
  3885. fputs(x0, a1[2]);
  3886. fputs(x0, ":.long ");
  3887. # Compile variable value
  3888. compile_simple_value(a0, a1[3]);
  3889. # Writing newline to end .long macro
  3890. fputc(x0, '\n');
  3891. }
  3892. ## Compile block statement
  3893. # a0 - compiler
  3894. # a1 - block statement
  3895. compile_block_statement : (a0, a1) {
  3896. # x0 - statement index
  3897. # x1 - unreachable
  3898. # x2 - statement
  3899. # x3 - statement type
  3900. allocate(4);
  3901. # Iterating through statements
  3902. x0 = 0;
  3903. x1 = FALSE;
  3904. while (x0 < vector_size(a1[2])) {
  3905. # Fetching statement
  3906. x2 = vector_get(a1[2], x0);
  3907. # Fetching statement type
  3908. x3 = x2[NODE_TYPE];
  3909. # Checking if code is unreachable
  3910. if (x1) {
  3911. # If we are defining new label, toggle unreachable flag
  3912. if (x3 == STMT_LABEL) {
  3913. x1 = FALSE;
  3914. } else {
  3915. # Otherwise, erroring
  3916. compiler_error(a0, x2, "Unreachable code.");
  3917. }
  3918. }
  3919. # Compiling statement
  3920. compile_statement(a0, vector_get(a1[2], x0));
  3921. # Checking if statement is break/continue/goto/return
  3922. if (x3 == STMT_RETURN || x3 == STMT_GOTO
  3923. || x3 == STMT_BREAK || x3 == STMT_CONTINUE) { x1 = TRUE; }
  3924. # Moving to next statement
  3925. x0 = x0 + 1;
  3926. }
  3927. }
  3928. ## Compile statement
  3929. # a0 - compiler
  3930. # a1 - statement
  3931. compile_statement : (a0, a1) {
  3932. # x0 - statement ID
  3933. allocate(1);
  3934. # Fetching statement ID
  3935. x0 = a1[NODE_TYPE];
  3936. # Searching for statement
  3937. if (x0 == STMT_IF) { compile_if_statement(a0, a1); }
  3938. else if (x0 == STMT_FOR) { compile_for_statement(a0, a1); }
  3939. else if (x0 == STMT_DO_WHILE) { compile_while_statement(a0, a1, TRUE); }
  3940. else if (x0 == STMT_WHILE) { compile_while_statement(a0, a1, FALSE); }
  3941. else if (x0 == STMT_ALLOCATE) { compile_allocate_statement(a0, a1); }
  3942. else if (x0 == STMT_ASM) { compile_assembly_statement(a0, a1); }
  3943. else if (x0 == STMT_RETURN) { compile_return_statement(a0, a1); }
  3944. else if (x0 == STMT_BREAK || x0 == STMT_CONTINUE) { compile_loop_jump_statement(a0, a1); }
  3945. else if (x0 == STMT_WRITECHAR) { compile_writechar_statement(a0, a1); }
  3946. else if (x0 == STMT_LABEL) { compile_label_statement(a0, a1); }
  3947. else if (x0 == STMT_GOTO) { compile_goto_statement(a0, a1); }
  3948. else if (x0 == STMT_BLOCK) { compile_block_statement(a0, a1); }
  3949. # If none of statements wasn't found, trying to compile expression
  3950. else { compile_expression(a0, a1); }
  3951. }
  3952. ## Compile if statement
  3953. # a0 - compiler
  3954. # a1 - statement
  3955. compile_if_statement : (a0, a1) {
  3956. # x0 - compiler output channel
  3957. # x1 - exit label
  3958. # x2 - next label
  3959. # x3 - counter
  3960. # x4 - statement
  3961. allocate(5);
  3962. # Getting output channel
  3963. x0 = compiler_output_chan(a0);
  3964. # Generating exit label
  3965. x1 = compiler_new_label(a0);
  3966. # Iterating through branches
  3967. x3 = 0;
  3968. while (x3 < vector_size(a1[2])) {
  3969. # If we compiling else branch, finish previous branch
  3970. if (x3 != 0) {
  3971. # Jump to exit label
  3972. fputs(x0, "\tjmp ");
  3973. fputlabel(x0, x1);
  3974. fputc(x0, '\n');
  3975. # Print next label definition
  3976. fputlabeldef(x0, x2);
  3977. }
  3978. # Fetching statement
  3979. x4 = vector_get(a1[2], x3);
  3980. # If statement is not a block (thus it's condition)
  3981. if (x4[NODE_TYPE] != STMT_BLOCK) {
  3982. # Compiling condition
  3983. compile_expression(a0, x4);
  3984. # Generating next label, if this is not last branch. Otherwise, use exit
  3985. x2 = x3 + 2 < vector_size(a1[2]) ? compiler_new_label(a0) : x1;
  3986. # Compiling condition jump (value == 0 -> jump to next label)
  3987. fputs(x0, "\tcmpl $0, %eax\n");
  3988. fputs(x0, "\tje "); fputlabel(x0, x2); fputc(x0, '\n');
  3989. # Moving to block statement
  3990. x3 = x3 + 1;
  3991. x4 = vector_get(a1[2], x3);
  3992. }
  3993. # Compiling block statement
  3994. compile_block_statement(a0, x4);
  3995. # Moving to next statement
  3996. x3 = x3 + 1;
  3997. }
  3998. # Printing exit label definition
  3999. fputlabeldef(x0, x1);
  4000. }
  4001. ## Compile for statement
  4002. # a0 - compiler
  4003. # a1 - statement
  4004. compile_for_statement : (a0, a1) {
  4005. # x0 - compiler output channel
  4006. # x1, x2, x3 - labels
  4007. # x4 - counter
  4008. allocate(5);
  4009. # Getting output channel
  4010. x0 = compiler_output_chan(a0);
  4011. # Getting label IDs
  4012. x1 = compiler_new_label(a0);
  4013. x2 = compiler_new_label(a0);
  4014. x3 = compiler_new_label(a0);
  4015. # Compile init expression if it's not null
  4016. if (a1[2]) { compile_expression(a0, a1[2]); }
  4017. # Printing label for "loop start"
  4018. fputlabeldef(x0, x1);
  4019. # Compiling condition if it is not null
  4020. if (a1[3]) {
  4021. compile_expression(a0, a1[3]);
  4022. # Comparing condition with zero
  4023. fputs(x0, "\tcmpl $0, %eax\n");
  4024. # If equal to zero, exitting loop
  4025. fputs(x0, "\tje ");
  4026. fputlabel(x0, x3);
  4027. fputc(x0, '\n');
  4028. }
  4029. # Pushing loop to list
  4030. compiler_push_loop(a0, x2, x3);
  4031. # Compiling for block
  4032. compile_block_statement(a0, a1[5]);
  4033. # Pop loop from list
  4034. compiler_pop_loop(a0);
  4035. # Printing label for iterating expressions
  4036. fputlabeldef(x0, x2);
  4037. # Iterating through iteration list
  4038. x4 = 0;
  4039. while (x4 < vector_size(a1[4])) {
  4040. # Compiling iteration expression
  4041. compile_expression(a0, vector_get(a1[4], x4));
  4042. # Moving to next expression
  4043. x4 = x4 + 1;
  4044. }
  4045. # Making next iteration in for loop
  4046. fputs(x0, "\tjmp ");
  4047. fputlabel(x0, x1);
  4048. fputc(x0, '\n');
  4049. # Printing exit label
  4050. fputlabeldef(x0, x3);
  4051. }
  4052. ## Compile while statement
  4053. # a0 - compiler
  4054. # a1 - statement
  4055. # a2 - cond in the end
  4056. compile_while_statement : (a0, a1, a2) {
  4057. # x0 - compiler output channel
  4058. # x1, x2 - labels
  4059. allocate(3);
  4060. # Getting output channel
  4061. x0 = compiler_output_chan(a0);
  4062. # Getting label IDs
  4063. x1 = compiler_new_label(a0);
  4064. x2 = compiler_new_label(a0);
  4065. # Printing label for "loop start"
  4066. fputlabeldef(x0, x1);
  4067. # Compiling condition if it's in the start
  4068. if (!a2) { compile_while_condition(a0, a1[2], a2, x1, x2); }
  4069. # Pushing loop to list
  4070. compiler_push_loop(a0, x1, x2);
  4071. # Compiling while block
  4072. compile_block_statement(a0, a1[3]);
  4073. # Pop loop from list
  4074. compiler_pop_loop(a0, x1, x2);
  4075. # Compiling condition if it's in the end
  4076. if (a2) {
  4077. compile_while_condition(a0, a1[2], a2, x1, x2);
  4078. } else {
  4079. # Making next iteration in while loop
  4080. fputs(x0, "\tjmp ");
  4081. fputlabel(x0, x1);
  4082. fputc(x0, '\n');
  4083. }
  4084. # Printing exit label
  4085. fputlabeldef(x0, x2);
  4086. }
  ## Compile the condition test of a while loop
  # a0 - compiler
  # a1 - condition expression
  # a2 - non-zero when the condition is tested at the end of the body
  # a3 - loop label
  # a4 - exit label
  compile_while_condition : (a0, a1, a2, a3, a4) {
      # x0 - compiler output channel
      allocate(1);
      x0 = compiler_output_chan(a0);
      # Condition value is compared against zero in %eax
      compile_expression(a0, a1);
      fputs(x0, "\tcmpl $0, %eax\n");
      if (a2) {
          # Trailing test: go around again while the condition is non-zero
          fputs(x0, "\tjne ");
          fputlabel(x0, a3);
      } else {
          # Leading test: leave the loop as soon as the condition is zero
          fputs(x0, "\tje ");
          fputlabel(x0, a4);
      }
      fputc(x0, '\n');
  }
  ## Compile allocate statement (moves %esp down to make room on the stack)
  # a0 - compiler
  # a1 - statement (a1[2] - size expression)
  compile_allocate_statement : (a0, a1) {
      # x0 - compiler output channel
      # x1 - size expression
      # x2 - number of bytes to reserve
      allocate(3);
      x1 = a1[2];
      x0 = compiler_output_chan(a0);
      # Only an integer node is accepted as the size
      if (x1[NODE_TYPE] != STMT_EXPR_INTEGER) {
          compiler_error(a0, x1, "Size must be integer in allocate statement.");
      }
      # The requested count is scaled by 4 bytes per unit
      x2 = 4 * x1[2];
      # Zero and negative sizes are rejected
      if (x2 < 1) {
          compiler_error(a0, x1, "Size must be positive integer.");
      }
      # Emitting: subl $<bytes>, %esp
      fputs(x0, "\tsubl $");
      fputd(x0, x2);
      fputs(x0, ", %esp\n");
  }
  ## Compile assembly statement: copy its lines to the output verbatim
  # a0 - compiler
  # a1 - statement (a1[2] - vector of assembly line strings)
  compile_assembly_statement : (a0, a1) {
      # x0 - compiler output channel
      # x1 - index of the current line
      allocate(2);
      x0 = compiler_output_chan(a0);
      x1 = 0;
      while (x1 < vector_size(a1[2])) {
          # Each line is written as: tab, text, newline
          fputc(x0, '\t');
          fputs(x0, vector_get(a1[2], x1));
          fputc(x0, '\n');
          x1 = x1 + 1;
      }
  }
  ## Compile return statement
  # a0 - compiler
  # a1 - statement (a1[2] - return array, empty when nothing is returned)
  compile_return_statement : (a0, a1) {
      # x0 - return array
      allocate(1);
      x0 = a1[2];
      # A non-empty array holds the value expression
      # NOTE(review): vector_pop presumably removes the expression from the
      # array, so the statement node would be changed by compiling it - confirm
      if (vector_size(x0)) {
          compile_expression(a0, vector_pop(x0));
      }
      # Function epilogue
      fputs(compiler_output_chan(a0), "\tleave\n\tret\n");
  }
  ## Compile break/continue statement
  # a0 - compiler
  # a1 - statement (its node type tells break from continue)
  compile_loop_jump_statement : (a0, a1) {
      # x0 - compiler output channel
      # x1 - loop tuple of the current loop
      # x2 - label to jump to
      allocate(3);
      x0 = compiler_output_chan(a0);
      x1 = compiler_loop(a0);
      # Outside of any loop there is nothing to jump to
      if (!x1) {
          compiler_error(a0, a1, "Tried to break/continue with no loop there.");
          return;
      }
      # break takes slot 1 of the tuple, anything else takes slot 0
      if (a1[NODE_TYPE] == STMT_BREAK) {
          x2 = x1[1];
      } else {
          x2 = x1[0];
      }
      # Emitting: jmp <label>
      fputs(x0, "\tjmp ");
      fputlabel(x0, x2);
      fputc(x0, '\n');
  }
  ## Compile writechar statement: store one byte at base + index
  # a0 - compiler
  # a1 - statement (a1[2] - base address, a1[3] - index, a1[4] - value)
  compile_writechar_statement : (a0, a1) {
      # x0 - compiler output channel
      # x1 - base address expression
      # x2 - index expression
      # x3 - value expression
      allocate(4);
      x0 = compiler_output_chan(a0);
      x1 = a1[2];
      x2 = a1[3];
      x3 = a1[4];
      # Base address is computed first and kept in %edx, which stays reserved
      compile_expression(a0, x1);
      compiler_allocate_register(a0, REG_EDX);
      fputs(x0, "\tmovl %eax, %edx\n");
      # Index is added to the base, giving the target address in %edx
      compile_expression(a0, x2);
      fputs(x0, "\taddl %eax, %edx\n");
      # Low byte of the value is stored at that address, then %edx is released
      compile_expression(a0, x3);
      fputs(x0, "\tmovb %al, (%edx)\n");
      compiler_free_register(a0, REG_EDX);
  }
  ## Compile label statement
  # a0 - compiler
  # a1 - statement (a1[2] - label name)
  compile_label_statement : (a0, a1) {
      # Emitting: <name>:
      fputs(compiler_output_chan(a0), a1[2]);
      fputs(compiler_output_chan(a0), ":\n");
  }
  ## Compile goto statement (jump to a computed address)
  # a0 - compiler
  # a1 - statement (a1[2] - expression giving the target address)
  compile_goto_statement : (a0, a1) {
      # x0 - compiler output channel
      allocate(1);
      # Target address is computed into %eax
      compile_expression(a0, a1[2]);
      # Indirect jump through %eax
      x0 = compiler_output_chan(a0);
      fputs(x0, "\tjmp *%eax\n");
  }
  ## Compile expression: dispatch on the node type to the matching compile routine
  # a0 - compiler
  # a1 - expression node
  compile_expression : (a0, a1) {
      # x0 - node type
      allocate(1);
      x0 = a1[NODE_TYPE];
      ## Constants
      if (x0 == STMT_EXPR_INTEGER) {
          compile_integer_expression(a0, a1);
      } else if (x0 == STMT_EXPR_STRING) {
          compile_string_expression(a0, a1);
      }
      ## Postfix
      else if (x0 == STMT_EXPR_CALL) {
          compile_address_call_expression(a0, a1);
      } else if (x0 == STMT_EXPR_INDEX) {
          compile_address_index_expression(a0, a1);
      }
      ## Simple
      else if (x0 == STMT_EXPR_IDENTIFIER) {
          compile_simple_identifier_expression(a0, a1);
      } else if (x0 == STMT_EXPR_ARGUMENT) {
          compile_simple_argument_expression(a0, a1);
      } else if (x0 == STMT_EXPR_VARIABLE) {
          compile_simple_variable_expression(a0, a1);
      } else if (x0 == STMT_EXPR_SYSCALL) {
          compile_simple_syscall_expression(a0, a1);
      } else if (x0 == STMT_EXPR_READCHAR) {
          compile_simple_readchar_expression(a0, a1);
      }
      ## Prefix
      else if (x0 == STMT_EXPR_PLUS) {
          compile_prefix_plus_expression(a0, a1);
      } else if (x0 == STMT_EXPR_MINUS) {
          compile_prefix_minus_expression(a0, a1);
      } else if (x0 == STMT_EXPR_LOGICAL_NOT) {
          compile_prefix_logical_not_expression(a0, a1);
      } else if (x0 == STMT_EXPR_NOT) {
          compile_prefix_not_expression(a0, a1);
      } else if (x0 == STMT_EXPR_DEREF) {
          compile_prefix_deref_expression(a0, a1);
      } else if (x0 == STMT_EXPR_ADDROF) {
          compile_prefix_addrof_expression(a0, a1);
      }
      ## Arithmetic and bitwise operations (one contiguous range of node types)
      else if (x0 >= STMT_EXPR_MUL && x0 <= STMT_EXPR_OR) {
          compile_bitwise_arithmetic_expression(a0, x0, a1[2], a1[3]);
      }
      ## Relational and equality operators all share one routine
      else if (x0 == STMT_EXPR_LESS || x0 == STMT_EXPR_GREATER ||
               x0 == STMT_EXPR_LE || x0 == STMT_EXPR_GE ||
               x0 == STMT_EXPR_EQ || x0 == STMT_EXPR_NE) {
          compile_comparison_expression(a0, a1);
      }
      ## Logical operations (third argument: FALSE for AND, TRUE for OR)
      else if (x0 == STMT_EXPR_LOGICAL_AND) {
          compile_logical_expression(a0, a1, FALSE);
      } else if (x0 == STMT_EXPR_LOGICAL_OR) {
          compile_logical_expression(a0, a1, TRUE);
      }
      ## Ternary
      else if (x0 == STMT_EXPR_TERNARY) {
          compile_ternary_expression(a0, a1);
      }
      ## Assignment and {post,pre}{dec,inc} (one contiguous range of node types)
      else if (x0 >= STMT_EXPR_ASSIGN && x0 <= STMT_EXPR_POSTINC) {
          compile_assignment_expression(a0, a1);
      }
      ## Anything else is not an expression this compiler knows
      else {
          compiler_error(a0, a1, "This expression can't be compiled.");
      }
  }
  ## Compile call expression
  # a0 - compiler
  # a1 - statement (a1[2] - call vector: element 0 is the callee, the rest are arguments)
  compile_address_call_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - call vector
      # x2 - call vector size (callee included)
      # x3 - argument index
      # x4 - callee expression
      allocate(5);
      x0 = compiler_output_chan(a0);
      x1 = a1[2];
      x2 = vector_size(x1);
      x4 = vector_get(x1, 0);
      # %ebx is reserved first, as a computed callee address is parked there
      compiler_allocate_register(a0, REG_EBX);
      # A callee that is not a plain identifier has to be evaluated to an address
      if (x4[NODE_TYPE] != STMT_EXPR_IDENTIFIER) {
          compile_expression(a0, x4);
          fputs(x0, "\tmovl %eax, %ebx\n");
      }
      # Remaining registers are reserved for the duration of the call
      compiler_allocate_register(a0, REG_ECX);
      compiler_allocate_register(a0, REG_EDX);
      compiler_allocate_register(a0, REG_ESI);
      compiler_allocate_register(a0, REG_EDI);
      # Stack room for the arguments, 4 bytes each
      if (x2 > 1) {
          fputs(x0, "\tsubl $");
          fputd(x0, 4 * (x2 - 1));
          fputs(x0, ", %esp\n");
      }
      # Argument k (1-based in the vector) is stored at 4*(k-1)(%esp)
      for (x3 = 1; x3 < x2; x3++) {
          compile_expression(a0, vector_get(x1, x3));
          fputs(x0, "\tmovl %eax, ");
          fputd(x0, 4 * (x3 - 1));
          fputs(x0, "(%esp)\n");
      }
      # The call itself: indirect through %ebx, or direct by name
      if (x4[NODE_TYPE] != STMT_EXPR_IDENTIFIER) {
          fputs(x0, "\tcall *%ebx\n");
      } else {
          fputs(x0, "\tcall ");
          fputs(x0, x4[2]);
          fputc(x0, '\n');
      }
      # Argument area is dropped again
      if (x2 > 1) {
          fputs(x0, "\taddl $");
          fputd(x0, 4 * (x2 - 1));
          fputs(x0, ", %esp\n");
      }
      # Registers are released in reverse order of reservation
      compiler_free_register(a0, REG_EDI);
      compiler_free_register(a0, REG_ESI);
      compiler_free_register(a0, REG_EDX);
      compiler_free_register(a0, REG_ECX);
      compiler_free_register(a0, REG_EBX);
  }
  ## Compile address index expression (reads the value stored at base[index])
  # a0 - compiler
  # a1 - statement
  compile_address_index_expression : (a0, a1) {
      # x0 - compiler output channel
      allocate(1);
      # Getting output channel
      x0 = compiler_output_chan(a0);
      # Element address is computed into %eax
      compile_address_index_address(a0, a1);
      # The value at that address replaces the address in %eax
      fputs(x0, "\tmovl (%eax), %eax\n");
  }
  ## Compile the address of an indexed element (base + 4 * index) into %eax
  # a0 - compiler
  # a1 - statement (a1[2] - indexed expression, a1[3] - index expression)
  compile_address_index_address : (a0, a1) {
      # x0 - compiler output channel
      # x1 - indexed expression (first operand)
      # x2 - node type of the first operand
      allocate(3);
      x0 = compiler_output_chan(a0);
      x1 = a1[2];
      x2 = x1[NODE_TYPE];
      if (x2 == STMT_EXPR_INDEX) {
          # Nested index: the inner element address is computed, and the value
          # stored there becomes the base, kept in %edx
          compile_address_index_address(a0, x1);
          compiler_allocate_register(a0, REG_EDX);
          fputs(x0, "\tmovl (%eax), %edx\n");
      } else if (x2 != STMT_EXPR_IDENTIFIER) {
          # Any other non-identifier: its value is the base, kept in %edx
          compile_expression(a0, x1);
          compiler_allocate_register(a0, REG_EDX);
          fputs(x0, "\tmovl %eax, %edx\n");
      }
      # Index value goes to %eax
      compile_expression(a0, a1[3]);
      # Address is formed with a single leal, scaling the index by 4
      fputs(x0, "\tleal ");
      if (x2 != STMT_EXPR_IDENTIFIER) {
          # Base comes from %edx, which is released afterwards
          fputs(x0, "(%edx,%eax,4), %eax\n");
          compiler_free_register(a0, REG_EDX);
      } else {
          # Identifier: its name is used directly as the displacement
          fputs(x0, x1[2]);
          fputs(x0, "(,%eax,4), %eax\n");
      }
  }
  ## Compile identifier expression (loads the value stored at the symbol)
  # a0 - compiler
  # a1 - statement (a1[2] - identifier name)
  compile_simple_identifier_expression : (a0, a1) {
      # Emitting: movl <name>, %eax
      fputs(compiler_output_chan(a0), "\tmovl ");
      fputs(compiler_output_chan(a0), a1[2]);
      fputs(compiler_output_chan(a0), ", %eax\n");
  }
  ## Compile argument expression (loads a function argument)
  # a0 - compiler
  # a1 - statement (a1[2] - argument number)
  compile_simple_argument_expression : (a0, a1) {
      # Emitting: movl <argument slot>, %eax
      fputs(compiler_output_chan(a0), "\tmovl ");
      fputarg(compiler_output_chan(a0), a1[2]);
      fputs(compiler_output_chan(a0), ", %eax\n");
  }
  ## Print the stack slot of an argument to output channel
  # a0 - output channel
  # a1 - argument number
  fputarg : (a0, a1) {
      # Slot of argument N is 4*(N+2) bytes above %ebp, so argument 0 is 8(%ebp)
      fputd(a0, (a1 + 2) * 4);
      fputs(a0, "(%ebp)");
  }
  ## Compile variable expression (loads a local variable)
  # a0 - compiler
  # a1 - statement (a1[2] - variable number)
  compile_simple_variable_expression : (a0, a1) {
      # Emitting: movl <variable slot>, %eax
      fputs(compiler_output_chan(a0), "\tmovl ");
      fputvar(compiler_output_chan(a0), a1[2]);
      fputs(compiler_output_chan(a0), ", %eax\n");
  }
  ## Print the stack slot of a local variable to output channel
  # a0 - output channel
  # a1 - variable number
  fputvar : (a0, a1) {
      # Slot of variable N is 4*(N+1) bytes below %ebp, so variable 0 is -4(%ebp)
      fputc(a0, '-');
      fputd(a0, (a1 + 1) * 4);
      fputs(a0, "(%ebp)");
  }
  ## Compile syscall expression (int $0x80)
  # a0 - compiler
  # a1 - statement (a1[2] - argument vector; element 0 ends up in %eax)
  compile_simple_syscall_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - arguments
      # x2 - argument count
      # x3 - argument index
      allocate(4);
      x0 = compiler_output_chan(a0);
      x1 = a1[2];
      x2 = vector_size(x1);
      # Only the registers that will receive an argument are reserved
      if (x2 > 1) { compiler_allocate_register(a0, REG_EBX); }
      if (x2 > 2) { compiler_allocate_register(a0, REG_ECX); }
      if (x2 > 3) { compiler_allocate_register(a0, REG_EDX); }
      if (x2 > 4) { compiler_allocate_register(a0, REG_ESI); }
      if (x2 > 5) { compiler_allocate_register(a0, REG_EDI); }
      # With more than six arguments %ebp is saved by hand
      if (x2 > 6) { fputs(x0, "\tpushl %ebp\n"); }
      # NOTE(review): nothing here limits the argument count; registers[x3] is
      # read for every index above 0 - confirm the table covers all of them
      for (x3 = 0; x3 < x2; x3++) {
          compile_expression(a0, vector_get(x1, x3));
          if (x3 == 0) {
              # Element 0 belongs in %eax, but compiling the following arguments
              # overwrites %eax, so it is parked on the stack when more follow
              if (x2 > 1) { fputs(x0, "\tpushl %eax\n"); }
          } else {
              # Every later element moves from %eax into its own register
              fputs(x0, "\tmovl %eax, ");
              fputs(x0, registers[x3]);
              fputc(x0, '\n');
          }
      }
      # Parked element 0 returns to %eax
      if (x2 > 1) { fputs(x0, "\tpopl %eax\n"); }
      # The syscall itself
      fputs(x0, "\tint $0x80\n");
      # Everything is restored in reverse order
      if (x2 > 6) { fputs(x0, "\tpopl %ebp\n"); }
      if (x2 > 5) { compiler_free_register(a0, REG_EDI); }
      if (x2 > 4) { compiler_free_register(a0, REG_ESI); }
      if (x2 > 3) { compiler_free_register(a0, REG_EDX); }
      if (x2 > 2) { compiler_free_register(a0, REG_ECX); }
      if (x2 > 1) { compiler_free_register(a0, REG_EBX); }
  }
  ## Compile readchar expression: load one sign-extended byte from base + index
  # a0 - compiler
  # a1 - statement (a1[2] - base address, a1[3] - index)
  compile_simple_readchar_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - base address expression
      # x2 - index expression
      allocate(3);
      x0 = compiler_output_chan(a0);
      x1 = a1[2];
      x2 = a1[3];
      # Base address is computed first and kept in %edx, which stays reserved
      compile_expression(a0, x1);
      compiler_allocate_register(a0, REG_EDX);
      fputs(x0, "\tmovl %eax, %edx\n");
      # Index is added to the base, giving the source address in %edx
      compile_expression(a0, x2);
      fputs(x0, "\taddl %eax, %edx\n");
      # Byte is loaded with sign extension into %eax, then %edx is released
      fputs(x0, "\tmovsbl (%edx), %eax\n");
      compiler_free_register(a0, REG_EDX);
  }
  ## Compile plus prefix expression
  # a0 - compiler
  # a1 - statement (a1[2] - operand, as in the other prefix expressions)
  compile_prefix_plus_expression : (a0, a1) {
      # Unary plus does not change the value, but the operand still has to be
      # compiled: otherwise no code is emitted for it and %eax keeps whatever
      # the previously compiled code left there
      compile_expression(a0, a1[2]);
  }
  ## Compile minus prefix expression (arithmetic negation)
  # a0 - compiler
  # a1 - statement (a1[2] - operand)
  compile_prefix_minus_expression : (a0, a1) {
      # Operand value goes to %eax
      compile_expression(a0, a1[2]);
      # and is negated in place
      fputs(compiler_output_chan(a0), "\tnegl %eax\n");
  }
  ## Compile logical not expression (1 when the operand is zero, 0 otherwise)
  # a0 - compiler
  # a1 - statement (a1[2] - operand)
  compile_prefix_logical_not_expression : (a0, a1) {
      # x0 - compiler output channel
      allocate(1);
      # Operand value goes to %eax
      compile_expression(a0, a1[2]);
      x0 = compiler_output_chan(a0);
      # Testing it against zero
      fputs(x0, "\tcmpl $0, %eax\n");
      # %al receives the "equal" flag and is widened to the whole of %eax
      fputs(x0, "\tsete %al\n");
      fputs(x0, "\tmovzbl %al, %eax\n");
  }
  ## Compile not expression (bitwise complement)
  # a0 - compiler
  # a1 - statement (a1[2] - operand)
  compile_prefix_not_expression : (a0, a1) {
      # Operand value goes to %eax
      compile_expression(a0, a1[2]);
      # and every bit of it is flipped in place
      fputs(compiler_output_chan(a0), "\tnotl %eax\n");
  }
  ## Compile dereference expression (loads the 32-bit value at an address)
  # a0 - compiler
  # a1 - statement (a1[2] - expression giving the address)
  compile_prefix_deref_expression : (a0, a1) {
      # Address goes to %eax
      compile_expression(a0, a1[2]);
      # and is replaced by the value stored there
      fputs(compiler_output_chan(a0), "\tmovl (%eax), %eax\n");
  }
  ## Compile addressof expression
  # a0 - compiler
  # a1 - statement (a1[2] - expression whose address is taken)
  compile_prefix_addrof_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - operand expression
      # x2 - operand node type
      allocate(3);
      x0 = compiler_output_chan(a0);
      x1 = a1[2];
      x2 = x1[NODE_TYPE];
      if (x2 == STMT_EXPR_IDENTIFIER) {
          # Identifier: its name is loaded as an immediate
          fputs(x0, "\tmovl $");
          fputs(x0, x1[2]);
          fputs(x0, ", %eax\n");
      } else if (x2 == STMT_EXPR_ARGUMENT) {
          # Argument: address of its stack slot
          fputs(x0, "\tleal ");
          fputarg(x0, x1[2]);
          fputs(x0, ", %eax\n");
      } else if (x2 == STMT_EXPR_VARIABLE) {
          # Local variable: address of its stack slot
          fputs(x0, "\tleal ");
          fputvar(x0, x1[2]);
          fputs(x0, ", %eax\n");
      } else {
          # Nothing else is supported as an operand here
          compiler_error(a0, a1, "Can't compile addressof for this expression.");
      }
  }
  ## Compile bitwise arithmetic expression; the result is left in %eax
  # a0 - compiler
  # a1 - operation type (STMT_EXPR_MUL .. STMT_EXPR_OR)
  # a2 - first operand
  # a3 - second operand
  compile_bitwise_arithmetic_expression : (a0, a1, a2, a3) {
      # x0 - compiler output channel
      allocate(1);
      # Getting output channel
      x0 = compiler_output_chan(a0);
      # Compiling first operand
      compile_expression(a0, a2);
      # Reserving %ebx
      compiler_allocate_register(a0, REG_EBX);
      # DIV and MOD additionally need %edx for the division itself
      if (a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) {
          compiler_allocate_register(a0, REG_EDX);
      }
      # Keeping the first operand while the second one is compiled
      if (a1 == STMT_EXPR_SUB || a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) {
          # SUB, DIV and MOD: first operand waits on the stack, because it has
          # to come back into %eax while the second operand takes %ebx
          fputs(x0, "\tpushl %eax\n");
      } else {
          # Every other operation: first operand waits in %ebx
          fputs(x0, "\tmovl %eax, %ebx\n");
      }
      # Compiling second operand
      compile_expression(a0, a3);
      # Putting both operands where the instruction expects them
      if (a1 == STMT_EXPR_SHL || a1 == STMT_EXPR_SHR) {
          # Shifts: the count goes to %ecx (the instruction reads %cl)
          compiler_allocate_register(a0, REG_ECX);
          fputs(x0, "\tmovl %eax, %ecx\n");
          # and the value to be shifted goes back to %eax
          fputs(x0, "\tmovl %ebx, %eax\n");
      } else if (a1 == STMT_EXPR_SUB || a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) {
          # SUB, DIV and MOD: second operand goes to %ebx
          fputs(x0, "\tmovl %eax, %ebx\n");
          # and the first operand is restored to %eax
          fputs(x0, "\tpopl %eax\n");
          if (a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) {
              # divl divides %edx:%eax, so the upper half is cleared first
              fputs(x0, "\tmovl $0, %edx\n");
          }
      }
      # Compiling operation
      fputc(x0, '\t');
      if (a1 == STMT_EXPR_MUL) { fputs(x0, "imull %ebx, %eax"); }
      else if (a1 == STMT_EXPR_DIV) { fputs(x0, "divl %ebx"); }
      # MOD: the remainder is taken from %edx after the division
      else if (a1 == STMT_EXPR_MOD) { fputs(x0, "divl %ebx\n\tmovl %edx, %eax"); }
      else if (a1 == STMT_EXPR_ADD) { fputs(x0, "addl %ebx, %eax"); }
      else if (a1 == STMT_EXPR_SUB) { fputs(x0, "subl %ebx, %eax"); }
      else if (a1 == STMT_EXPR_SHL) { fputs(x0, "shll %cl, %eax"); }
      else if (a1 == STMT_EXPR_SHR) { fputs(x0, "shrl %cl, %eax"); }
      else if (a1 == STMT_EXPR_AND) { fputs(x0, "andl %ebx, %eax"); }
      else if (a1 == STMT_EXPR_XOR) { fputs(x0, "xorl %ebx, %eax"); }
      else if (a1 == STMT_EXPR_OR) { fputs(x0, "orl %ebx, %eax"); }
      fputc(x0, '\n');
      # Freeing used registers
      if (a1 == STMT_EXPR_SHL || a1 == STMT_EXPR_SHR) { compiler_free_register(a0, REG_ECX); }
      else if (a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) { compiler_free_register(a0, REG_EDX); }
      compiler_free_register(a0, REG_EBX);
  }
  ## Compile comparison expression (result in %eax is 1 or 0)
  # a0 - compiler
  # a1 - statement (a1[2] - first operand, a1[3] - second operand)
  compile_comparison_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - node type, selecting the comparison
      allocate(2);
      x0 = compiler_output_chan(a0);
      x1 = a1[NODE_TYPE];
      # TODO: make support of chained equations and comparisons.
      # First operand waits in %edx while the second is compiled into %eax
      compile_expression(a0, a1[2]);
      compiler_allocate_register(a0, REG_EDX);
      fputs(x0, "\tmovl %eax, %edx\n");
      compile_expression(a0, a1[3]);
      # Flags are set from first (in %edx) against second (in %eax)
      fputs(x0, "\tcmpl %eax, %edx\n");
      compiler_free_register(a0, REG_EDX);
      # The set instruction matching the operator writes the outcome to %al
      fputc(x0, '\t');
      if (x1 == STMT_EXPR_EQ) { fputs(x0, "sete"); }
      else if (x1 == STMT_EXPR_NE) { fputs(x0, "setne"); }
      else if (x1 == STMT_EXPR_LESS) { fputs(x0, "setl"); }
      else if (x1 == STMT_EXPR_GREATER) { fputs(x0, "setg"); }
      else if (x1 == STMT_EXPR_LE) { fputs(x0, "setle"); }
      else if (x1 == STMT_EXPR_GE) { fputs(x0, "setge"); }
      # %al is then widened to the whole of %eax
      fputs(x0, " %al\n\tmovzbl %al, %eax\n");
  }
  ## Compile short-circuit logical expression (result in %eax is 1 or 0)
  # a0 - compiler
  # a1 - statement (a1[2] - first operand, a1[3] - second operand)
  # a2 - zero for AND, non-zero for OR
  compile_logical_expression : (a0, a1, a2) {
      # x0 - compiler output channel
      # x1 - label the operands jump to when they decide the result early
      # x2 - exit label
      allocate(3);
      x0 = compiler_output_chan(a0);
      x1 = compiler_new_label(a0);
      x2 = compiler_new_label(a0);
      # Each operand jumps to x1 as soon as it settles the outcome
      compile_logical_expression_operand(a0, a1[2], a2, x1);
      compile_logical_expression_operand(a0, a1[3], a2, x1);
      # No operand jumped: AND yields 1, OR yields 0
      fputs(x0, a2 ? "\tmovl $0, %eax\n" : "\tmovl $1, %eax\n");
      # Skipping over the early-outcome part
      fputs(x0, "\tjmp ");
      fputlabel(x0, x2);
      fputc(x0, '\n');
      # Some operand jumped: AND yields 0, OR yields 1
      fputlabeldef(x0, x1);
      fputs(x0, a2 ? "\tmovl $1, %eax\n" : "\tmovl $0, %eax\n");
      # Both paths meet here
      fputlabeldef(x0, x2);
  }
  ## Compile one operand of a logical expression
  # a0 - compiler
  # a1 - operand expression
  # a2 - zero for AND, non-zero for OR
  # a3 - label to jump to when this operand decides the result
  compile_logical_expression_operand : (a0, a1, a2, a3) {
      # x0 - compiler output channel
      allocate(1);
      x0 = compiler_output_chan(a0);
      if (a1[NODE_TYPE] - STMT_EXPR_LOGICAL_AND == a2) {
          # The operand matches this test for the logical operation selected by
          # a2: both of its own operands are compiled against the same label,
          # so no labels are created for the nested expression
          compile_logical_expression_operand(a0, a1[2], a2, a3);
          compile_logical_expression_operand(a0, a1[3], a2, a3);
      } else {
          # Any other expression: its value is tested against zero
          compile_expression(a0, a1);
          fputs(x0, "\tcmpl $0, %eax\n");
          # AND jumps on zero, OR jumps on non-zero
          fputs(x0, a2 ? "\tjne " : "\tje ");
          fputlabel(x0, a3);
          fputc(x0, '\n');
      }
  }
  ## Compile ternary expression (condition ? true value : false value)
  # a0 - compiler
  # a1 - statement (a1[2] - condition, a1[3] - true value, a1[4] - false value)
  compile_ternary_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - label of the false value
      # x2 - exit label
      allocate(3);
      x1 = compiler_new_label(a0);
      x2 = compiler_new_label(a0);
      x0 = compiler_output_chan(a0);
      # Condition is tested against zero
      compile_expression(a0, a1[2]);
      fputs(x0, "\tcmpl $0, %eax\n");
      # A zero condition skips to the false value
      fputs(x0, "\tje ");
      fputlabel(x0, x1);
      fputc(x0, '\n');
      # True value, followed by a jump over the false value
      compile_expression(a0, a1[3]);
      fputs(x0, "\tjmp ");
      fputlabel(x0, x2);
      fputc(x0, '\n');
      # False value
      fputlabeldef(x0, x1);
      compile_expression(a0, a1[4]);
      # Both paths meet here with the chosen value in %eax
      fputlabeldef(x0, x2);
  }
  ## Compile assignment expression: plain, compound, and {pre,post}{inc,dec}
  # a0 - compiler
  # a1 - statement (a1[2] - target, a1[3] - value where the operation has one)
  compile_assignment_expression : (a0, a1) {
      # x0 - compiler output channel
      # x1 - node type of the assignment
      # x2 - target (first operand)
      # x3 - node type of the target
      allocate(4);
      x0 = compiler_output_chan(a0);
      x1 = a1[NODE_TYPE];
      x2 = a1[2];
      x3 = x2[NODE_TYPE];
      # Indexed and dereferenced targets: the destination address is computed
      # up front and kept in %ebx until the store
      if (x3 == STMT_EXPR_INDEX || x3 == STMT_EXPR_DEREF) {
          if (x3 == STMT_EXPR_INDEX) {
              compile_address_index_address(a0, x2);
          } else {
              compile_expression(a0, x2[2]);
          }
          compiler_allocate_register(a0, REG_EBX);
          fputs(x0, "\tmovl %eax, %ebx\n");
      }
      # Computing the value to store
      if (x1 == STMT_EXPR_ASSIGN) {
          # Plain assignment: just the right-hand side
          compile_expression(a0, a1[3]);
      } else if (x1 >= STMT_EXPR_PREDEC && x1 <= STMT_EXPR_POSTINC) {
          # Increment/decrement: the target is compiled (again) to read its value
          compile_expression(a0, x2);
          if (x1 == STMT_EXPR_PREDEC) {
              fputs(x0, "\tsubl $1, %eax\n");
          } else if (x1 == STMT_EXPR_PREINC) {
              fputs(x0, "\taddl $1, %eax\n");
          } else {
              # Postfix forms: %eax keeps the old value as the result, while
              # the new value is built in %edx
              compiler_allocate_register(a0, REG_EDX);
              fputs(x0, "\tleal ");
              if (x1 == STMT_EXPR_POSTDEC) { fputc(x0, '-'); }
              fputs(x0, "1(%eax), %edx\n");
          }
      } else {
          # Compound assignment: the assignment type is mapped onto the matching
          # binary operation, with the target (compiled again) as first operand
          compile_bitwise_arithmetic_expression(
              a0, x1 - STMT_EXPR_MUL_ASSIGN + STMT_EXPR_MUL, x2, a1[3]
          );
      }
      # The store: source register first
      fputs(x0, "\tmovl ");
      if (x1 != STMT_EXPR_POSTDEC && x1 != STMT_EXPR_POSTINC) {
          fputs(x0, "%eax");
      } else {
          # Postfix forms store the new value from %edx
          fputs(x0, "%edx");
      }
      # then the destination, depending on the kind of target
      fputs(x0, ", ");
      if (x3 == STMT_EXPR_IDENTIFIER) { fputs(x0, x2[2]); }
      else if (x3 == STMT_EXPR_ARGUMENT) { fputarg(x0, x2[2]); }
      else if (x3 == STMT_EXPR_VARIABLE) { fputvar(x0, x2[2]); }
      else if (x3 == STMT_EXPR_INDEX || x3 == STMT_EXPR_DEREF) { fputs(x0, "(%ebx)"); }
      else { compiler_error(a0, a1, "Can't use this as assignable operand."); }
      fputc(x0, '\n');
      # Releasing whatever was reserved above
      if (x1 == STMT_EXPR_POSTDEC || x1 == STMT_EXPR_POSTINC) { compiler_free_register(a0, REG_EDX); }
      if (x3 == STMT_EXPR_INDEX || x3 == STMT_EXPR_DEREF) { compiler_free_register(a0, REG_EBX); }
  }
  ## Compile integer expression (loads a constant)
  # a0 - compiler
  # a1 - statement (a1[2] - integer value)
  compile_integer_expression : (a0, a1) {
      # Emitting: movl $<value>, %eax
      fputs(compiler_output_chan(a0), "\tmovl $");
      fputd(compiler_output_chan(a0), a1[2]);
      fputs(compiler_output_chan(a0), ", %eax\n");
  }
  ## Compile string expression (loads the address of a string constant)
  # a0 - compiler
  # a1 - statement (a1[2] - number printed as the offset from strbuf)
  compile_string_expression : (a0, a1) {
      # Emitting: movl $strbuf+<offset>, %eax
      fputs(compiler_output_chan(a0), "\tmovl $strbuf+");
      fputd(compiler_output_chan(a0), a1[2]);
      fputs(compiler_output_chan(a0), ", %eax\n");
  }
  ## Compile simple value: print a constant operand as bare text, no instruction
  # a0 - compiler
  # a1 - statement (integer, string or identifier node)
  compile_simple_value : (a0, a1) {
      # x0 - compiler output channel
      # x1 - node type
      allocate(2);
      x0 = compiler_output_chan(a0);
      x1 = a1[NODE_TYPE];
      # TODO: check if statement can be precalculated, e.g. `2 + 2`
      if (x1 == STMT_EXPR_INTEGER) {
          # Integer: the number itself
          fputd(x0, a1[2]);
      } else if (x1 == STMT_EXPR_STRING) {
          # String: its position relative to strbuf
          fputs(x0, "strbuf+");
          fputd(x0, a1[2]);
      } else if (x1 == STMT_EXPR_IDENTIFIER) {
          # Identifier: its name
          fputs(x0, a1[2]);
      } else {
          # Nothing else can be written as a simple value
          compiler_error(a0, a1, "Expression can't be compiled as simple.");
      }
  }
  ## Print the one-line usage message through eputs
  # a0 - program name, printed between the "usage: " prefix and the synopsis
  usage : (a0) {
      # Message is written in three pieces: prefix, program name, synopsis
      eputs("usage: ");
      eputs(a0);
      eputs(" [option]... <file>\n");
  }
  ## Main function: compiles the file named by the first argument into "output.S"
  # a0 - argc
  # a1 - argv
  # @return exit code (1 when no file was given, 0 otherwise)
  main : (a0, a1) {
      # x0 - input channel
      # x1 - output channel
      # x2 - lexer
      # x3 - string buffer
      # x4 - parser
      # x5 - AST
      # x6 - compiler
      allocate(7);
      # Without a file argument only the usage line is printed
      if (a0 <= 1) {
          usage(a1[0]);
          return(1);
      }
      # TODO: implement option parser
      # Source is read from the file named by argv[1]
      x0 = input_chan_file(a1[1]);
      # Assembly always goes to a fixed file name
      x1 = output_chan_file("output.S");
      # Front end: lexer over the input, parser over the lexer; the string
      # buffer is handed to both the parser and the compiler
      x2 = lexer(x0, a1[1]);
      x3 = string_buffer();
      x4 = parser(x2, x3);
      x5 = parser_parse(x4);
      # Back end: compiling the AST to the output channel
      x6 = compiler(x1, x3);
      compiler_compile(x6, x5);
      return(0);
  }
  ## Entry point of the entire program
  global _start;
  _start : () {
      # I/O has to be set up before anything else runs
      initialize_io();
      # Nothing calls _start, so the slots normally holding arguments hold the
      # process start-up data instead: the word 4 below &a0 is passed as argc
      # and &a0 itself as argv
      # NOTE(review): presumably &a0 is 8(%ebp) and argc is 4(%ebp), i.e. the
      # offsets are relative to the frame pointer - confirm against the prologue
      exit(main(*(&a0 - 4), &a0));
  }