12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250 |
- ### Useful constants
- # Null address marker
- NULL => 0;
- # NOTE(review): presumably the end-of-input marker; no use is visible in this part of the file
- EOF => -1;
- ## Booleans
- FALSE => 0;
- TRUE => 1;
- ### System calls
- # Call numbers handed to syscall() as its first argument by the wrappers below.
- # NOTE(review): the values look like the 32-bit x86 Linux numbering - confirm the target.
- SYS_exit => 1;
- SYS_read => 3;
- SYS_write => 4;
- SYS_open => 5;
- SYS_close => 6;
- SYS_brk => 45;
- SYS_munmap => 91;
- SYS_getcwd => 183;
- SYS_mmap2 => 192;
- ### System call functions
- ## Terminate the program
- # a0 - exit code handed to the SYS_exit system call
- exit : (a0) {
- syscall(SYS_exit, a0);
- }
- ## Read bytes from a file
- # a0 - file
- # a1 - destination buffer
- # a2 - number of bytes requested
- # @return result of the SYS_read system call (read count)
- read : (a0, a1, a2) {
- return(syscall(SYS_read, a0, a1, a2));
- }
- ## Write bytes to a file
- # a0 - file
- # a1 - source buffer
- # a2 - number of bytes to write
- # @return result of the SYS_write system call (write count)
- write : (a0, a1, a2) {
- return(syscall(SYS_write, a0, a1, a2));
- }
- ## Open a file by path
- # a0 - path
- # a1 - flags
- # a2 - mode
- # @return result of the SYS_open system call (fd or error)
- open : (a0, a1, a2) {
- return(syscall(SYS_open, a0, a1, a2));
- }
- ## Close an open file
- # a0 - fd
- # @return result code of the SYS_close system call
- close : (a0) {
- return(syscall(SYS_close, a0));
- }
- ## Query or move the heap segment limit
- # a0 - new limit address, or NULL to only query the current one
- # @return heap limit reported by the SYS_brk system call
- brk : (a0) {
- return(syscall(SYS_brk, a0));
- }
- ## Remove a memory mapping
- # a0 - address
- # a1 - size
- # @return result of the SYS_munmap system call (zero on success, negative on error)
- munmap : (a0, a1) {
- return(syscall(SYS_munmap, a0, a1));
- }
- ## Create a memory mapping
- # a0 - address
- # a1 - size
- # a2 - protection flags
- # a3 - map flags
- # a4 - fd
- # a5 - offset
- # @return result of the SYS_mmap2 system call
- mmap2 : (a0, a1, a2, a3, a4, a5) { return(syscall(SYS_mmap2, a0, a1, a2, a3, a4, a5)); }
- ## Fetch the current working directory
- # a0 - buffer receiving the path
- # a1 - buffer size
- # @return result of the SYS_getcwd system call
- getcwd : (a0, a1) {
- return(syscall(SYS_getcwd, a0, a1));
- }
- ### Useful functions
- ## Round a number up to a multiple of a power of 2
- # a0 - number to be aligned
- # a1 - alignment (must be a power of 2)
- # @return aligned number
- align : (a0, a1) {
- # x0 - mask covering the bits below the alignment
- allocate(1);
- x0 = a1 - 1;
- # Stepping past the boundary, then clearing the low bits
- return((a0 + x0) & ~(x0));
- }
- ## Round a number down to a multiple of a power of 2
- # a0 - number to be aligned
- # a1 - alignment (must be a power of 2)
- # @return aligned number
- align_down : (a0, a1) {
- # Keeping only the bits at or above the alignment
- return(a0 & ~(a1 - 1));
- }
- ### Conversions
- ## Unsigned int to string
- # Writes the digits of a0 in base a2 into a1, followed by '\0'.
- # a0 - value
- # a1 - result buffer
- # a2 - base
- # @return result buffer (a1)
- # NOTE(review): digits are looked up in utoa_digits (36 symbols), so a2 presumably has to
- # be in 2..36, and a1 has to hold all digits plus the terminator - confirm with callers.
- utoa_digits : "0123456789abcdefghijklmnopqrstuvwxyz";
- utoa : (a0, a1, a2) {
- # x0 - i (write position, later the right end of the reversal)
- # x1 - j (left end of the reversal)
- # x2 - tmp
- allocate(3);
- x0 = 0;
- x1 = 0;
- # Writing first digit unconditionally, so that value 0 still produces "0"
- writechar(a1, x0, readchar(utoa_digits, a0 % a2));
- # Going to next unit
- a0 = a0 / a2;
- # Going to next char position
- x0 = x0 + 1;
- # Iterating while value is not zero
- while (a0 != 0) {
- # Writing needed char to result buffer
- writechar(a1, x0, readchar(utoa_digits, a0 % a2));
- # Going to next unit
- a0 = a0 / a2;
- # Going to next char position
- x0 = x0 + 1;
- }
- # Writing '\0' to the end of string
- writechar(a1, x0, '\0');
- # Reversing string: digits were produced least significant first
- while (x1 < x0) {
- # x0 starts at the terminator, so stepping back lands on the last unswapped char
- x0 = x0 - 1;
- # Storing char to temp
- x2 = readchar(a1, x1);
- # Moving a1[x0] to a1[x1]
- writechar(a1, x1, readchar(a1, x0));
- # Moving temp to a1[x0]
- writechar(a1, x0, x2);
- # Reversing next chars
- x1 = x1 + 1;
- }
- # Returning result buffer
- return(a1);
- }
- ### Memory Management
- ## Memory page size
- # Amount by which allocate_new_page moves the heap limit on each call
- PAGE_SIZE => 4096; # 0x1000
- ## Bounds of the part of the heap that has not been handed out yet
- # free_start - address the next malloc call returns
- # free_end - heap limit last reported by brk
- # Both stay NULL until allocate_new_page runs for the first time
- free_start : NULL;
- free_end : NULL;
- ## Allocate new memory page
- # Grows the heap by PAGE_SIZE bytes using brk and records the new limit in free_end.
- # On the first call the current limit is fetched with brk(NULL) and becomes free_start.
- # Exits with code 1 when the heap cannot grow: the Linux brk system call reports a
- # refused request by returning the unchanged limit, and malloc would retry forever.
- allocate_new_page : () {
- # x0 - heap limit reported by brk
- allocate(1);
- # If free_end is NULL, finding brk(NULL)
- if (free_end == NULL) { free_end = brk(NULL); free_start = free_end; }
- # Asking for one more page
- x0 = brk(free_end + PAGE_SIZE);
- # An unchanged limit means the request was refused
- if (x0 == free_end) { exit(1); }
- # Moving free memory segment end
- free_end = x0;
- }
- ## Allocate memory
- # Hands out the next a0 bytes of the free heap region; the heap is grown page by page
- # first when the region is too small.
- # a0 - size
- # @return allocated memory
- malloc : (a0) {
- # Growing the heap until the request fits
- while (free_start + a0 > free_end) { allocate_new_page(); }
- # Claiming the block by advancing the start of the free region
- free_start = free_start + a0;
- # The block begins where the free region started before the advance
- return(free_start - a0);
- }
- ## Free memory
- # a0 - address
- # NOTE: intentionally does nothing - malloc in this file only moves free_start forward
- # and keeps no record of handed out blocks, so memory cannot be given back
- free : (a0) {}
- ## Allocate cleared memory
- # a0 - size
- # @return allocated and cleared memory
- calloc : (a0) {
- # x0 - address
- allocate(1);
- # Allocating memory
- x0 = malloc(a0);
- # Clearing memory with zeroes (memset takes the fill value first, then address and size)
- memset(0, x0, a0);
- return(x0);
- }
- ## Move a memory block into a freshly allocated block of another size
- # a0 - allocated memory
- # a1 - old size
- # a2 - new size
- # @return new allocated memory (zero filled beyond the copied bytes)
- realloc : (a0, a1, a2) {
- # x0 - number of bytes to carry over
- # x1 - new block
- allocate(2);
- # Carrying over the smaller of the two sizes
- x0 = a2;
- if (a1 < a2) { x0 = a1; }
- # Getting a cleared block of the new size
- x1 = calloc(a2);
- # Moving the old content (memcpy takes the source first)
- memcpy(a0, x1, x0);
- # Releasing the old block
- free(a0);
- return(x1);
- }
- ### Memory/String operations
- ## Fill memory block with value (char)
- # a0 - value
- # a1 - address
- # a2 - size
- memset : (a0, a1, a2) {
- # a1 walks over the block while a2 counts the bytes still to be written
- while (a2 > 0) {
- writechar(a1, 0, a0);
- a1 = a1 + 1;
- a2 = a2 - 1;
- }
- }
- ## Copy bytes from one memory block to another, lowest address first
- # a0 - src
- # a1 - dest
- # a2 - size
- memcpy : (a0, a1, a2) {
- # a0 and a1 advance together while a2 counts the bytes still to be copied
- while (a2 > 0) {
- writechar(a1, 0, readchar(a0, 0));
- a0 = a0 + 1;
- a1 = a1 + 1;
- a2 = a2 - 1;
- }
- }
- ## Get length of string
- # a0 - string
- # @return number of chars before the terminating '\0'
- strlen : (a0) {
- # x0 - start of the string
- allocate(1);
- x0 = a0;
- # Advancing a0 to the terminator
- while (readchar(a0, 0) != '\0') { a0 = a0 + 1; }
- # Distance travelled is the length
- return(a0 - x0);
- }
- ## Get length of string counting escaped characters as one
- # A backslash together with the char that follows it counts as a single char.
- # A backslash that is the last char of the string counts as one char on its own;
- # the terminator after it is never skipped.
- # a0 - string
- # @return string length
- stresclen : (a0) {
- # x0 - string length
- allocate(1);
- x0 = 0;
- # While char is not NULL, incrementing length
- while (readchar(a0, 0) != '\0') {
- if (readchar(a0, 0) == '\\') {
- # Skipping the escaped char only if there is one, so the scan cannot run past '\0'
- if (readchar(a0, 1) != '\0') { a0 = a0 + 1; }
- }
- a0 = a0 + 1;
- x0 = x0 + 1;
- }
- return(x0);
- }
- ## Copy string including its terminating '\0'
- # a0 - src
- # a1 - dest
- # @return char count (terminator not included)
- strcpy : (a0, a1) {
- # x0 - pos
- allocate(1);
- x0 = 0;
- # Copying until the terminator of the source shows up
- while (readchar(a0, x0) != '\0') {
- writechar(a1, x0, readchar(a0, x0));
- x0 = x0 + 1;
- }
- # Terminating the destination
- writechar(a1, x0, '\0');
- return(x0);
- }
- ## Compare two strings char by char
- # a0 - first string
- # a1 - second string
- # @return difference between the first differing chars (0 if strings are equal)
- strcmp : (a0, a1) {
- # Both strings are walked in step until the first one ends
- while (readchar(a0, 0) != '\0') {
- # Leaving at the first mismatch
- if (readchar(a0, 0) != readchar(a1, 0)) {
- return(readchar(a0, 0) - readchar(a1, 0));
- }
- a0 = a0 + 1;
- a1 = a1 + 1;
- }
- # First string ended: the result depends on whether the second one ended too
- return(readchar(a0, 0) - readchar(a1, 0));
- }
- ## Duplicate string into newly allocated memory
- # a0 - string
- # @return new string
- strdup : (a0) {
- # x0 - new string
- allocate(1);
- # Room for every char plus the terminating '\0'
- x0 = calloc(strlen(a0) + 1);
- # strcpy moves the chars and writes the terminator
- strcpy(a0, x0);
- return(x0);
- }
- ## Duplicate string literal (without double quotes)
- # Copies everything between the first and the last char of a0 into a new string.
- # a0 - string
- # @return new string
- # NOTE(review): assumes a0 is at least two chars long (opening and closing quote);
- # a shorter string makes the computed length negative - confirm callers guarantee this.
- strlitdup : (a0) {
- # x0 - string length
- # x1 - new string
- allocate(2);
- # Calculating length (first and last char are left out)
- x0 = strlen(a0) - 2;
- # Allocating string (the extra byte stays zero from calloc and terminates the copy)
- x1 = calloc(x0 + 1);
- # Copying data to new string, starting after the first char
- memcpy(a0 + 1, x1, x0);
- # Return new string
- return(x1);
- }
- ### Tuples
- ## Allocate memory for a tuple
- # a0 - size (number of slots, 4 bytes each)
- # @return allocated tuple (slots are not initialized here)
- allocate_tuple : (a0) {
- return(malloc(a0 * 4));
- }
- ## Make tuple with 1 value
- # Allocates a one-slot tuple and stores a0 in slot 0.
- # a0 - value
- # @return tuple
- tuple1 : (a0) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(1);
- # Fill tuple
- x0[0] = a0;
- return(x0);
- }
- ## Build a 2-slot tuple
- # a0, a1 - values for slots 0 and 1
- # @return tuple
- tuple2 : (a0, a1) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(2);
- # Slots are independent; filling from the last one down
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ## Build a 3-slot tuple
- # a0, a1, a2 - values for slots 0 to 2
- # @return tuple
- tuple3 : (a0, a1, a2) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(3);
- # Slots are independent; filling from the last one down
- x0[2] = a2;
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ## Build a 4-slot tuple
- # a0, a1, a2, a3 - values for slots 0 to 3
- # @return tuple
- tuple4 : (a0, a1, a2, a3) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(4);
- # Slots are independent; filling from the last one down
- x0[3] = a3;
- x0[2] = a2;
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ## Build a 5-slot tuple
- # a0, a1, a2, a3, a4 - values for slots 0 to 4
- # @return tuple
- tuple5 : (a0, a1, a2, a3, a4) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(5);
- # Slots are independent; filling from the last one down
- x0[4] = a4;
- x0[3] = a3;
- x0[2] = a2;
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ## Build a 6-slot tuple
- # a0, a1, a2, a3, a4, a5 - values for slots 0 to 5
- # @return tuple
- tuple6 : (a0, a1, a2, a3, a4, a5) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(6);
- # Slots are independent; filling from the last one down
- x0[5] = a5;
- x0[4] = a4;
- x0[3] = a3;
- x0[2] = a2;
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ## Build a 7-slot tuple
- # a0, a1, a2, a3, a4, a5, a6 - values for slots 0 to 6
- # @return tuple
- tuple7 : (a0, a1, a2, a3, a4, a5, a6) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(7);
- # Slots are independent; filling from the last one down
- x0[6] = a6;
- x0[5] = a5;
- x0[4] = a4;
- x0[3] = a3;
- x0[2] = a2;
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ## Build an 8-slot tuple
- # a0, a1, a2, a3, a4, a5, a6, a7 - values for slots 0 to 7
- # @return tuple
- tuple8 : (a0, a1, a2, a3, a4, a5, a6, a7) {
- # x0 - tuple
- allocate(1);
- x0 = allocate_tuple(8);
- # Slots are independent; filling from the last one down
- x0[7] = a7;
- x0[6] = a6;
- x0[5] = a5;
- x0[4] = a4;
- x0[3] = a3;
- x0[2] = a2;
- x0[1] = a1;
- x0[0] = a0;
- return(x0);
- }
- ### Linked List
- ## List parts
- # Slot indices inside a list element, which is a 2-slot tuple:
- # LIST_NEXT holds the rest of the list, LIST_VALUE holds the stored value
- LIST_NEXT => 0;
- LIST_VALUE => 1;
- ## Create a one-element linked list
- # a0 - value
- # @return list whose single element holds a0 and has NULL as its continuation
- list : (a0) {
- # Same element list_insert(NULL, a0) would build: slot 0 is the continuation, slot 1 the value
- return(tuple2(NULL, a0));
- }
- ## Put a value in front of a linked list
- # a0 - list that becomes the continuation of the new element
- # a1 - value
- # @return list with new value (new first element pointing at a0)
- list_insert : (a0, a1) {
- return(tuple2(a0, a1));
- }
- ## Step to the rest of a linked list
- # a0 - list
- # @return content of the LIST_NEXT slot of the current element
- list_next : (a0) {
- return(a0[LIST_NEXT]);
- }
- ## Read the value stored in the current list element
- # a0 - list
- # @return content of the LIST_VALUE slot of the current element
- list_value : (a0) {
- return(a0[LIST_VALUE]);
- }
- ## Drop the first element of a list
- # a0 - list
- # @return reduced list (what followed the dropped element)
- list_pop : (a0) {
- # x0 - rest of the list
- allocate(1);
- # Remembering the continuation before the element is released
- x0 = a0[LIST_NEXT];
- free(a0);
- return(x0);
- }
- ### Vector
- ## Vector parts
- # Slot indices inside a vector, which is a 3-slot tuple:
- # VEC_SIZE - number of elements in use
- # VEC_CAP - number of elements the buffer has room for
- # VEC_BUF - address of the element buffer
- VEC_SIZE => 0;
- VEC_CAP => 1;
- VEC_BUF => 2;
- ## Create a vector of chars (one byte per element)
- # a0 - size
- # a1 - capacity
- # @return character vector with a zero filled buffer of a1 bytes
- char_vector : (a0, a1) {
- # x0 - buffer
- allocate(1);
- # Buffer is allocated before the tuple that describes it
- x0 = calloc(a1);
- return(tuple3(a0, a1, x0));
- }
- ## Create a vector of 4-byte elements
- # a0 - size
- # a1 - capacity
- # @return vector with a zero filled buffer of 4 * a1 bytes
- vector : (a0, a1) {
- # x0 - buffer
- allocate(1);
- # Buffer is allocated before the tuple that describes it
- x0 = calloc(4 * a1);
- return(tuple3(a0, a1, x0));
- }
- ## Read the buffer address of a vector
- # a0 - vector
- # @return vector buffer
- vector_buffer : (a0) {
- return(a0[VEC_BUF]);
- }
- ## Read the element count of a vector
- # a0 - vector
- # @return vector size
- vector_size : (a0) {
- return(a0[VEC_SIZE]);
- }
- ## Make sure a char vector buffer has room for a given number of chars
- # a0 - char vector
- # a1 - new capacity
- char_vector_reserve : (a0, a1) {
- # x0 - reallocated buffer
- allocate(1);
- # Nothing happens unless the requested capacity exceeds the current one
- if (a0[VEC_CAP] < a1) {
- # Sizes are in bytes, one per char
- x0 = realloc(a0[VEC_BUF], a0[VEC_CAP], a1);
- a0[VEC_BUF] = x0;
- a0[VEC_CAP] = a1;
- }
- }
- ## Make sure a vector buffer has room for a given number of elements
- # a0 - vector
- # a1 - new capacity
- vector_reserve : (a0, a1) {
- # x0 - reallocated buffer
- allocate(1);
- # Nothing happens unless the requested capacity exceeds the current one
- if (a0[VEC_CAP] < a1) {
- # Sizes are in bytes, 4 per element
- x0 = realloc(a0[VEC_BUF], 4 * a0[VEC_CAP], 4 * a1);
- a0[VEC_BUF] = x0;
- a0[VEC_CAP] = a1;
- }
- }
- ## Change the size of a char vector
- # a0 - char vector
- # a1 - new size
- char_vector_resize : (a0, a1) {
- # Growing may need a bigger buffer; shrinking keeps the buffer as it is
- if (a0[VEC_SIZE] < a1) {
- char_vector_reserve(a0, a1);
- }
- a0[VEC_SIZE] = a1;
- }
- ## Change the size of a vector
- # a0 - vector
- # a1 - new size
- vector_resize : (a0, a1) {
- # Growing needs enough room in the buffer
- if (a0[VEC_SIZE] < a1) { vector_reserve(a0, a1); }
- # Storing the new size
- a0[VEC_SIZE] = a1;
- }
- ## Read one char of a char vector
- # a0 - char vector
- # a1 - index (not range checked)
- # @return char
- char_vector_get : (a0, a1) {
- # x0 - buffer
- allocate(1);
- x0 = a0[VEC_BUF];
- return(readchar(x0, a1));
- }
- ## Read one element of a vector
- # a0 - vector
- # a1 - index (not range checked)
- # @return value
- vector_get : (a0, a1) {
- # x0 - buffer
- allocate(1);
- x0 = a0[VEC_BUF];
- return(x0[a1]);
- }
- ## Overwrite one char of a char vector
- # a0 - char vector
- # a1 - index (not range checked)
- # a2 - char
- char_vector_set : (a0, a1, a2) {
- # x0 - buffer
- allocate(1);
- x0 = a0[VEC_BUF];
- writechar(x0, a1, a2);
- }
- ## Overwrite one element of a vector
- # a0 - vector
- # a1 - index (not range checked)
- # a2 - value
- vector_set : (a0, a1, a2) {
- # x0 - buffer
- allocate(1);
- x0 = vector_buffer(a0);
- x0[a1] = a2;
- }
- ## Insert value in the end of char vector
- # The buffer is doubled when it is full. A vector created with zero
- # capacity grows to one element, because doubling zero would not grow it.
- # a0 - char vector
- # a1 - value
- char_vector_push : (a0, a1) {
- # x0 - new capacity
- allocate(1);
- # If vector buffer is full, reserving more
- if (a0[VEC_SIZE] == a0[VEC_CAP]) {
- x0 = a0[VEC_CAP] * 2;
- # Doubling an empty buffer gives no space at all
- if (x0 == 0) { x0 = 1; }
- char_vector_reserve(a0, x0);
- }
- # Putting new value
- char_vector_set(a0, a0[VEC_SIZE], a1);
- # Incrementing size
- a0[VEC_SIZE] = a0[VEC_SIZE] + 1;
- }
- ## Insert value in the end of vector
- # The buffer is doubled when it is full. A vector created with zero
- # capacity grows to one element, because doubling zero would not grow it.
- # a0 - vector
- # a1 - value
- vector_push : (a0, a1) {
- # x0 - new capacity
- allocate(1);
- # If vector buffer is full, reserving more
- if (a0[VEC_SIZE] == a0[VEC_CAP]) {
- x0 = a0[VEC_CAP] * 2;
- # Doubling an empty buffer gives no space at all
- if (x0 == 0) { x0 = 1; }
- vector_reserve(a0, x0);
- }
- # Putting new value
- vector_set(a0, a0[VEC_SIZE], a1);
- # Incrementing size
- a0[VEC_SIZE] = a0[VEC_SIZE] + 1;
- }
- ## Take the last char out of a char vector
- # The vector is expected to be non-empty; this is not checked.
- # a0 - char vector
- # @return char
- char_vector_pop : (a0) {
- # x0 - index of the last char, which is also the new size
- allocate(1);
- x0 = a0[VEC_SIZE] - 1;
- a0[VEC_SIZE] = x0;
- return(char_vector_get(a0, x0));
- }
- ## Take the last element out of a vector
- # The vector is expected to be non-empty; this is not checked.
- # a0 - vector
- # @return value
- vector_pop : (a0) {
- # x0 - index of the last element, which is also the new size
- allocate(1);
- x0 = a0[VEC_SIZE] - 1;
- a0[VEC_SIZE] = x0;
- return(vector_get(a0, x0));
- }
- ## Release a vector together with its buffer
- # Elements themselves are not released.
- # a0 - vector
- vector_destroy : (a0) {
- # The buffer goes first, while the vector tuple is still readable
- free(vector_buffer(a0));
- # Then the vector tuple itself
- free(a0);
- }
- ### I/O
- ## Open flags (passed as second argument of open)
- # NOTE(review): values look like the Linux open(2) flag values - confirm
- O_RDONLY => 0;
- O_WRONLY => 1;
- O_RDWR => 2;
- O_CREAT => 64;
- O_TRUNC => 512;
- O_APPEND => 1024;
- ## Parts of I/O channels (slot indices inside the channel tuple)
- CHAN_FD => 0; # file descriptor
- CHAN_BUF => 1; # byte buffer
- CHAN_IDX => 2; # input: next byte to read; output: number of buffered bytes
- ICHAN_END => 3; # input only: number of valid bytes in the buffer
- ICHAN_EOF => 4; # input only: TRUE once a read returned no data
- ## Buffer sizes (in bytes)
- IBUFFER_SIZE => 512;
- OBUFFER_SIZE => 512;
- ## Standard channels (NULL until initialize_io is called)
- stdin : NULL;
- stdout : NULL;
- stderr : NULL;
- ## Wrap a file descriptor into a buffered input channel
- # a0 - fd
- # @return channel with an empty buffer of IBUFFER_SIZE bytes
- input_chan : (a0) {
- # Slots: fd, buffer, read index, end of data, EOF flag
- return(tuple5(a0, malloc(IBUFFER_SIZE), 0, 0, FALSE));
- }
- ## Create new input channel based on file
- # Terminates the program through assert when the file can't be opened.
- # a0 - file name
- # @return channel
- input_chan_file : (a0) {
- # x0 - fd/channel
- allocate(1);
- # Opening file with read permission
- x0 = open(a0, O_RDONLY, 0);
- # Checking for success. Descriptor 0 is a valid result (it is handed out
- # when standard input was closed), only negative values mean failure
- assert(x0 >= 0, "Couldn't open file input channel");
- # Returning new channel
- return(input_chan(x0));
- }
- ## Shut down an input channel and release its memory
- # a0 - channel
- input_chan_close : (a0) {
- # x0 - channel buffer
- allocate(1);
- x0 = a0[CHAN_BUF];
- # The descriptor is closed first
- close(a0[CHAN_FD]);
- # Then buffer and channel tuple are released
- free(x0);
- free(a0);
- }
- ## Wrap a file descriptor into a buffered output channel
- # a0 - fd
- # @return channel with an empty buffer of OBUFFER_SIZE bytes
- output_chan : (a0) {
- # Slots: fd, buffer, number of buffered bytes
- return(tuple3(a0, malloc(OBUFFER_SIZE), 0));
- }
- ## Create new output channel based on file
- # The file is created if missing and truncated if it exists.
- # Terminates the program through assert when the file can't be opened.
- # a0 - file name
- # @return channel
- output_chan_file : (a0) {
- # x0 - fd/channel
- allocate(1);
- # Opening file with write permission
- x0 = open(a0, O_WRONLY|O_CREAT|O_TRUNC, 420); # 420 = 0644
- # Checking for success. Descriptor 0 is a valid result (it is handed out
- # when standard input was closed), only negative values mean failure
- assert(x0 >= 0, "Couldn't open file output channel");
- # Returning new channel
- return(output_chan(x0));
- }
- ## Shut down an output channel and release its memory
- # a0 - channel
- output_chan_close : (a0) {
- # x0 - channel buffer
- allocate(1);
- # Pending bytes are written out before the descriptor goes away
- flush(a0);
- x0 = a0[CHAN_BUF];
- close(a0[CHAN_FD]);
- # Releasing buffer and channel tuple
- free(x0);
- free(a0);
- }
- ## Set up the stdin, stdout and stderr channels
- initialize_io : () {
- # x0 - channel being created
- allocate(1);
- # Descriptor 0 - standard input
- x0 = input_chan(0);
- stdin = x0;
- # Descriptor 1 - standard output
- x0 = output_chan(1);
- stdout = x0;
- # Descriptor 2 - standard error
- x0 = output_chan(2);
- stderr = x0;
- }
- ## Fill input channel
- # Refills the buffer only when all buffered bytes were consumed. Once a
- # read delivers no data, the EOF flag is set and no more reads are done.
- # a0 - channel
- fill : (a0) {
- # x0 - read count
- allocate(1);
- # Checking if file is ended
- if (a0[ICHAN_EOF]) { return; }
- # Checking if buffer is exhausted
- if (a0[CHAN_IDX] == a0[ICHAN_END]) {
- # Input buffer is empty, refilling
- x0 = read(a0[CHAN_FD], a0[CHAN_BUF], IBUFFER_SIZE);
- # A failed read is handled like end of input, so a negative count is
- # never stored as buffer end
- # NOTE(review): assumes read reports failure as a negative value - confirm
- if (x0 < 0) { x0 = 0; }
- # Resetting index and setting size
- a0[CHAN_IDX] = 0;
- a0[ICHAN_END] = x0;
- # Setting EOF flag, if file is ended
- if (x0 == 0) { a0[ICHAN_EOF] = TRUE; }
- }
- }
- ## Take the next char out of an input channel
- # a0 - channel
- # @return char, or EOF when no more input is available
- fgetc : (a0) {
- # x0 - read index
- # x1 - char
- allocate(2);
- # Making sure the buffer holds data, if there is any left
- fill(a0);
- x0 = a0[CHAN_IDX];
- # An exhausted buffer after filling means the input is over
- if (x0 == a0[ICHAN_END]) { return(EOF); }
- # Fetching char and advancing
- x1 = readchar(a0[CHAN_BUF], x0);
- a0[CHAN_IDX] = x0 + 1;
- return(x1);
- }
- ## Take the next char out of the standard input channel
- # @return char, or EOF when no more input is available
- getc : () {
- # x0 - char
- allocate(1);
- x0 = fgetc(stdin);
- return(x0);
- }
- ## Look ahead next char
- # Nothing is consumed. When the requested offset lies behind the buffered
- # data, unread bytes are moved to the start of the buffer and more input is
- # read behind them, so a lookahead across a buffer boundary doesn't report
- # a false EOF. Offsets of IBUFFER_SIZE and more can't be served.
- # a0 - channel
- # a1 - offset (0 is the char that fgetc would return next)
- # @return char, or EOF if input ends before that offset
- fnextc : (a0, a1) {
- # x0 - count of unread bytes
- # x1 - copy index/read count
- # x2 - char being moved
- allocate(3);
- # Filling buffer, if needed
- fill(a0);
- # Fetching more input, until requested offset is buffered
- while (a0[CHAN_IDX] + a1 >= a0[ICHAN_END]) {
- # No more data will come
- if (a0[ICHAN_EOF]) { return(EOF); }
- # Buffer is too small for this offset
- if (a1 >= IBUFFER_SIZE) { return(EOF); }
- # Moving unread bytes to the start of the buffer
- x0 = a0[ICHAN_END] - a0[CHAN_IDX];
- x1 = 0;
- while (x1 < x0) {
- x2 = readchar(a0[CHAN_BUF], a0[CHAN_IDX] + x1);
- writechar(a0[CHAN_BUF], x1, x2);
- x1 = x1 + 1;
- }
- a0[CHAN_IDX] = 0;
- a0[ICHAN_END] = x0;
- # Reading into the free rest of the buffer (x0 <= a1 < IBUFFER_SIZE)
- x1 = read(a0[CHAN_FD], a0[CHAN_BUF] + x0, IBUFFER_SIZE - x0);
- # No data or failed read - input is over, buffered bytes stay readable
- if (x1 <= 0) {
- a0[ICHAN_EOF] = TRUE;
- return(EOF);
- }
- a0[ICHAN_END] = x0 + x1;
- }
- # Fetching char
- return(readchar(a0[CHAN_BUF], a0[CHAN_IDX] + a1));
- }
- ## Peek at an upcoming char of the standard input channel
- # a0 - offset
- # @return char, or EOF
- nextc : (a0) {
- # x0 - char
- allocate(1);
- x0 = fnextc(stdin, a0);
- return(x0);
- }
- ## Write out all buffered bytes of an output channel
- # The result of write is not checked.
- # a0 - channel
- flush : (a0) {
- # An empty buffer needs no write
- if (a0[CHAN_IDX] <= 0) { return; }
- # Writing buffered bytes and marking the buffer as empty
- write(a0[CHAN_FD], a0[CHAN_BUF], a0[CHAN_IDX]);
- a0[CHAN_IDX] = 0;
- }
- ## Append a char to an output channel
- # The channel is flushed when its buffer gets full and after every newline.
- # a0 - channel
- # a1 - char
- fputc : (a0, a1) {
- # Storing char behind the buffered ones
- writechar(a0[CHAN_BUF], a0[CHAN_IDX], a1);
- a0[CHAN_IDX] = a0[CHAN_IDX] + 1;
- # Full buffer or end of line - writing out
- if (a0[CHAN_IDX] == OBUFFER_SIZE || a1 == '\n') { flush(a0); }
- }
- ## Append a char to the standard output channel
- # a0 - char
- putc : (a0) {
- fputc(stdout, a0);
- }
- ## Append a char to the standard error channel
- # a0 - char
- eputc : (a0) {
- fputc(stderr, a0);
- }
- ## Append a '\0'-terminated string to an output channel
- # The terminator itself is not written.
- # a0 - channel
- # a1 - string
- fputs : (a0, a1) {
- # x0 - char
- # x1 - index in string
- allocate(2);
- x1 = 0;
- x0 = readchar(a1, x1);
- # Walking over the string, until terminator is met
- while (x0 != '\0') {
- fputc(a0, x0);
- x1 = x1 + 1;
- x0 = readchar(a1, x1);
- }
- }
- ## Append a string to the standard output channel
- # a0 - string
- puts : (a0) {
- fputs(stdout, a0);
- }
- ## Append a string to the standard error channel
- # a0 - string
- eputs : (a0) {
- fputs(stderr, a0);
- }
- ## Append a number in given base to an output channel
- # Conversion is done by utoa in a 33 byte buffer, which is allocated on
- # first use and shared by all calls.
- # a0 - channel
- # a1 - number
- # a2 - base
- fputn_buffer : NULL;
- fputn : (a0, a1, a2) {
- # First call - buffer is not there yet
- if (!fputn_buffer) { fputn_buffer = malloc(33); }
- # Wiping what the previous call left
- memset(0, fputn_buffer, 33);
- # Number to text
- utoa(a1, fputn_buffer, a2);
- # Text to channel
- fputs(a0, fputn_buffer);
- }
- ## Append a number in base 10 to an output channel
- # a0 - channel
- # a1 - number
- fputd : (a0, a1) {
- fputn(a0, a1, 10);
- }
- ## Append a number in base 16 to an output channel
- # a0 - channel
- # a1 - number
- fputx : (a0, a1) {
- fputn(a0, a1, 16);
- }
- ## Append a number in base 10 to the standard output channel
- # a0 - number
- putd : (a0) {
- fputd(stdout, a0);
- }
- ## Append a number in base 16 to the standard output channel
- # a0 - number
- putx : (a0) {
- fputx(stdout, a0);
- }
- ## Append a number in base 10 to the standard error channel
- # a0 - number
- eputd : (a0) {
- fputd(stderr, a0);
- }
- ## Append a number in base 16 to the standard error channel
- # a0 - number
- eputx : (a0) {
- fputx(stderr, a0);
- }
- ## Terminate the program, if a condition doesn't hold
- # The message goes to stderr with "ASSERT: " in front, exit code is 1.
- # a0 - condition
- # a1 - error string (NULL selects a default message)
- assert : (a0, a1) {
- if (!a0) {
- eputs("ASSERT: ");
- # Picking default text, when caller gave none
- if (!a1) { eputs("assertion failed"); } else { eputs(a1); }
- # The newline also flushes stderr
- eputc('\n');
- exit(1);
- }
- }
- ### Lexer
- ## Character types
- # These are the values stored in char_table. The lexer uses them as column
- # index of its trigger tables.
- CHAR_NULL => 0; # \0
- CHAR_INVALID => 1; # invalid characters
- CHAR_SPACES => 2; # [\t\r ]
- CHAR_NEWLINE => 3; # \n
- CHAR_ZERO => 4; # 0
- CHAR_OCTAL => 5; # [1-7]
- CHAR_DECIMAL => 6; # [89]
- CHAR_HEX => 7; # [A-Fa-f] \ a
- CHAR_ALPHA => 8; # [G-Zg-z_] \ x
- CHAR_A => 9; # a
- CHAR_X => 10; # x
- CHAR_SQUOTE => 11; # \'
- CHAR_DQUOTE => 12; # \"
- CHAR_BACKSLASH => 13; # \\
- CHAR_SYMBOL => 14; # other characters
- ## Lexer parts (slot indices inside the lexer tuple)
- LEX_ICHAN => 0; # input channel
- LEX_FILE => 1; # file name
- LEX_LINE => 2; # current line, starts at 1
- LEX_COLUMN => 3; # current column, starts at 1
- LEX_HOLD => 4; # TRUE, if current token has to be delivered once more
- LEX_TID => 5; # ID of current token
- LEX_TVALUE => 6; # numeric value of current token
- LEX_TTEXT => 7; # char vector with '\0'-terminated text of current token
- ## Token location parts (slot indices inside the location tuple)
- LOC_FILE => 0;
- LOC_LINE => 1;
- LOC_COLUMN => 2;
- ### Token IDs
- # Single char symbols use their char code as token ID, so named IDs
- # start at 256.
- TOKEN_END => 256;
- TOKEN_INTEGER => 257;
- TOKEN_IDENTIFIER => 258;
- TOKEN_STRING => 259;
- TOKEN_ARGUMENT => 260;
- TOKEN_VARIABLE => 261;
- ## Keywords
- # The lexer computes keyword IDs as TOKEN_IF + index in reserved_keywords,
- # so these IDs have to be consecutive and in the same order as that table.
- # Conditionals
- TOKEN_IF => 270;
- TOKEN_ELSE => 271;
- # Loops
- TOKEN_BREAK => 272;
- TOKEN_CONTINUE => 273;
- TOKEN_DO => 274;
- TOKEN_FOR => 275;
- TOKEN_WHILE => 276;
- # Functions
- TOKEN_ALLOCATE => 277;
- TOKEN_ASM => 278;
- TOKEN_RETURN => 279;
- TOKEN_SYSCALL => 280;
- # Labels
- TOKEN_GLOBAL => 281;
- TOKEN_GOTO => 282;
- TOKEN_LABEL => 283;
- # Byte operations
- TOKEN_READCHAR => 284;
- TOKEN_WRITECHAR => 285;
- # Array types
- TOKEN_TYPE_CHAR => 286;
- TOKEN_TYPE_INT => 287;
- # File
- TOKEN_INCLUDE => 288;
- TOKEN_INCLUDE_ONCE => 289;
- ## Symbols (made of two or three chars)
- # Equality: == !=
- TOKEN_EQ => 300;
- TOKEN_NE => 301;
- # Compare: <= >=
- TOKEN_LE => 302;
- TOKEN_GE => 303;
- # Arrow: =>
- TOKEN_ARROW => 304;
- # Shift: << >>
- TOKEN_SHL => 305;
- TOKEN_SHR => 306;
- # Logic: && ||
- TOKEN_LAND => 307;
- TOKEN_LOR => 308;
- # Assignment: += -= *= /= %= &= ^= |= <<= >>=
- TOKEN_AADD => 309;
- TOKEN_ASUB => 310;
- TOKEN_AMUL => 311;
- TOKEN_ADIV => 312;
- TOKEN_AMOD => 313;
- TOKEN_AAND => 314;
- TOKEN_AXOR => 315;
- TOKEN_AOR => 316;
- TOKEN_ASHL => 317;
- TOKEN_ASHR => 318;
- # Increment/decrement: ++ --
- TOKEN_INC => 319;
- TOKEN_DEC => 320;
- # Ellipsis: ...
- TOKEN_ELLIPSIS => 321;
- ## Char table
- # Maps every char code (0-255) to its character type (CHAR_* value).
- # Rows hold 16 codes each, the column is the low hex digit of the code.
- # '_' (0x5f) is CHAR_ALPHA: it can be part of identifiers, but it is not
- # a hexadecimal digit.
- char_table : [
- # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 Xa Xb Xc Xd Xe Xf
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 2, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 14, 12, 14, 14, 14, 14, 11, 14, 14, 14, 14, 14, 14, 14, 14,
- 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 14, 14, 14, 14, 14, 14,
- 14, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 14, 13, 14, 14, 8,
- 14, 9, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 10, 8, 8, 14, 14, 14, 14, 1,
- # We're not using Extended-ASCII - marking as invalid
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- ];
- ## Char to escape table (only first 128 ASCII chars)
- # Indexed by the char that follows a backslash, gives the code of the
- # escaped char: \" 34, \' 39, \? 63, \\ 92, \a 7, \b 8, \e 27, \f 12,
- # \n 10, \r 13, \t 9, \v 11. All other chars, including '0', give 0.
- char_to_escape : [
- # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 Xa Xb Xc Xd Xe Xf
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 34, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 0, 0, 0,
- 0, 7, 8, 0, 0, 27, 12, 0, 0, 0, 0, 0, 0, 0, 10, 0,
- 0, 0, 13, 0, 9, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0
- ];
- ## Reserved keywords
- # NULL-terminated. The lexer turns index N of this table into token ID
- # TOKEN_IF + N, so the order has to match the keyword token IDs.
- reserved_keywords : [
- "if", "else", # Conditionals
- "break", "continue", "do", "for", "while", # Loops
- "allocate", "asm", "return", "syscall", # Functions
- "global", "goto", "label", # Labels
- "readchar", "writechar", # Byte operations
- "char", "int", # Array types
- "include", "include_once", # File operations
- NULL
- ];
- ## Lexer triggers
- # Jump tables of lexer_lex. Table lxNNn is used at the end of state lxNN:
- # the character type (CHAR_* value, 0-14) of the next char selects the
- # label to continue at. lxNN are states, leNN are errors, lx99 finishes
- # the token.
- lx00n:[lx01,le00,lx00,lx00,lx02,lx06,lx06,lx07,lx07,lx09,lx09,lx11,lx15,le00,lx18];
- lx02n:[lx99,le00,lx99,lx99,le01,lx03,le02,le03,le00,le03,lx04,le00,le00,le00,lx99];
- lx03n:[lx99,le00,lx99,lx99,lx03,lx03,le02,le03,le00,le03,le00,le00,le00,le00,lx99];
- lx04n:[le04,le00,le04,le04,lx05,lx05,lx05,lx05,le00,lx05,le00,le00,le00,le00,le00];
- lx05n:[lx99,le00,lx99,lx99,lx05,lx05,lx05,lx05,le00,lx05,le00,le00,le00,le00,lx99];
- lx06n:[lx99,le00,lx99,lx99,lx06,lx06,lx06,le05,le00,le05,le00,le00,le00,le00,lx99];
- lx07n:[lx08,le00,lx08,lx08,lx07,lx07,lx07,lx07,lx07,lx07,lx07,le00,le00,le00,lx08];
- lx09n:[lx08,le00,lx08,lx08,lx10,lx10,lx10,lx07,lx07,lx07,lx07,le00,le00,le00,lx08];
- lx10n:[lx99,le00,lx99,lx99,lx10,lx10,lx10,lx07,lx07,lx07,lx07,le00,le00,le00,lx99];
- lx11n:[le06,le00,lx12,le06,lx12,lx12,lx12,lx12,lx12,lx12,lx12,lx12,lx12,lx13,lx12];
- lx15n:[le07,le00,lx15,le07,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx17,lx16,lx15];
- lx16n:[le07,le00,lx15,le07,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx15,lx17,lx16,lx15];
- ## Build a lexer on top of an input channel
- # Position starts at line 1, column 1; the token text starts as "".
- # a0 - input channel
- # a1 - filename
- # @return lexer
- lexer : (a0, a1) {
- # x0 - token text buffer
- allocate(1);
- # Token text is a char vector, that always ends with '\0'
- x0 = char_vector(0, 1);
- char_vector_push(x0, '\0');
- # Slots: channel, file, line, column, hold flag, token ID, value, text
- return(tuple8(a0, a1, 1, 1, FALSE, 0, 0, x0));
- }
- ## Build a lexer, that reads the file with given name
- # a0 - filename
- # @return lexer
- lexer_file : (a0) {
- # x0 - input channel
- allocate(1);
- x0 = input_chan_file(a0);
- return(lexer(x0, a0));
- }
- ## Release a lexer and everything it holds
- # This closes the input channel and also frees the filename string, that
- # was given on creation.
- # a0 - lexer
- lexer_destroy : (a0) {
- # x0 - part being released
- allocate(1);
- # Token text
- x0 = a0[LEX_TTEXT];
- vector_destroy(x0);
- # Input channel
- x0 = a0[LEX_ICHAN];
- input_chan_close(x0);
- # File name
- x0 = a0[LEX_FILE];
- free(x0);
- # Lexer tuple
- free(a0);
- }
- ## Classify the next char without consuming it
- # a0 - lexer
- # @return char type (CHAR_NULL at the end of input)
- lexer_look : (a0) {
- # x0 - char
- allocate(1);
- x0 = fnextc(a0[LEX_ICHAN], 0);
- # A real char is classified by table
- if (x0 != EOF) { return(char_table[x0]); }
- # End of input is reported as CHAR_NULL
- return(CHAR_NULL);
- }
- ## Read one char and append it to the token text
- # Line and column of the lexer are advanced too.
- # a0 - lexer
- # @return char
- lexer_consume : (a0) {
- # x0 - char
- # x1 - token text vector
- allocate(2);
- x0 = fgetc(a0[LEX_ICHAN]);
- # Tracking position: a newline starts the next line at column 1
- if (x0 == '\n') {
- a0[LEX_LINE] = a0[LEX_LINE] + 1;
- a0[LEX_COLUMN] = 1;
- } else {
- a0[LEX_COLUMN] = a0[LEX_COLUMN] + 1;
- }
- x1 = a0[LEX_TTEXT];
- # The char takes the place of the terminator...
- char_vector_set(x1, vector_size(x1) - 1, x0);
- # ...and a new terminator goes behind it
- char_vector_push(x1, '\0');
- return(x0);
- }
- ## Store the ID of the current token
- # a0 - lexer
- # a1 - token ID
- lexer_token_set : (a0, a1) {
- a0[LEX_TID] = a1;
- }
- ## Query the ID of the current token
- # a0 - lexer
- # @return token ID
- lexer_token_get : (a0) {
- # x0 - token ID
- allocate(1);
- x0 = a0[LEX_TID];
- return(x0);
- }
- ## Query the text of the current token
- # The result points into the lexer's own buffer, it is not a copy.
- # a0 - lexer
- # @return '\0'-terminated token text
- lexer_token_text : (a0) {
- # x0 - token text vector
- allocate(1);
- x0 = a0[LEX_TTEXT];
- return(x0[VEC_BUF]);
- }
- ## Query the size of the token text vector
- # The terminating '\0' is part of the count.
- # a0 - lexer
- # @return token text length
- lexer_token_length : (a0) {
- # x0 - token text vector
- allocate(1);
- x0 = a0[LEX_TTEXT];
- return(x0[VEC_SIZE]);
- }
- ## Query the numeric value of the current token
- # a0 - lexer
- # @return token value
- lexer_token_value : (a0) {
- # x0 - token value
- allocate(1);
- x0 = a0[LEX_TVALUE];
- return(x0);
- }
- ## Snapshot the current position of the lexer
- # Every call allocates a new tuple. The file name is shared with the lexer,
- # not copied.
- # a0 - lexer
- # @return token location (file, line, column)
- lexer_token_location : (a0) {
- # x0 - location
- allocate(1);
- x0 = tuple3(a0[LEX_FILE], a0[LEX_LINE], a0[LEX_COLUMN]);
- return(x0);
- }
- ## Make the next lexer_lex call deliver the current token again
- # Only one token can be on hold, a second hold fails the assertion.
- # a0 - lexer
- lexer_hold : (a0) {
- # x0 - current hold flag
- allocate(1);
- x0 = a0[LEX_HOLD];
- assert(x0 == FALSE, "tried to hold two or more tokens");
- a0[LEX_HOLD] = TRUE;
- }
- ## Forget the current token
- # a0 - lexer
- lexer_reset_token : (a0) {
- # x0 - token text vector
- allocate(1);
- # Token text becomes an empty string (just the terminator)
- x0 = a0[LEX_TTEXT];
- char_vector_resize(x0, 0);
- char_vector_push(x0, '\0');
- # ID and value are cleared
- a0[LEX_TID] = 0;
- a0[LEX_TVALUE] = 0;
- }
- ## Write a token location as file:line:column
- # Mind the argument order: unlike other fput* functions the channel
- # comes last.
- # a0 - token location
- # a1 - output channel
- fputloc : (a0, a1) {
- # File name
- fputs(a1, a0[LOC_FILE]);
- fputc(a1, ':');
- # Line in decimal
- fputd(a1, a0[LOC_LINE]);
- fputc(a1, ':');
- # Column in decimal
- fputd(a1, a0[LOC_COLUMN]);
- }
- ## Report an error at the current lexer position and terminate
- # Output goes to stderr as "file:line:column: message", exit code is 1.
- # a0 - lexer
- # a1 - error message
- lexer_error : (a0, a1) {
- # x0 - location
- allocate(1);
- x0 = lexer_token_location(a0);
- # Where
- fputloc(x0, stderr);
- fputs(stderr, ": ");
- # What
- fputs(stderr, a1);
- # The newline also flushes stderr
- fputc(stderr, '\n');
- exit(1);
- }
- ## Fetch token
- # State machine: every lxNN label is a state, that consumes input and then
- # jumps through its trigger table lxNNn, indexed by the type of the next
- # char. leNN labels report errors and terminate the program.
- # A held token (see lexer_hold) is delivered again without reading input.
- # The end of input is stored as TOKEN_END in the lexer, so it can be held
- # and delivered again like any other token.
- # '#' starts a comment, that runs to the end of line or end of input.
- # a0 - lexer
- # @return token type
- lexer_lex : (a0) {
- # x0 - current char
- # x1, x2 - used variables by triggers
- allocate(3);
- # Checking if we are holding token
- if (a0[LEX_HOLD]) {
- a0[LEX_HOLD] = FALSE;
- return(a0[LEX_TID]);
- }
- label lx00;
- ## Entry point of lexer
- # If token is space/tab/newline, skip it
- if (lexer_look(a0) == CHAR_SPACES || lexer_look(a0) == CHAR_NEWLINE) {
- # Consuming char
- lexer_consume(a0);
- # Retrying
- goto &lx00;
- }
- # Resetting token buffer
- lexer_reset_token(a0);
- # Moving to next trigger
- goto lx00n[lexer_look(a0)];
- label lx01;
- ## We got NULL char
- # Storing token ID, so that a held end token is delivered correctly
- lexer_token_set(a0, TOKEN_END);
- return(TOKEN_END);
- label lx02;
- ## We got zero. It means we have either zero number, octal (e.g. 0664) or
- ## hex number (e.g. 0xffff)
- # Consuming zero char
- lexer_consume(a0);
- # Setting integer token ID
- lexer_token_set(a0, TOKEN_INTEGER);
- # Moving to next trigger
- goto lx02n[lexer_look(a0)];
- label lx03;
- ## We have to parse octal number
- # Fetching digit
- x0 = lexer_consume(a0);
- # Adding digit to number
- a0[LEX_TVALUE] = a0[LEX_TVALUE] * 8 + (x0 - '0');
- # Moving to next trigger
- goto lx03n[lexer_look(a0)];
- label lx04;
- ## Trying to parse hexadecimal number
- # We have 'x' symbol. Consuming it
- lexer_consume(a0);
- # Moving to next trigger
- goto lx04n[lexer_look(a0)];
- label lx05;
- ## Parsing hexadecimal number
- # Fetching digit
- x0 = lexer_consume(a0);
- # Checking if it is lower case hexadecimal digit
- if (x0 >= 'a') {
- x0 = x0 - 'a' + 10;
- } else {
- # Checking if it is higher case hexadecimal digit
- if (x0 >= 'A') {
- x0 = x0 - 'A' + 10;
- } else {
- # It is decimal digit
- x0 = x0 - '0';
- }
- }
- # Adding digit to number
- a0[LEX_TVALUE] = a0[LEX_TVALUE] * 16 + x0;
- # Moving to next trigger
- goto lx05n[lexer_look(a0)];
- label lx06;
- ## We have to parse decimal number
- # We have to set integer token ID
- lexer_token_set(a0, TOKEN_INTEGER);
- # Fetching digit
- x0 = lexer_consume(a0);
- # Adding digit to number
- a0[LEX_TVALUE] = a0[LEX_TVALUE] * 10 + (x0 - '0');
- # Moving to next trigger
- goto lx06n[lexer_look(a0)];
- label lx07;
- ## We have to parse identifier
- # We have to set identifier token ID
- lexer_token_set(a0, TOKEN_IDENTIFIER);
- # Adding char to token text
- lexer_consume(a0);
- # Moving to next trigger
- goto lx07n[lexer_look(a0)];
- label lx08;
- ## Checking if identifier is keyword
- # Resetting reserved keyword index
- x1 = 0;
- # Iterating through all reserved keywords
- while (reserved_keywords[x1] != NULL) {
- # If strings are equal
- if (strcmp(reserved_keywords[x1], lexer_token_text(a0)) == NULL) {
- # Setting keyword token ID (keyword IDs follow the table order)
- lexer_token_set(a0, TOKEN_IF + x1);
- goto &lx99;
- }
- # Checking next keyword
- x1 = x1 + 1;
- }
- # We are here because identifier is not a keyword. Exitting
- goto &lx99;
- label lx09;
- ## We have to parse argument or variable (or identifier)
- # Fetching char
- x0 = lexer_consume(a0);
- # Checking if we have argument or variable
- x1 = TOKEN_VARIABLE;
- if (x0 == 'a') { x1 = TOKEN_ARGUMENT; }
- # Set token type
- lexer_token_set(a0, x1);
- # Moving to next trigger
- goto lx09n[lexer_look(a0)];
- label lx10;
- ## Parsing number of argument/variable
- # Fetching char
- x0 = lexer_consume(a0);
- # Adding digit to value
- a0[LEX_TVALUE] = a0[LEX_TVALUE] * 10 + (x0 - '0');
- # Moving to next trigger
- goto lx10n[lexer_look(a0)];
- label lx11;
- ## Parsing single quote (character literal)
- # Setting integer token ID
- lexer_token_set(a0, TOKEN_INTEGER);
- # Consuming char
- lexer_consume(a0);
- # Moving to next trigger
- goto lx11n[lexer_look(a0)];
- label lx12;
- ## Parsing single quote 'simple' content (without backslash)
- # Fetching char
- x0 = lexer_consume(a0);
- # Setting value
- a0[LEX_TVALUE] = x0;
- goto &lx14;
- label lx13;
- ## Parsing single quote content with backslash
- # Consuming backslash
- lexer_consume(a0);
- # Fetching char
- x0 = lexer_consume(a0);
- # Setting escape value
- a0[LEX_TVALUE] = char_to_escape[x0];
- label lx14;
- # Checking if quotes are closed
- if (lexer_look(a0) != CHAR_SQUOTE) { goto &le06; }
- # Consuming char
- lexer_consume(a0);
- goto &lx99;
- label lx15;
- ## Parsing double quote (string literal)
- # Consuming char
- lexer_consume(a0);
- # Moving to next trigger
- goto lx15n[lexer_look(a0)];
- label lx16;
- ## Parsing double quote content with backslash
- # Consuming backslash
- lexer_consume(a0);
- # Consuming next character
- lexer_consume(a0);
- # Moving to next trigger
- goto lx16n[lexer_look(a0)];
- label lx17;
- ## Ending string
- # Consuming char
- lexer_consume(a0);
- # Setting string token ID
- lexer_token_set(a0, TOKEN_STRING);
- # Ending
- goto &lx99;
- label lx18;
- ## Parsing symbol
- # Fetching char
- x0 = lexer_consume(a0);
- # Setting token ID to char value
- lexer_token_set(a0, x0);
- # Fetching next chars
- x1 = fnextc(a0[LEX_ICHAN], 0);
- x2 = fnextc(a0[LEX_ICHAN], 1);
- # Checking for double-char or triple-char symbols
- if (x0 == '=') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_EQ); }
- else if (x1 == '>') { lexer_consume(a0); lexer_token_set(a0, TOKEN_ARROW); }
- } else if (x0 == '!') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_NE); }
- } else if (x0 == '+') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AADD); }
- else if (x1 == '+') { lexer_consume(a0); lexer_token_set(a0, TOKEN_INC); }
- } else if (x0 == '-') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_ASUB); }
- else if (x1 == '-') { lexer_consume(a0); lexer_token_set(a0, TOKEN_DEC); }
- } else if (x0 == '*') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AMUL); }
- } else if (x0 == '/') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_ADIV); }
- } else if (x0 == '%') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AMOD); }
- } else if (x0 == '<') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_LE); }
- else if (x1 == '<') {
- if (x2 == '=') {
- lexer_consume(a0); lexer_consume(a0); lexer_token_set(a0, TOKEN_ASHL);
- } else {
- lexer_consume(a0); lexer_token_set(a0, TOKEN_SHL);
- }
- }
- } else if (x0 == '>') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_GE); }
- else if (x1 == '>') {
- if (x2 == '=') {
- lexer_consume(a0); lexer_consume(a0); lexer_token_set(a0, TOKEN_ASHR);
- } else {
- lexer_consume(a0); lexer_token_set(a0, TOKEN_SHR);
- }
- }
- } else if (x0 == '^') {
- if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AXOR); }
- } else if (x0 == '&') {
- if (x1 == '&') { lexer_consume(a0); lexer_token_set(a0, TOKEN_LAND); }
- else if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AAND); }
- } else if (x0 == '|') {
- if (x1 == '|') { lexer_consume(a0); lexer_token_set(a0, TOKEN_LOR); }
- else if (x1 == '=') { lexer_consume(a0); lexer_token_set(a0, TOKEN_AOR); }
- } else if (x0 == '#') {
- # Checking for comment
- ## Skipping line
- while (x0 != '\n' && x0 != '\0') {
- x0 = fgetc(a0[LEX_ICHAN]);
- # Input can end inside of a comment. Without this check the loop
- # would never stop, as fgetc keeps returning EOF
- if (x0 == EOF) { x0 = '\0'; }
- }
- # Resetting line and column
- a0[LEX_LINE] = a0[LEX_LINE] + 1;
- a0[LEX_COLUMN] = 1;
- # Starting lexing from the start
- goto &lx00;
- } else if (x0 == '.') {
- if (x1 == '.') {
- if (x2 == '.') { lexer_consume(a0); lexer_consume(a0); lexer_token_set(a0, TOKEN_ELLIPSIS); }
- }
- }
- label lx99;
- ## Finishing lexing. Returning current token ID
- return(a0[LEX_TID]);
- label le00;
- lexer_error(a0, "Invalid character");
- label le01;
- lexer_error(a0, "Unexpected second 0 char");
- label le02;
- lexer_error(a0, "Unexpected decimal digit (expected octal)");
- label le03;
- lexer_error(a0, "Unexpected hexadecimal digit (expected octal)");
- label le04;
- lexer_error(a0, "Unfinished hexadecimal number");
- label le05;
- lexer_error(a0, "Unexpected hexadecimal digit (expected decimal)");
- label le06;
- lexer_error(a0, "Unterminated character literal");
- label le07;
- lexer_error(a0, "Unterminated string literal");
- }
- ## Fetch assembly line
- # Collects chars up to (not including) the next semicolon as token text.
- # The semicolon itself is not consumed. A '}' before the semicolon or the
- # end of input is an error, that terminates the program.
- # a0 - lexer
- # @return is this assembly line last
- lexer_lex_asm : (a0) {
- # x0 - char
- # x1 - char type
- # x2 - i
- allocate(3);
- label lax00;
- ## Entry point of assembly line lexer
- # If token is space/tab/newline, skip it
- if (lexer_look(a0) == CHAR_SPACES || lexer_look(a0) == CHAR_NEWLINE) {
- # Consuming char
- lexer_consume(a0);
- # Retrying
- goto &lax00;
- }
- # Resetting token buffer
- lexer_reset_token(a0);
- # Fetching char
- x0 = lexer_consume(a0);
- label lax01;
- ## Checking for block not being closed
- if (x0 == '}') { goto &lae00; }
- label lax02;
- ## Reading chars until semicolon
- x0 = fnextc(a0[LEX_ICHAN], 0);
- # Erroring, if input ended before semicolon. Without this check the loop
- # would never stop, as consuming at the end of input doesn't advance
- if (x0 == EOF) { goto &lae01; }
- # Checking for block not being closed
- if (x0 == '}') { goto &lae00; }
- # Exitting loop, if next char is semicolon
- if (x0 == ';') { goto &lax03; }
- # Consuming char
- lexer_consume(a0);
- # Making next iteration
- goto &lax02;
- label lax03;
- ## Looking for next significant char to check if this is last line
- # Offset 0 is the semicolon, so the search starts at offset 1
- x2 = 1;
- do {
- # Fetching char
- ## FIXME: I guess this can get EOF when lexing at the end of buffer
- x0 = fnextc(a0[LEX_ICHAN], x2++);
- # Erroring, if we got EOF
- if (x0 == EOF) { goto &lae01; }
- # Fetching char type
- x1 = char_table[x0];
- } while (x1 == CHAR_SPACES || x1 == CHAR_NEWLINE);
- # If we have } char, we return TRUE, otherwise FALSE
- return(x0 == '}');
- label lae00;
- lexer_error(a0, "Unexpected assembly block close");
- label lae01;
- lexer_error(a0, "Unexpected end of file");
- }
- ### Parser
- ## Parser parts (slot indices inside the parser tuple)
- PARSER_LEX => 0; # linked list of lexers, current one first
- PARSER_BUFFER => 1; # string buffer
- PARSER_SWITCH => 2; # flag, FALSE in a new parser
- PARSER_INCLIST => 3; # vector with names of files included once
- ## Node parts (slot indices inside an AST node)
- NODE_TYPE => 0;
- NODE_LOC => 1;
- # 2, 3, ... are used as operands
- ## Statement IDs
- # Constant
- STMT_EXPR_INTEGER => 0;
- STMT_EXPR_STRING => 1;
- # Simple
- STMT_EXPR_IDENTIFIER => 2;
- STMT_EXPR_ARGUMENT => 3;
- STMT_EXPR_VARIABLE => 4;
- STMT_EXPR_SYSCALL => 5;
- STMT_EXPR_READCHAR => 6;
- # Postfix
- STMT_EXPR_CALL => 7;
- STMT_EXPR_INDEX => 8;
- # Prefix
- STMT_EXPR_PLUS => 9;
- STMT_EXPR_MINUS => 10;
- STMT_EXPR_LOGICAL_NOT => 11;
- STMT_EXPR_NOT => 12;
- STMT_EXPR_DEREF => 13;
- STMT_EXPR_ADDROF => 14;
- # Multiplicative
- STMT_EXPR_MUL => 15;
- STMT_EXPR_DIV => 16;
- STMT_EXPR_MOD => 17;
- # Additive
- STMT_EXPR_ADD => 18;
- STMT_EXPR_SUB => 19;
- # Bitwise shift
- STMT_EXPR_SHL => 20;
- STMT_EXPR_SHR => 21;
- # Bitwise and
- STMT_EXPR_AND => 22;
- # Bitwise xor
- STMT_EXPR_XOR => 23;
- # Bitwise or
- STMT_EXPR_OR => 24;
- # Relational
- STMT_EXPR_LESS => 25;
- STMT_EXPR_GREATER => 26;
- STMT_EXPR_LE => 27;
- STMT_EXPR_GE => 28;
- # Equality
- STMT_EXPR_EQ => 29;
- STMT_EXPR_NE => 30;
- # Logical and
- STMT_EXPR_LOGICAL_AND => 31;
- # Logical or
- STMT_EXPR_LOGICAL_OR => 32;
- # Assignment
- STMT_EXPR_ASSIGN => 33;
- STMT_EXPR_MUL_ASSIGN => 34;
- STMT_EXPR_DIV_ASSIGN => 35;
- STMT_EXPR_MOD_ASSIGN => 36;
- STMT_EXPR_ADD_ASSIGN => 37;
- STMT_EXPR_SUB_ASSIGN => 38;
- STMT_EXPR_SHL_ASSIGN => 39;
- STMT_EXPR_SHR_ASSIGN => 40;
- STMT_EXPR_AND_ASSIGN => 41;
- STMT_EXPR_XOR_ASSIGN => 42;
- STMT_EXPR_OR_ASSIGN => 43;
- # Increment/decrement
- STMT_EXPR_PREDEC => 44;
- STMT_EXPR_PREINC => 45;
- STMT_EXPR_POSTDEC => 46;
- STMT_EXPR_POSTINC => 47;
- # Ternary
- STMT_EXPR_TERNARY => 48;
- # Statements
- STMT_IF => 49;
- STMT_FOR => 50;
- STMT_DO_WHILE => 51;
- STMT_WHILE => 52;
- STMT_ALLOCATE => 53;
- STMT_ASM => 54;
- STMT_RETURN => 55;
- STMT_BREAK => 56;
- STMT_CONTINUE => 57;
- STMT_WRITECHAR => 58;
- STMT_LABEL => 59;
- STMT_GOTO => 60;
- # Block statement
- STMT_BLOCK => 61;
- # Top level statements
- STMT_GLOBAL => 62;
- STMT_MACRO => 63;
- STMT_FUN_DECL => 64;
- STMT_ARRAY_DECL => 65;
- STMT_CHAR_ARRAY_DECL => 66;
- STMT_INT_ARRAY_DECL => 67;
- STMT_VAR_DECL => 68;
- ## Build a parser, that starts with the lexer of the main file
- # a0 - lexer of main file
- # a1 - string buffer
- # @return parser
- parser : (a0, a1) {
- # Slots: lexer list, string buffer, switch flag, include-once list
- return(tuple4(list(a0), a1, FALSE, vector(0, 1)));
- }
- ## Query the lexer, that the parser reads from right now
- # a0 - parser
- # @return lexer (value of the first element of the lexer list)
- parser_lexer : (a0) {
- # x0 - lexer list
- allocate(1);
- x0 = a0[PARSER_LEX];
- return(list_value(x0));
- }
- ## Query the string buffer of the parser
- # a0 - parser
- # @return string buffer
- parser_buffer : (a0) {
- # x0 - string buffer
- allocate(1);
- x0 = a0[PARSER_BUFFER];
- return(x0);
- }
- ## Compute the location, where the current token starts
- # The lexer position is behind the token, so the column is moved back
- # by the token length.
- # a0 - parser
- # @return token location (newly allocated)
- parser_token_location : (a0) {
- # x0 - current lexer
- # x1 - token location
- allocate(2);
- x0 = parser_lexer(a0);
- # Position behind the token
- x1 = lexer_token_location(x0);
- # Moving column back to the start of the token
- x1[LOC_COLUMN] = x1[LOC_COLUMN] - lexer_token_length(x0) + 1;
- return(x1);
- }
- ## Check if a file is in the include-once list of the parser
- # Names are compared as strings.
- # a0 - parser
- # a1 - filename
- # @return TRUE if the file is in the list, FALSE otherwise
- parser_include_search : (a0, a1) {
- # x0 - include list
- # x1 - index
- # x2 - count of list entries
- allocate(3);
- x0 = a0[PARSER_INCLIST];
- x2 = vector_size(x0);
- x1 = 0;
- # Looking at every remembered name
- while (x1 < x2) {
- if (strcmp(vector_get(x0, x1), a1) == 0) { return(TRUE); }
- x1 = x1 + 1;
- }
- # No name matched
- return(FALSE);
- }
- ## Add file to parser, if it is needed
- # A new lexer for the file becomes the current lexer of the parser. The
- # lexer takes over the filename string: lexer_destroy frees it. That's why
- # the include list gets its own copy of the name - otherwise it would
- # keep a pointer to freed memory, that later searches would compare with.
- # a0 - parser
- # a1 - filename
- # a2 - include once
- parser_include_add : (a0, a1, a2) {
- # Checking, if we need to add this file
- if (!(a2 && parser_include_search(a0, a1))) {
- # Adding lexer to list
- a0[PARSER_LEX] = list_insert(a0[PARSER_LEX], lexer_file(a1));
- # Adding copy of file name to vector, if we are including it once
- if (a2) { vector_push(a0[PARSER_INCLIST], strdup(a1)); }
- }
- }
- ## Put the current token of the current lexer on hold
- # a0 - parser
- parser_lexer_hold : (a0) {
- # x0 - current lexer
- allocate(1);
- x0 = parser_lexer(a0);
- lexer_hold(x0);
- }
- ## Fetch the next token from the current lexer
- # a0 - parser
- # @return token ID
- parser_lex : (a0) {
- # x0 - current lexer
- allocate(1);
- x0 = parser_lexer(a0);
- return(lexer_lex(x0));
- }
- ## Fetch the next assembly line from the current lexer
- # a0 - parser
- # @return is last assembly line
- parser_lex_asm : (a0) {
- # x0 - current lexer
- allocate(1);
- x0 = parser_lexer(a0);
- return(lexer_lex_asm(x0));
- }
- ## Report an error at given location and terminate
- # Output goes to stderr as "file:line:column: message", exit code is 1.
- # a0 - parser (not used)
- # a1 - token location
- # a2 - error message
- parser_error : (a0, a1, a2) {
- # Where
- fputloc(a1, stderr);
- fputs(stderr, ": ");
- # What
- fputs(stderr, a2);
- # The newline also flushes stderr
- fputc(stderr, '\n');
- exit(1);
- }
- ## Parse file
- # Parses top level statements of the current lexer until its input ends.
- # Then the lexer is destroyed, its list element is released and parsing
- # goes on with the previous lexer (the file, that included this one),
- # until no lexer is left.
- # a0 - parser
- # @return AST (vector of top level statements)
- parser_parse : (a0) {
- # x0 - top level statements vector
- # x1 - token
- # x2 - statement
- # x3 - token location
- allocate(4);
- # Allocating statements vector
- x0 = vector(0, 1);
- label parser_parse_continue;
- ## Parsing file
- # Fetching token for tests
- x1 = parser_lex(a0);
- # Testing until got end of file
- while (x1 != TOKEN_END) {
- # Fetching token location
- x3 = parser_token_location(a0);
- # Parsing statement
- x2 = top_level_statement(a0, x1, x3);
- # Adding statement, if needed
- if (x2) { vector_push(x0, x2); }
- # Fetching next token
- x1 = parser_lex(a0);
- }
- # Destroying lexer
- lexer_destroy(parser_lexer(a0));
- # Switching to previous lexer. list_pop also frees the list element of
- # the finished lexer, that would leak otherwise
- a0[PARSER_LEX] = list_pop(a0[PARSER_LEX]);
- # If lexer is not NULL, continuing
- if (a0[PARSER_LEX] != NULL) { goto &parser_parse_continue; }
- # Returning statements vector
- return(x0);
- }
- ## Parse one statement of the top level of a file
- # Handles global, include and include_once statements, macro definitions
- # (identifier =>) and declarations (identifier :). Anything else is
- # reported as syntax error, which terminates the program.
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- top_level_statement : (a0, a1, a2) {
- # x0 - next token
- # x1 - identifier
- allocate(2);
- # Statements, that start with a keyword
- if (a1 == TOKEN_GLOBAL) { return(global_statement(a0, a2)); }
- if (a1 == TOKEN_INCLUDE) { return(include_statement(a0, a2, FALSE)); }
- if (a1 == TOKEN_INCLUDE_ONCE) { return(include_statement(a0, a2, TRUE)); }
- # All other statements start with an identifier
- if (a1 != TOKEN_IDENTIFIER) { goto &top_level_statement_fail; }
- # Token text is overwritten by the next token, so it is copied
- x1 = strdup(lexer_token_text(parser_lexer(a0)));
- x0 = parser_lex(a0);
- # identifier => ... is a macro definition
- if (x0 == TOKEN_ARROW) { return(macro_declaration(a0, x1, a2)); }
- # Everything left needs a colon after the identifier
- if (x0 != ':') { goto &top_level_statement_fail; }
- # Token after the colon selects the kind of declaration
- x0 = parser_lex(a0);
- # Function
- if (x0 == '(') { return(function_declaration(a0, x1, a2)); }
- # Array with listed contents
- if (x0 == '[') { return(array_declaration(a0, x1, a2)); }
- # Reserved char array
- if (x0 == TOKEN_TYPE_CHAR) { return(type_array_declaration(a0, x1, STMT_CHAR_ARRAY_DECL, a2)); }
- # Reserved int array
- if (x0 == TOKEN_TYPE_INT) { return(type_array_declaration(a0, x1, STMT_INT_ARRAY_DECL, a2)); }
- # Simple variable
- return(variable_declaration(a0, x1, x0, a2));
- label top_level_statement_fail;
- parser_error(a0, a2, "Syntax error - none of available top level statements matched");
- }
- ## Parse global statement
- # Accepts either a single identifier or a parenthesised, comma separated
- # list of identifiers (at least one), followed by a semicolon. Each
- # identifier is copied with strdup and collected into the ops vector.
- # a0 - parser
- # a1 - token location
- # @return statement
- global_statement : (a0, a1) {
- # x0 - ops vector
- # x1 - next token
- # x2 - identifier
- allocate(3);
- # Allocating ops vector
- x0 = vector(0, 1);
- # Fetching next token
- x1 = parser_lex(a0);
- # If next token is identifier - globalling only one identifier
- if (x1 == TOKEN_IDENTIFIER) {
- # Making a copy of identifier
- x2 = strdup(lexer_token_text(parser_lexer(a0)));
- # Pushing identifier to ops
- vector_push(x0, x2);
- } else {
- # If not an identifier, check if it is (. If not, erroring.
- if (x1 == '(') {
- # Loop entry: one pass per identifier of the list
- label global_statement_next;
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking if it is identifier
- if (x1 != TOKEN_IDENTIFIER) { goto &global_statement_invalid; }
- # Making a copy of identifier
- x2 = strdup(lexer_token_text(parser_lexer(a0)));
- # Pushing identifier to ops
- vector_push(x0, x2);
- ## If next token is , - redoing loop
- # Fetching next token
- x1 = parser_lex(a0);
- if (x1 == ',') { goto &global_statement_next; }
- # Checking for correct syntax
- if (x1 != ')') { goto &global_statement_invalid; }
- } else { goto &global_statement_invalid; }
- }
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for semicolon
- if (x1 != ';') { goto &global_statement_end_error; }
- # Returning statement
- return(tuple3(STMT_GLOBAL, a1, x0));
- # Error exits; parser_error terminates, so these do not fall through
- label global_statement_invalid;
- parser_error(a0, a1, "Invalid global statement");
- label global_statement_end_error;
- parser_error(a0, a1, "Global statement is not ended with semicolon");
- }
- ## Parse include statement
- # Accepts either a single string or a parenthesised, comma separated list
- # of strings (at least one), followed by a semicolon. Every named file is
- # then handed to parser_include_add, and a0[PARSER_SWITCH] is set to TRUE.
- # No AST node is produced.
- # a0 - parser
- # a1 - token location
- # a2 - include once
- # @return NULL
- include_statement : (a0, a1, a2) {
- # x0 - file vector
- # x1 - next token/i (reused as loop index once parsing is done)
- # x2 - file
- allocate(3);
- # Allocating files vector
- x0 = vector(0, 1);
- # Fetching next token
- x1 = parser_lex(a0);
- # If next token is string - importing one file
- if (x1 == TOKEN_STRING) {
- # Making a copy of data in string
- x2 = strlitdup(lexer_token_text(parser_lexer(a0)));
- # Pushing file to vector
- vector_push(x0, x2);
- } else {
- # If not a string, check if it is (. If not, erroring.
- if (x1 == '(') {
- # Loop entry: one pass per file name of the list
- label include_statement_next;
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking if it is string
- if (x1 != TOKEN_STRING) { goto &include_statement_invalid; }
- # Making a copy of data in string
- x2 = strlitdup(lexer_token_text(parser_lexer(a0)));
- # Pushing file to vector
- vector_push(x0, x2);
- ## If next token is , - redoing loop
- # Fetching next token
- x1 = parser_lex(a0);
- if (x1 == ',') { goto &include_statement_next; }
- # Checking for correct syntax
- if (x1 != ')') { goto &include_statement_invalid; }
- } else { goto &include_statement_invalid; }
- }
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for semicolon
- if (x1 != ';') { goto &include_statement_end_error; }
- # Creating lexers for specified files
- x1 = 0;
- while (x1 < vector_size(x0)) {
- # Getting filename
- x2 = vector_get(x0, x1);
- # Adding file to parse
- parser_include_add(a0, x2, a2);
- # Moving to next file
- x1 = x1 + 1;
- }
- # Marking we switched file
- a0[PARSER_SWITCH] = TRUE;
- return(NULL);
- # Error exits; parser_error terminates, so these do not fall through
- label include_statement_invalid;
- parser_error(a0, a1, "Invalid include statement");
- label include_statement_end_error;
- parser_error(a0, a1, "Include statement is not ended with semicolon");
- }
- ## Parse macro declaration
- # Called after the identifier and TOKEN_ARROW were consumed; parses the
- # macro's expression and the terminating semicolon.
- # a0 - parser
- # a1 - identifier
- # a2 - token location
- # @return statement
- macro_declaration : (a0, a1, a2) {
- # x0 - token
- # x1 - expression
- allocate(2);
- # Fetching next token
- x0 = parser_lex(a0);
- # Fetching expression
- x1 = expression(a0, x0);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for semicolon
- if (x0 != ';') { goto &macro_declaration_end_error; }
- # Returning statement
- return(tuple4(STMT_MACRO, a2, a1, x1));
- label macro_declaration_end_error;
- # The error is reported at the token location (a2); a1 is the identifier
- parser_error(a0, a2, "Macro declaration is not ended with semicolon");
- }
- ## Parse function declaration
- # Called after `identifier : (` was consumed. Parses the argument list,
- # then the function body as a strict block statement.
- # Each TOKEN_ARGUMENT must carry a value equal to the number of arguments
- # seen so far, so arguments have to appear in consecutive order starting
- # from 0. TOKEN_ELLIPSIS is accepted only directly before the closing
- # parenthesis and is not counted.
- # a0 - parser
- # a1 - identifier
- # a2 - token location
- # @return statement
- function_declaration : (a0, a1, a2) {
- # x0 - arg count
- # x1 - token
- # x2 - block statement
- allocate(3);
- # Resetting arg count
- x0 = 0;
- ## Parsing arguments
- label function_declaration_arg_loop;
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking if arguments ended
- if (x1 == ')') { goto &function_declaration_arg_end; }
- # Checking for argument
- # After an ellipsis the next token is fetched so that the ')' check at
- # function_declaration_arg_end applies to it
- if (x1 == TOKEN_ELLIPSIS) { x1 = parser_lex(a0); goto &function_declaration_arg_end; }
- if (x1 != TOKEN_ARGUMENT) { goto &function_declaration_arg_invalid; }
- # Argument value must match its position in the list
- if (x0 != lexer_token_value(parser_lexer(a0)))
- { goto &function_declaration_arg_invalid; }
- # Incrementing argument count
- x0 = x0 + 1;
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for next argument
- if (x1 == ',') { goto &function_declaration_arg_loop; }
- label function_declaration_arg_end;
- # Checking we actually exited arguments correctly
- if (x1 != ')') { goto &function_declaration_arg_invalid; }
- # Fetching next token
- x1 = parser_lex(a0);
- # Fetching block statement (strict: must start with '{')
- x2 = block_statement(a0, x1, TRUE);
- # Returning statement
- return(tuple5(STMT_FUN_DECL, a2, a1, x0, x2));
- label function_declaration_arg_invalid;
- parser_error(a0, a2, "Invalid function arguments declaration syntax");
- }
- ## Parse array declaration
- # Called after `identifier : [` was consumed. Parses a comma separated
- # list of element expressions (the first one is always parsed), the
- # closing bracket and the terminating semicolon.
- # a0 - parser
- # a1 - identifier
- # a2 - token location
- # @return statement
- array_declaration : (a0, a1, a2) {
- # x0 - ops vector
- # x1 - next token
- # x2 - expression
- allocate(3);
- # Allocating operand vector
- x0 = vector(0, 1);
- label array_declaration_loop;
- # Fetching next token
- x1 = parser_lex(a0);
- # Fetching next expression
- x2 = expression(a0, x1);
- # Pushing expression to operand vector
- vector_push(x0, x2);
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking if there is another element
- if (x1 == ',') { goto &array_declaration_loop; }
- # Checking for correct syntax
- if (x1 != ']') { goto &array_declaration_invalid; }
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for semicolon
- if (x1 != ';') { goto &array_declaration_end_error; }
- # Returning statement
- return(tuple4(STMT_ARRAY_DECL, a2, a1, x0));
- label array_declaration_invalid;
- parser_error(a0, a2, "Invalid array declaration syntax");
- label array_declaration_end_error;
- # The error is reported at the token location (a2); a1 is the identifier
- parser_error(a0, a2, "Array statement is not ended with semicolon");
- }
- ## Parse type array declaration
- # Called after `identifier :` and the type keyword were consumed. Parses
- # `[ expression ]` and the terminating semicolon. The node type is
- # supplied by the caller (a2).
- # a0 - parser
- # a1 - identifier
- # a2 - type
- # a3 - token location
- # @return statement
- type_array_declaration : (a0, a1, a2, a3) {
- # x0 - next token
- # x1 - expression
- allocate(2);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != '[') { goto &type_array_declaration_invalid; }
- # Fetching next token
- x0 = parser_lex(a0);
- # Fetching next expression
- x1 = expression(a0, x0);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != ']') { goto &type_array_declaration_invalid; }
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for semicolon
- if (x0 != ';') { goto &type_array_declaration_end_error; }
- # Returning statement
- return(tuple4(a2, a3, a1, x1));
- label type_array_declaration_invalid;
- parser_error(a0, a3, "Invalid type array declaration syntax");
- label type_array_declaration_end_error;
- # The error is reported at the token location (a3); a1 is the identifier
- parser_error(a0, a3, "Type array statement is not ended with semicolon");
- }
- ## Parse variable declaration
- # Called after `identifier :` was consumed, with the first token of the
- # initializer already fetched (a2). Parses the initializer expression and
- # the terminating semicolon.
- # a0 - parser
- # a1 - identifier
- # a2 - current token
- # a3 - token location
- # @return statement
- variable_declaration : (a0, a1, a2, a3) {
- # x0 - expression
- # x1 - next token
- allocate(2);
- # Fetching next expression
- x0 = expression(a0, a2);
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for semicolon
- if (x1 != ';') { goto &variable_declaration_end_error; }
- # Returning statement
- return(tuple4(STMT_VAR_DECL, a3, a1, x0));
- label variable_declaration_end_error;
- # The error is reported at the token location (a3); a1 is the identifier
- parser_error(a0, a3, "Variable statement is not ended with semicolon");
- }
- ## Parse block of statements
- # When the current token is '{', statements are parsed until the matching
- # '}'. Otherwise, in non-strict mode, a single statement is parsed and
- # wrapped into a block; in strict mode this is a syntax error.
- # Statements whose node type is below STMT_IF must be followed by a
- # semicolon, which is consumed here; other statements are not checked.
- # a0 - parser
- # a1 - current token
- # a2 - strict
- # @return statement
- block_statement : (a0, a1, a2) {
- # x0 - ops vector
- # x1 - next token
- # x2 - expression
- # x3 - token location
- # x4 - statement token location
- allocate(5);
- # Allocating operand (statements) vector
- x0 = vector(0, 1);
- # Fetching token location
- x3 = parser_token_location(a0);
- # The single-statement path below never fetches a separate statement
- # location, so the semicolon error would otherwise read an unset x4
- x4 = x3;
- # Checking for correct syntax
- if (a1 != '{') {
- if (!a2) {
- # Fetching statement
- x2 = statement(a0, a1, x3);
- # Checking if expression is ended with semicolon
- if (x2[NODE_TYPE] < STMT_IF) {
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for semicolon
- if (x1 != ';') { goto &block_statement_content_invalid; }
- }
- # Pushing statement to vector
- vector_push(x0, x2);
- # Ending block statement
- goto &block_statement_end;
- } else { goto &block_statement_invalid; }
- }
- label block_statement_loop;
- # Fetching next token
- x1 = parser_lex(a0);
- # Fetching statement token location
- x4 = parser_token_location(a0);
- # Checking if we need to exit the loop
- if (x1 == '}') { goto &block_statement_end; }
- # Checking if we have next block inside this
- if (x1 == '{') { x2 = block_statement(a0, x1, TRUE); }
- # Else fetching statement
- else { x2 = statement(a0, x1, x4); }
- # Pushing statement to vector
- vector_push(x0, x2);
- # Checking if expression is ended with semicolon
- if (x2[NODE_TYPE] < STMT_IF) {
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for semicolon
- if (x1 != ';') { goto &block_statement_content_invalid; }
- }
- # So, making next iteration
- goto &block_statement_loop;
- label block_statement_end;
- # Returning block statement
- return(tuple3(STMT_BLOCK, x3, x0));
- label block_statement_invalid;
- parser_error(a0, x3, "Invalid block statement syntax");
- label block_statement_content_invalid;
- parser_error(a0, x4, "Expression inside block is not ended with semicolon");
- }
- ## Parse statement
- # Dispatches on the current token to the matching statement parser; any
- # token that does not introduce a statement is parsed as an expression.
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- statement : (a0, a1, a2) {
- # Conditionals and loops
- if (a1 == TOKEN_IF) { return(if_statement(a0, a2)); }
- if (a1 == TOKEN_FOR) { return(for_statement(a0, a2)); }
- if (a1 == TOKEN_DO) { return(do_while_statement(a0, a2)); }
- if (a1 == TOKEN_WHILE) { return(while_statement(a0, a2)); }
- # Locals reservation and inline assembly
- if (a1 == TOKEN_ALLOCATE) { return(allocate_statement(a0, a2)); }
- if (a1 == TOKEN_ASM) { return(assembly_statement(a0, a2)); }
- # Jumps out of functions and loops; break and continue share one parser
- if (a1 == TOKEN_RETURN) { return(return_statement(a0, a2)); }
- if (a1 == TOKEN_BREAK) { return(loop_jump_statement(a0, a2, a1)); }
- if (a1 == TOKEN_CONTINUE) { return(loop_jump_statement(a0, a2, a1)); }
- # Remaining keyword statements
- if (a1 == TOKEN_WRITECHAR) { return(writechar_statement(a0, a2)); }
- if (a1 == TOKEN_LABEL) { return(label_statement(a0, a2)); }
- if (a1 == TOKEN_GOTO) { return(goto_statement(a0, a2)); }
- # No keyword matched: plain expression
- return(expression(a0, a1));
- }
- ## Parse if statement
- # Called after TOKEN_IF was consumed. Parses `( condition ) block`, then
- # any number of `else if` branches and at most one final `else` branch.
- # The ops vector receives a condition followed by its block for every
- # if / else-if branch, and finally the else block when one is present.
- # Branch bodies are parsed non-strictly, so a single statement is accepted.
- # a0 - parser
- # a1 - token location
- # @return statement
- if_statement : (a0, a1) {
- # x0 - ops vector
- # x1 - next token
- # x2 - expression
- allocate(3);
- # Allocating operand vector
- x0 = vector(0, 3);
- # Re-entry point for every `else if` branch
- label if_statement_parse;
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for correct syntax
- if (x1 != '(') { goto &if_statement_invalid; }
- # Fetching next token
- x1 = parser_lex(a0);
- # Fetching condition expression
- x2 = expression(a0, x1);
- # Append condition to vector
- vector_push(x0, x2);
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for correct syntax
- if (x1 != ')') { goto &if_statement_invalid; }
- # Fetching next token
- x1 = parser_lex(a0);
- # Fetching block statement
- x2 = block_statement(a0, x1, FALSE);
- # Append if block to vector
- vector_push(x0, x2);
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking if we have else statement; if not, the token is held
- if (x1 != TOKEN_ELSE) { parser_lexer_hold(a0); goto &if_statement_end; }
- # Fetching next token
- x1 = parser_lex(a0);
- # If this else have condition, parsing as if statement
- if (x1 == TOKEN_IF) { goto &if_statement_parse; }
- # Fetching else block statement
- x2 = block_statement(a0, x1, FALSE);
- # Append else block to vector
- vector_push(x0, x2);
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking if we have another else branch
- if (x1 == TOKEN_ELSE) { goto &if_statement_else_invalid; }
- # Holding token
- parser_lexer_hold(a0);
- label if_statement_end;
- # Return statement
- return(tuple3(STMT_IF, a1, x0));
- label if_statement_invalid;
- parser_error(a0, a1, "Invalid if statement syntax");
- label if_statement_else_invalid;
- parser_error(a0, a1, "Put else branches with condition before last else branch.");
- }
- ## Parse for statement
- # Called after TOKEN_FOR was consumed. Parses
- # `( [init] ; [cond] ; [iter {, iter}] ) block`.
- # An omitted init or cond expression is stored as NULL; iteration
- # expressions are collected into a vector that may be empty. The body is
- # parsed non-strictly, so a single statement is accepted.
- # a0 - parser
- # a1 - token location
- # @return statement
- for_statement : (a0, a1) {
- # x0 - next token
- # x1 - init expression
- # x2 - cond expression
- # x3 - iteration expressions vector
- # x4 - block statement
- # x5 - expression
- allocate(6);
- # Initializing iteration expressions vector
- x3 = vector(0, 1);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != '(') { goto &for_statement_invalid; }
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking if init expression is empty
- if (x0 == ';') { x1 = NULL; goto &for_statement_skip_init; }
- # Fetching init expression
- x1 = expression(a0, x0);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != ';') { goto &for_statement_invalid; }
- label for_statement_skip_init;
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking if cond expression is empty
- if (x0 == ';') { x2 = NULL; goto &for_statement_skip_cond; }
- # Fetching cond expression
- x2 = expression(a0, x0);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != ';') { goto &for_statement_invalid; }
- label for_statement_skip_cond;
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for iteration expressions (none when ')' follows directly)
- if (x0 == ')') { goto &for_statement_iter_end; }
- # Holding token, the loop below fetches it again
- parser_lexer_hold(a0);
- label for_statement_iter_loop;
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax (')' here means a comma had no expression)
- if (x0 == ')') { goto &for_statement_invalid; }
- # Fetching expression
- x5 = expression(a0, x0);
- # Pushing expression to vector
- vector_push(x3, x5);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for another iteration expressions
- if (x0 == ',') { goto &for_statement_iter_loop; }
- label for_statement_iter_end;
- # Checking for correct syntax
- if (x0 != ')') { goto &for_statement_invalid; }
- # Fetching next token
- x0 = parser_lex(a0);
- # Fetching block statement
- x4 = block_statement(a0, x0, FALSE);
- # Returning statement
- return(tuple6(STMT_FOR, a1, x1, x2, x3, x4));
- label for_statement_invalid;
- parser_error(a0, a1, "Invalid for statement syntax");
- }
- ## Parse do-while statement
- # Called after TOKEN_DO was consumed. Parses the body, TOKEN_WHILE, the
- # parenthesised condition and the terminating semicolon.
- # a0 - parser
- # a1 - token location
- # @return statement
- do_while_statement : (a0, a1) {
- # x0 - cond expression
- # x1 - block statement
- allocate(2);
- # Body comes first; a single statement is accepted in place of a block
- x1 = block_statement(a0, parser_lex(a0), FALSE);
- # TOKEN_WHILE and the opening parenthesis must follow
- if (parser_lex(a0) != TOKEN_WHILE) { goto &do_while_statement_invalid; }
- if (parser_lex(a0) != '(') { goto &do_while_statement_invalid; }
- # Condition, closed by a parenthesis
- x0 = expression(a0, parser_lex(a0));
- if (parser_lex(a0) != ')') { goto &do_while_statement_invalid; }
- # Terminating semicolon
- if (parser_lex(a0) != ';') { goto &do_while_statement_end_error; }
- return(tuple4(STMT_DO_WHILE, a1, x0, x1));
- label do_while_statement_invalid;
- parser_error(a0, a1, "Invalid do-while statement syntax");
- label do_while_statement_end_error;
- parser_error(a0, a1, "Do-while statement is not ended with semicolon");
- }
- ## Parse while statement
- # Called after TOKEN_WHILE was consumed. Parses the parenthesised
- # condition followed by the loop body.
- # a0 - parser
- # a1 - token location
- # @return statement
- while_statement : (a0, a1) {
- # x0 - cond expression
- # x1 - block statement
- allocate(2);
- # Opening parenthesis
- if (parser_lex(a0) != '(') { goto &while_statement_invalid; }
- # Condition, closed by a parenthesis
- x0 = expression(a0, parser_lex(a0));
- if (parser_lex(a0) != ')') { goto &while_statement_invalid; }
- # Body; a single statement is accepted in place of a block
- x1 = block_statement(a0, parser_lex(a0), FALSE);
- return(tuple4(STMT_WHILE, a1, x0, x1));
- label while_statement_invalid;
- parser_error(a0, a1, "Invalid while statement syntax");
- }
- ## Parse allocate statement
- # Called after TOKEN_ALLOCATE was consumed. Parses `( expression )` and
- # the terminating semicolon.
- # a0 - parser
- # a1 - token location
- # @return statement
- allocate_statement : (a0, a1) {
- # x0 - next token
- # x1 - expression
- allocate(2);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != '(') { goto &allocate_statement_invalid; }
- # Fetching next token
- x0 = parser_lex(a0);
- # Fetching size expression
- x1 = expression(a0, x0);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != ')') { goto &allocate_statement_invalid; }
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for semicolon; the jump must stay inside this function so
- # that the allocate-specific message is the one reported
- if (x0 != ';') { goto &allocate_statement_end_error; }
- # Return statement
- return(tuple3(STMT_ALLOCATE, a1, x1));
- label allocate_statement_invalid;
- parser_error(a0, a1, "Invalid allocate statement syntax");
- label allocate_statement_end_error;
- parser_error(a0, a1, "Allocate statement is not ended with semicolon");
- }
- ## Parse assembly statement
- # Called after TOKEN_ASM was consumed. Expects '{', then assembly lines
- # fetched with parser_lex_asm, each followed by a ';' token, and finally
- # '}'. The text of every line is copied with strdup into the asm vector.
- # a0 - parser
- # a1 - token location
- # @return statement
- assembly_statement : (a0, a1) {
- # x0 - asm vector
- # x1 - next token/asm line
- # x2 - last line
- allocate(3);
- # Allocating assembly vector
- x0 = vector(0, 1);
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for correct syntax
- if (x1 != '{') { goto &assembly_statement_invalid; }
- label assembly_statement_loop;
- # Parsing assembly; x2 tells whether this was the last line
- x2 = parser_lex_asm(a0);
- # Duplicating assembly line
- x1 = strdup(lexer_token_text(parser_lexer(a0)));
- # Pushing assembly line to vector
- vector_push(x0, x1);
- # Fetching next token
- x1 = parser_lex(a0);
- # This must be a semicolon
- if (x1 != ';') { goto &assembly_statement_invalid; }
- # If we have another lines, parsing them
- if (!x2) { goto &assembly_statement_loop; }
- # Fetching next token
- x1 = parser_lex(a0);
- # Checking for correct syntax
- if (x1 != '}') { goto &assembly_statement_invalid; }
- # Returning statement
- return(tuple3(STMT_ASM, a1, x0));
- label assembly_statement_invalid;
- parser_error(a0, a1, "Invalid assembly statement syntax");
- }
- ## Parse return statement
- # Called after TOKEN_RETURN was consumed. The ops vector holds the
- # returned expression, or stays empty for a return without a value.
- # a0 - parser
- # a1 - token location
- # @return statement
- return_statement : (a0, a1) {
- # x0 - ops vector
- # x1 - next token
- # x2 - expression
- allocate(3);
- x0 = vector(0, 1);
- x1 = parser_lex(a0);
- if (x1 == ';') {
- # No value: holding the semicolon so the check below fetches it again
- parser_lexer_hold(a0);
- } else {
- # Parsing the returned value and storing it as the only operand
- x2 = expression(a0, x1);
- vector_push(x0, x2);
- }
- # Both forms must be closed by a semicolon
- x1 = parser_lex(a0);
- if (x1 != ';') { goto &return_statement_end_error; }
- return(tuple3(STMT_RETURN, a1, x0));
- label return_statement_end_error;
- parser_error(a0, a1, "Return statement is not ended with semicolon");
- }
- ## Parse break/continue statement
- # Called after TOKEN_BREAK or TOKEN_CONTINUE was consumed; only the
- # terminating semicolon is left to check. The node type is derived from
- # the token by its offset from TOKEN_BREAK, applied to STMT_BREAK.
- # a0 - parser
- # a1 - token location
- # a2 - statement token
- # @return statement
- loop_jump_statement : (a0, a1, a2) {
- # x0 - next token
- allocate(1);
- x0 = parser_lex(a0);
- # Well-formed statement: returning the node right away
- if (x0 == ';') { return(tuple2(STMT_BREAK + a2 - TOKEN_BREAK, a1)); }
- # Anything else falls into the error exit
- label loop_jump_statement_end_error;
- parser_error(a0, a1, "Break/continue statement is not ended with semicolon");
- }
- ## Parse writechar statement
- # Called after TOKEN_WRITECHAR was consumed. Parses
- # `( array , index , value )` and the terminating semicolon.
- # a0 - parser
- # a1 - token location
- # @return statement
- writechar_statement : (a0, a1) {
- # x0 - array expression
- # x1 - index expression
- # x2 - value expression
- allocate(3);
- # Opening parenthesis
- if (parser_lex(a0) != '(') { goto &writechar_statement_invalid; }
- # Array operand, followed by a comma
- x0 = expression(a0, parser_lex(a0));
- if (parser_lex(a0) != ',') { goto &writechar_statement_invalid; }
- # Index operand, followed by a comma
- x1 = expression(a0, parser_lex(a0));
- if (parser_lex(a0) != ',') { goto &writechar_statement_invalid; }
- # Value operand, followed by the closing parenthesis
- x2 = expression(a0, parser_lex(a0));
- if (parser_lex(a0) != ')') { goto &writechar_statement_invalid; }
- # Terminating semicolon
- if (parser_lex(a0) != ';') { goto &writechar_statement_end_error; }
- return(tuple5(STMT_WRITECHAR, a1, x0, x1, x2));
- label writechar_statement_invalid;
- parser_error(a0, a1, "Invalid writechar statement");
- label writechar_statement_end_error;
- parser_error(a0, a1, "Writechar statement is not ended with semicolon");
- }
- ## Parse label statement
- # Called after TOKEN_LABEL was consumed. Parses the label identifier
- # (copied with strdup) and the terminating semicolon.
- # a0 - parser
- # a1 - token location
- # @return statement
- label_statement : (a0, a1) {
- # x0 - next token
- # x1 - duplicated identifier
- allocate(2);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for correct syntax
- if (x0 != TOKEN_IDENTIFIER) { goto &label_statement_invalid; }
- # Duplicating identifier
- x1 = strdup(lexer_token_text(parser_lexer(a0)));
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for semicolon
- if (x0 != ';') { goto &label_statement_end_error; }
- # Returning statement
- return(tuple3(STMT_LABEL, a1, x1));
- label label_statement_invalid;
- parser_error(a0, a1, "Invalid label statement syntax");
- label label_statement_end_error;
- parser_error(a0, a1, "Label statement is not ended with semicolon");
- }
- ## Parse goto statement
- # Called after TOKEN_GOTO was consumed. The jump target is parsed as a
- # general expression, followed by the terminating semicolon.
- # a0 - parser
- # a1 - token location
- # @return statement
- goto_statement : (a0, a1) {
- # x0 - next token
- # x1 - expression
- allocate(2);
- # Fetching next token
- x0 = parser_lex(a0);
- # Fetching goto expression
- x1 = expression(a0, x0);
- # Fetching next token
- x0 = parser_lex(a0);
- # Checking for semicolon; the jump must stay inside this function so
- # that the goto-specific message is the one reported
- if (x0 != ';') { goto &goto_statement_end_error; }
- # Returning statement
- return(tuple3(STMT_GOTO, a1, x1));
- label goto_statement_end_error;
- parser_error(a0, a1, "Goto statement is not ended with semicolon");
- }
- ## Parse expression
- # Entry point of the expression grammar: captures the location of the
- # current token and starts at the assignment level.
- # a0 - parser
- # a1 - current token
- # @return statement
- expression : (a0, a1) {
- return(assignment_expression(a0, a1, parser_token_location(a0)));
- }
- ## Try to parse assignment expression
- # Parses a ternary-level operand and, when an assignment operator
- # follows, hands over to assignment_expr with the matching node type.
- # Otherwise the operand itself is the result.
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- assignment_expression : (a0, a1, a2) {
- # x0 - op1
- # x1 - next token
- allocate(2);
- x0 = ternary_expression(a0, a1, a2);
- x1 = parser_lex(a0);
- # Mapping the operator token to its assignment node type
- if (x1 == '=') { return(assignment_expr(a0, x0, STMT_EXPR_ASSIGN, a2)); }
- else if (x1 == TOKEN_AADD) { return(assignment_expr(a0, x0, STMT_EXPR_ADD_ASSIGN, a2)); }
- else if (x1 == TOKEN_ASUB) { return(assignment_expr(a0, x0, STMT_EXPR_SUB_ASSIGN, a2)); }
- else if (x1 == TOKEN_AMUL) { return(assignment_expr(a0, x0, STMT_EXPR_MUL_ASSIGN, a2)); }
- else if (x1 == TOKEN_ADIV) { return(assignment_expr(a0, x0, STMT_EXPR_DIV_ASSIGN, a2)); }
- else if (x1 == TOKEN_AMOD) { return(assignment_expr(a0, x0, STMT_EXPR_MOD_ASSIGN, a2)); }
- else if (x1 == TOKEN_AAND) { return(assignment_expr(a0, x0, STMT_EXPR_AND_ASSIGN, a2)); }
- else if (x1 == TOKEN_AXOR) { return(assignment_expr(a0, x0, STMT_EXPR_XOR_ASSIGN, a2)); }
- else if (x1 == TOKEN_AOR) { return(assignment_expr(a0, x0, STMT_EXPR_OR_ASSIGN, a2)); }
- else if (x1 == TOKEN_ASHL) { return(assignment_expr(a0, x0, STMT_EXPR_SHL_ASSIGN, a2)); }
- else if (x1 == TOKEN_ASHR) { return(assignment_expr(a0, x0, STMT_EXPR_SHR_ASSIGN, a2)); }
- # Not an assignment: holding the token and returning the operand
- else {
- parser_lexer_hold(a0);
- return(x0);
- }
- }
- ## Parse assignment expression
- # Called after the assignment operator was consumed. The right-hand side
- # is parsed at the ternary level.
- # a0 - parser
- # a1 - op1
- # a2 - assign type
- # a3 - token location
- # @return statement
- assignment_expr : (a0, a1, a2, a3) {
- # x0 - next token
- # x1 - location of that token
- allocate(2);
- # The token has to be fetched before its location is queried
- x0 = parser_lex(a0);
- x1 = parser_token_location(a0);
- # Building the node straight from the parsed right-hand side
- return(tuple4(a2, a3, a1, ternary_expression(a0, x0, x1)));
- }
- ## Try to parse ternary expression
- # Parses a logical-or level operand and, when '?' follows, hands over to
- # ternary_expr. Otherwise the token is held and the operand is returned.
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- ternary_expression : (a0, a1, a2) {
- # x0 - op1
- # x1 - next token
- allocate(2);
- # Fetching first operand
- x0 = logical_or_expression(a0, a1, NULL, a2);
- # Fetching next token
- x1 = parser_lex(a0);
- # Parsing ternary; ternary_expr takes exactly (parser, op1, location)
- if (x1 == '?') { return(ternary_expr(a0, x0, a2)); }
- # Holding token
- parser_lexer_hold(a0);
- # Returning operand
- return(x0);
- }
- ## Parse ternary expression
- # Called after '?' was consumed. Parses the two branches, separated by
- # ':', each at the ternary level.
- # a0 - parser
- # a1 - op1
- # a2 - token location
- # @return statement
- ternary_expr : (a0, a1, a2) {
- # x0 - next token
- # x1 - op2
- # x2 - location of the token in x0
- allocate(3);
- # First branch; the token is fetched before its location is queried
- x0 = parser_lex(a0);
- x2 = parser_token_location(a0);
- x1 = ternary_expression(a0, x0, x2);
- # Branch separator
- if (parser_lex(a0) != ':') { goto &ternary_expr_error; }
- # Second branch, parsed directly into the resulting node
- x0 = parser_lex(a0);
- x2 = parser_token_location(a0);
- return(tuple5(STMT_EXPR_TERNARY, a2, a1, x1, ternary_expression(a0, x0, x2)));
- label ternary_expr_error;
- parser_error(a0, a2, "Incorrect ternary expression syntax");
- }
- ## Try to parse logical or expression
- # When a2 is NULL the left operand is parsed first from the current token;
- # otherwise a2 is an already built left operand and a1 is not used.
- # a0 - parser
- # a1 - current token
- # a2 - expression
- # a3 - token location
- # @return statement
- logical_or_expression : (a0, a1, a2, a3) {
- # Parsing the left operand unless the caller supplied one
- if (a2 == NULL) { a2 = logical_and_expression(a0, a1, NULL, a3); }
- # No operator ahead: holding the token and returning the operand
- if (parser_lex(a0) != TOKEN_LOR) {
- parser_lexer_hold(a0);
- return(a2);
- }
- # Operator consumed, continuing with its right-hand side
- return(logical_or_expr(a0, a2, a3));
- }
- ## Parse logical or expression
- # Called after the operator was consumed. Builds the node from op1 and a
- # freshly parsed right operand, then passes it back to
- # logical_or_expression as the left operand, so chained operators nest to
- # the left. The node carries the location a2.
- # a0 - parser
- # a1 - op1
- # a2 - token location
- # @return statement
- logical_or_expr : (a0, a1, a2) {
- # x0 - next token
- # x1 - location of that token
- # x2 - op2
- # x3 - combined node
- allocate(4);
- # The token has to be fetched before its location is queried
- x0 = parser_lex(a0);
- x1 = parser_token_location(a0);
- x2 = logical_and_expression(a0, x0, NULL, x1);
- x3 = tuple4(STMT_EXPR_LOGICAL_OR, a2, a1, x2);
- return(logical_or_expression(a0, NULL, x3, a2));
- }
- ## Try to parse logical and expression
- # When a2 is NULL the left operand is parsed first from the current token;
- # otherwise a2 is an already built left operand and a1 is not used.
- # a0 - parser
- # a1 - current token
- # a2 - expression
- # a3 - token location
- # @return statement
- logical_and_expression : (a0, a1, a2, a3) {
- # x0 - next token
- allocate(1);
- # Fetching first operand
- if (a2 == NULL) { a2 = equality_expression(a0, a1, NULL, a3); }
- # Fetching next token
- x0 = parser_lex(a0);
- # Parsing logical and
- if (x0 == TOKEN_LAND) { return(logical_and_expr(a0, a2, a3)); }
- # Holding token
- parser_lexer_hold(a0);
- # Returning operand
- return(a2);
- }
- ## Parse logical and expression
- # Called after the operator was consumed. Builds the node from op1 and a
- # freshly parsed right operand, then passes it back to
- # logical_and_expression as the left operand, so chained operators nest to
- # the left. The node carries the location a2.
- # a0 - parser
- # a1 - op1
- # a2 - token location
- # @return statement
- logical_and_expr : (a0, a1, a2) {
- # x0 - next token
- # x1 - location of that token
- # x2 - op2
- # x3 - combined node
- allocate(4);
- # The token has to be fetched before its location is queried
- x0 = parser_lex(a0);
- x1 = parser_token_location(a0);
- x2 = equality_expression(a0, x0, NULL, x1);
- x3 = tuple4(STMT_EXPR_LOGICAL_AND, a2, a1, x2);
- return(logical_and_expression(a0, NULL, x3, a2));
- }
- ## Try to parse equality expression
- # When a2 is NULL the left operand is parsed first from the current token;
- # otherwise a2 is an already built left operand and a1 is not used.
- # a0 - parser
- # a1 - current token
- # a2 - expression
- # a3 - token location
- # @return statement
- equality_expression : (a0, a1, a2, a3) {
- # x0 - next token
- allocate(1);
- # Parsing the left operand unless the caller supplied one
- if (a2 == NULL) { a2 = relational_expression(a0, a1, NULL, a3); }
- x0 = parser_lex(a0);
- # Mapping the operator token to its node type
- if (x0 == TOKEN_EQ) { return(equality_expr(a0, a2, STMT_EXPR_EQ, a3)); }
- else if (x0 == TOKEN_NE) { return(equality_expr(a0, a2, STMT_EXPR_NE, a3)); }
- # No operator ahead: holding the token and returning the operand
- else {
- parser_lexer_hold(a0);
- return(a2);
- }
- }
- ## Parse equality expression
- # Called after the operator was consumed. Builds the node from op1 and a
- # freshly parsed right operand, then passes it back to equality_expression
- # as the left operand, so chained operators nest to the left.
- # a0 - parser
- # a1 - op1
- # a2 - operation
- # a3 - token location
- # @return statement
- equality_expr : (a0, a1, a2, a3) {
- # x0 - next token
- # x1 - location of that token
- # x2 - op2
- # x3 - combined node
- allocate(4);
- # The token has to be fetched before its location is queried
- x0 = parser_lex(a0);
- x1 = parser_token_location(a0);
- x2 = relational_expression(a0, x0, NULL, x1);
- x3 = tuple4(a2, a3, a1, x2);
- return(equality_expression(a0, NULL, x3, a3));
- }
- ## Try to parse relational expression
- # When a2 is NULL the left operand is parsed first from the current token;
- # otherwise a2 is an already built left operand and a1 is not used.
- # a0 - parser
- # a1 - current token
- # a2 - expression
- # a3 - token location
- # @return statement
- relational_expression : (a0, a1, a2, a3) {
- # x0 - next token
- allocate(1);
- # Parsing the left operand unless the caller supplied one
- if (a2 == NULL) { a2 = bitwise_or_expression(a0, a1, NULL, a3); }
- x0 = parser_lex(a0);
- # Mapping the operator token to its node type
- if (x0 == '<') { return(relational_expr(a0, a2, STMT_EXPR_LESS, a3)); }
- else if (x0 == '>') { return(relational_expr(a0, a2, STMT_EXPR_GREATER, a3)); }
- else if (x0 == TOKEN_LE) { return(relational_expr(a0, a2, STMT_EXPR_LE, a3)); }
- else if (x0 == TOKEN_GE) { return(relational_expr(a0, a2, STMT_EXPR_GE, a3)); }
- # No operator ahead: holding the token and returning the operand
- else {
- parser_lexer_hold(a0);
- return(a2);
- }
- }
- ## Parse the second operand of a relational expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - operation (stored as the node type)
- # a3 - token location
- # @return statement
- relational_expr : (a0, a1, a2, a3) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as a bitwise or expression
-     x2 = bitwise_or_expression(a0, x0, NULL, x1);
-     # Node layout: (operation, location, first operand, second operand)
-     x0 = tuple4(a2, a3, a1, x2);
-     # Hand the node to relational_expression as its ready-made operand
-     return(relational_expression(a0, NULL, x0, a3));
- }
- ## Parse a bitwise or expression, if there is one
- # a0 - parser
- # a1 - current token (only used when a2 is NULL)
- # a2 - first operand that is already parsed, or NULL to parse it from a1
- # a3 - token location
- # @return statement
- bitwise_or_expression : (a0, a1, a2, a3) {
-     # x0 - token following the first operand
-     allocate(1);
-     # No operand was handed in, so parse a bitwise xor expression first
-     if (a2 == NULL) {
-         a2 = bitwise_xor_expression(a0, a1, NULL, a3);
-     }
-     x0 = parser_lex(a0);
-     # Anything but '|' ends here: hold the token and return the operand
-     if (x0 != '|') {
-         parser_lexer_hold(a0);
-         return(a2);
-     }
-     # '|' was read, continue with the second operand
-     return(bitwise_or_expr(a0, a2, a3));
- }
- ## Parse the second operand of a bitwise or expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - token location
- # @return statement
- bitwise_or_expr : (a0, a1, a2) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as a bitwise xor expression
-     x2 = bitwise_xor_expression(a0, x0, NULL, x1);
-     # Node layout: (type, location, first operand, second operand)
-     x0 = tuple4(STMT_EXPR_OR, a2, a1, x2);
-     # Hand the node to bitwise_or_expression as its ready-made operand
-     return(bitwise_or_expression(a0, NULL, x0, a2));
- }
- ## Parse a bitwise xor expression, if there is one
- # a0 - parser
- # a1 - current token (only used when a2 is NULL)
- # a2 - first operand that is already parsed, or NULL to parse it from a1
- # a3 - token location
- # @return statement
- bitwise_xor_expression : (a0, a1, a2, a3) {
-     # x0 - token following the first operand
-     allocate(1);
-     # No operand was handed in, so parse a bitwise and expression first
-     if (a2 == NULL) {
-         a2 = bitwise_and_expression(a0, a1, NULL, a3);
-     }
-     x0 = parser_lex(a0);
-     # Anything but '^' ends here: hold the token and return the operand
-     if (x0 != '^') {
-         parser_lexer_hold(a0);
-         return(a2);
-     }
-     # '^' was read, continue with the second operand
-     return(bitwise_xor_expr(a0, a2, a3));
- }
- ## Parse the second operand of a bitwise xor expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - token location
- # @return statement
- bitwise_xor_expr : (a0, a1, a2) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as a bitwise and expression
-     x2 = bitwise_and_expression(a0, x0, NULL, x1);
-     # Node layout: (type, location, first operand, second operand)
-     x0 = tuple4(STMT_EXPR_XOR, a2, a1, x2);
-     # Hand the node to bitwise_xor_expression as its ready-made operand
-     return(bitwise_xor_expression(a0, NULL, x0, a2));
- }
- ## Parse a bitwise and expression, if there is one
- # a0 - parser
- # a1 - current token (only used when a2 is NULL)
- # a2 - first operand that is already parsed, or NULL to parse it from a1
- # a3 - token location
- # @return statement
- bitwise_and_expression : (a0, a1, a2, a3) {
-     # x0 - token following the first operand
-     allocate(1);
-     # No operand was handed in, so parse a bitwise shift expression first
-     if (a2 == NULL) {
-         a2 = bitwise_shift_expression(a0, a1, NULL, a3);
-     }
-     x0 = parser_lex(a0);
-     # Anything but '&' ends here: hold the token and return the operand
-     if (x0 != '&') {
-         parser_lexer_hold(a0);
-         return(a2);
-     }
-     # '&' was read, continue with the second operand
-     return(bitwise_and_expr(a0, a2, a3));
- }
- ## Parse the second operand of a bitwise and expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - token location
- # @return statement
- bitwise_and_expr : (a0, a1, a2) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as a bitwise shift expression
-     x2 = bitwise_shift_expression(a0, x0, NULL, x1);
-     # Node layout: (type, location, first operand, second operand)
-     x0 = tuple4(STMT_EXPR_AND, a2, a1, x2);
-     # Hand the node to bitwise_and_expression as its ready-made operand
-     return(bitwise_and_expression(a0, NULL, x0, a2));
- }
- ## Parse a bitwise shift expression, if there is one
- # a0 - parser
- # a1 - current token (only used when a2 is NULL)
- # a2 - first operand that is already parsed, or NULL to parse it from a1
- # a3 - token location
- # @return statement
- bitwise_shift_expression : (a0, a1, a2, a3) {
-     # x0 - token following the first operand
-     allocate(1);
-     # No operand was handed in, so parse an additive expression first
-     if (a2 == NULL) {
-         a2 = additive_expression(a0, a1, NULL, a3);
-     }
-     x0 = parser_lex(a0);
-     # TOKEN_SHL and TOKEN_SHR continue in bitwise_shift_expr
-     if (x0 == TOKEN_SHL) { return(bitwise_shift_expr(a0, a2, STMT_EXPR_SHL, a3)); }
-     else if (x0 == TOKEN_SHR) { return(bitwise_shift_expr(a0, a2, STMT_EXPR_SHR, a3)); }
-     # Any other token is put on hold and the operand is returned unchanged
-     parser_lexer_hold(a0);
-     return(a2);
- }
- ## Parse the second operand of a bitwise shift expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - operation (stored as the node type)
- # a3 - token location
- # @return statement
- bitwise_shift_expr : (a0, a1, a2, a3) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as an additive expression
-     x2 = additive_expression(a0, x0, NULL, x1);
-     # Node layout: (operation, location, first operand, second operand)
-     x0 = tuple4(a2, a3, a1, x2);
-     # Hand the node to bitwise_shift_expression as its ready-made operand
-     return(bitwise_shift_expression(a0, NULL, x0, a3));
- }
- ## Parse an additive expression, if there is one
- # a0 - parser
- # a1 - current token (only used when a2 is NULL)
- # a2 - first operand that is already parsed, or NULL to parse it from a1
- # a3 - token location
- # @return statement
- additive_expression : (a0, a1, a2, a3) {
-     # x0 - token following the first operand
-     allocate(1);
-     # No operand was handed in, so parse a multiplicative expression first
-     if (a2 == NULL) {
-         a2 = multiplicative_expression(a0, a1, NULL, a3);
-     }
-     x0 = parser_lex(a0);
-     # '+' and '-' continue in additive_expr with the matching type
-     if (x0 == '+') { return(additive_expr(a0, a2, STMT_EXPR_ADD, a3)); }
-     else if (x0 == '-') { return(additive_expr(a0, a2, STMT_EXPR_SUB, a3)); }
-     # Any other token is put on hold and the operand is returned unchanged
-     parser_lexer_hold(a0);
-     return(a2);
- }
- ## Parse the second operand of an additive expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - operation (stored as the node type)
- # a3 - token location
- # @return statement
- additive_expr : (a0, a1, a2, a3) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as a multiplicative expression
-     x2 = multiplicative_expression(a0, x0, NULL, x1);
-     # Node layout: (operation, location, first operand, second operand)
-     x0 = tuple4(a2, a3, a1, x2);
-     # Hand the node to additive_expression as its ready-made operand
-     return(additive_expression(a0, NULL, x0, a3));
- }
- ## Parse a multiplicative expression, if there is one
- # a0 - parser
- # a1 - current token (only used when a2 is NULL)
- # a2 - first operand that is already parsed, or NULL to parse it from a1
- # a3 - token location
- # @return statement
- multiplicative_expression : (a0, a1, a2, a3) {
-     # x0 - token following the first operand
-     allocate(1);
-     # No operand was handed in, so parse a prefix expression first
-     if (a2 == NULL) {
-         a2 = prefix_expression(a0, a1, a3);
-     }
-     x0 = parser_lex(a0);
-     # '*', '/' and '%' continue in multiplicative_expr with the matching type
-     if (x0 == '*') { return(multiplicative_expr(a0, a2, STMT_EXPR_MUL, a3)); }
-     else if (x0 == '/') { return(multiplicative_expr(a0, a2, STMT_EXPR_DIV, a3)); }
-     else if (x0 == '%') { return(multiplicative_expr(a0, a2, STMT_EXPR_MOD, a3)); }
-     # Any other token is put on hold and the operand is returned unchanged
-     parser_lexer_hold(a0);
-     return(a2);
- }
- ## Parse the second operand of a multiplicative expression and build its node
- # a0 - parser
- # a1 - first operand
- # a2 - operation (stored as the node type)
- # a3 - token location
- # @return statement
- multiplicative_expr : (a0, a1, a2, a3) {
-     # x0 - first token of the second operand, afterwards the finished node
-     # x1 - location of that token
-     # x2 - second operand
-     allocate(3);
-     # Read the token that starts the second operand and note where it is
-     x0 = parser_lex(a0);
-     x1 = parser_token_location(a0);
-     # The second operand is parsed as a prefix expression
-     x2 = prefix_expression(a0, x0, x1);
-     # Node layout: (operation, location, first operand, second operand)
-     x0 = tuple4(a2, a3, a1, x2);
-     # Hand the node to multiplicative_expression as its ready-made operand
-     return(multiplicative_expression(a0, NULL, x0, a3));
- }
- ## Parse a prefix expression, or a simple expression when no prefix is present
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- prefix_expression : (a0, a1, a2) {
-     # Each prefix token maps to one node type; prefix_expr parses the operand
-     if (a1 == '+') { return(prefix_expr(a0, STMT_EXPR_PLUS, a2)); }
-     if (a1 == '-') { return(prefix_expr(a0, STMT_EXPR_MINUS, a2)); }
-     if (a1 == '!') { return(prefix_expr(a0, STMT_EXPR_LOGICAL_NOT, a2)); }
-     if (a1 == '~') { return(prefix_expr(a0, STMT_EXPR_NOT, a2)); }
-     if (a1 == '*') { return(prefix_expr(a0, STMT_EXPR_DEREF, a2)); }
-     if (a1 == '&') { return(prefix_expr(a0, STMT_EXPR_ADDROF, a2)); }
-     if (a1 == TOKEN_DEC) { return(prefix_expr(a0, STMT_EXPR_PREDEC, a2)); }
-     if (a1 == TOKEN_INC) { return(prefix_expr(a0, STMT_EXPR_PREINC, a2)); }
-     # Not a prefix token: the token starts a simple expression
-     return(simple_expression(a0, a1, a2));
- }
- ## Parse the operand of a prefix operator and build its node
- # a0 - parser
- # a1 - operation (stored as the node type)
- # a2 - token location
- # @return statement
- prefix_expr : (a0, a1, a2) {
-     # x0 - location of the operand's first token
-     # x1 - first token of the operand, afterwards the operand itself
-     allocate(2);
-     # Read the token that starts the operand and note where it is
-     x1 = parser_lex(a0);
-     x0 = parser_token_location(a0);
-     # The operand may carry further prefix operators
-     x1 = prefix_expression(a0, x1, x0);
-     # Node layout: (operation, location, operand)
-     return(tuple3(a1, a2, x1));
- }
- ## Parse a simple expression, selected by the current token
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- simple_expression : (a0, a1, a2) {
-     # Identifier, argument, variable, syscall, readchar and '(' each have
-     # their own parser. Any other token is handed to constant_expression and
-     # the result is then checked for postfixes.
-     if (a1 == TOKEN_IDENTIFIER) { return(simple_identifier_expression(a0, a2)); }
-     else if (a1 == TOKEN_ARGUMENT) { return(simple_argvar_expression(a0, TRUE, a2)); }
-     else if (a1 == TOKEN_VARIABLE) { return(simple_argvar_expression(a0, FALSE, a2)); }
-     else if (a1 == TOKEN_SYSCALL) { return(simple_syscall_expression(a0, a2)); }
-     else if (a1 == TOKEN_READCHAR) { return(simple_readchar_expression(a0, a2)); }
-     else if (a1 == '(') { return(simple_parantheses_expression(a0, a2)); }
-     else { return(simple_postfix_expression(a0, constant_expression(a0, a1, a2))); }
- }
- ## Build an identifier expression from the current token text
- # a0 - parser
- # a1 - token location
- # @return statement
- simple_identifier_expression : (a0, a1) {
-     # x0 - lexer, then token text, then its copy, then the finished node
-     allocate(1);
-     # The node stores a copy of the token text made with strdup
-     x0 = parser_lexer(a0);
-     x0 = lexer_token_text(x0);
-     x0 = strdup(x0);
-     # Node layout: (type, location, name)
-     x0 = tuple3(STMT_EXPR_IDENTIFIER, a1, x0);
-     # Postfixes may follow the identifier
-     return(simple_postfix_expression(a0, x0));
- }
- ## Build an argument or variable expression from the current token value
- # a0 - parser
- # a1 - non-zero for an argument, zero for a variable
- # a2 - token location
- # @return statement
- simple_argvar_expression : (a0, a1, a2) {
-     # x0 - expression type
-     # x1 - token value taken from the lexer
-     allocate(2);
-     # Pick the node type
-     if (a1) { x0 = STMT_EXPR_ARGUMENT; }
-     else { x0 = STMT_EXPR_VARIABLE; }
-     # Fetch the value the lexer holds for the current token
-     x1 = lexer_token_value(parser_lexer(a0));
-     # Node layout: (type, location, value); postfixes may follow
-     return(simple_postfix_expression(a0, tuple3(x0, a2, x1)));
- }
- ## Parse a syscall expression: '(' followed by comma separated expressions and ')'
- # At least one expression is always read; more than 7 are rejected.
- # a0 - parser
- # a1 - token location
- # @return statement
- simple_syscall_expression : (a0, a1) {
-     # x0 - vector collecting the operand expressions
-     # x1 - most recently read token
-     allocate(2);
-     x0 = vector(0, 1);
-     # An opening parenthesis has to come first
-     x1 = parser_lex(a0);
-     if (x1 != '(') { goto &simple_syscall_expression_invalid; }
-     # Read one operand per pass, for as long as a comma follows it.
-     # x1 is primed with ',' so that the first operand is always read.
-     x1 = ',';
-     while (x1 == ',') {
-         x1 = parser_lex(a0);
-         vector_push(x0, expression(a0, x1));
-         x1 = parser_lex(a0);
-     }
-     # The operand list has to end with a closing parenthesis
-     if (x1 != ')') { goto &simple_syscall_expression_invalid; }
-     # No more than 7 operands are accepted
-     if (vector_size(x0) > 7) { goto &simple_syscall_expression_too_many; }
-     # Node layout: (type, location, operand vector); postfixes may follow
-     return(simple_postfix_expression(a0, tuple3(STMT_EXPR_SYSCALL, a1, x0)));
-     # NOTE(review): the first label falls through into the second one, which
-     # only goes unnoticed if parser_error does not return - confirm.
-     label simple_syscall_expression_invalid;
-     parser_error(a0, a1, "Invalid syscall expression");
-     label simple_syscall_expression_too_many;
-     parser_error(a0, a1, "Too many arguments in syscall expression");
- }
- ## Parse a readchar expression: '(' expression ',' expression ')'
- # a0 - parser
- # a1 - token location
- # @return statement
- simple_readchar_expression : (a0, a1) {
-     # x0 - array expression
-     # x1 - index expression
-     allocate(2);
-     # Opening parenthesis
-     if (parser_lex(a0) != '(') { goto &simple_readchar_expression_invalid; }
-     # First expression, parsed from the token after the parenthesis
-     x0 = expression(a0, parser_lex(a0));
-     # Separating comma
-     if (parser_lex(a0) != ',') { goto &simple_readchar_expression_invalid; }
-     # Second expression, parsed from the token after the comma
-     x1 = expression(a0, parser_lex(a0));
-     # Closing parenthesis
-     if (parser_lex(a0) != ')') { goto &simple_readchar_expression_invalid; }
-     # Node layout: (type, location, array, index); postfixes may follow
-     return(simple_postfix_expression(a0, tuple4(STMT_EXPR_READCHAR, a1, x0, x1)));
-     label simple_readchar_expression_invalid;
-     parser_error(a0, a1, "Invalid readchar expression");
- }
- ## Parse an expression enclosed in parentheses
- # The opening parenthesis has already been read by the caller.
- # a0 - parser
- # a1 - token location (only used for the error message)
- # @return statement
- simple_parantheses_expression : (a0, a1) {
-     # x0 - enclosed expression
-     allocate(1);
-     # Parse the expression that starts at the next token
-     x0 = expression(a0, parser_lex(a0));
-     # With the closing parenthesis present the expression may take postfixes
-     if (parser_lex(a0) == ')') {
-         return(simple_postfix_expression(a0, x0));
-     }
-     # Anything else is reported as an unclosed group
-     parser_error(a0, a1, "Parantheses expression is not closed");
- }
- ## Parse the postfixes that follow a simple expression
- # a0 - parser
- # a1 - expression
- # @return statement
- simple_postfix_expression : (a0, a1) {
-     # x0 - location of the token after the expression
-     # x1 - token after the expression
-     allocate(2);
-     x1 = parser_lex(a0);
-     x0 = parser_token_location(a0);
-     # '(' starts a call and '[' an index; both take over the location
-     if (x1 == '(') { return(simple_address_call_expression(a0, a1, x0)); }
-     if (x1 == '[') { return(simple_address_index_expression(a0, a1, x0)); }
-     # TOKEN_DEC / TOKEN_INC wrap the expression and look for more postfixes
-     if (x1 == TOKEN_DEC) { return(simple_postfix_expression(a0, tuple3(STMT_EXPR_POSTDEC, x0, a1))); }
-     if (x1 == TOKEN_INC) { return(simple_postfix_expression(a0, tuple3(STMT_EXPR_POSTINC, x0, a1))); }
-     # No postfix: hold the token and release the location nobody took
-     parser_lexer_hold(a0);
-     free(x0);
-     return(a1);
- }
- ## Parse the argument list of a call expression
- # The opening parenthesis has already been read by the caller.
- # a0 - parser
- # a1 - expression that is being called
- # a2 - token location
- # @return statement
- simple_address_call_expression : (a0, a1, a2) {
-     # x0 - operand vector: the called expression followed by the arguments
-     # x1 - most recently read token
-     allocate(2);
-     x0 = vector(0, 1);
-     vector_push(x0, a1);
-     # One pass per argument. x1 is primed with ',' to enter the loop, and a
-     # ')' read where an argument would start ends the list right away.
-     x1 = ',';
-     while (x1 == ',') {
-         x1 = parser_lex(a0);
-         if (x1 != ')') {
-             vector_push(x0, expression(a0, x1));
-             x1 = parser_lex(a0);
-         }
-     }
-     # Node layout: (type, location, operand vector); postfixes may follow
-     if (x1 == ')') {
-         return(simple_postfix_expression(a0, tuple3(STMT_EXPR_CALL, a2, x0)));
-     }
-     # The list ended with something other than ')'
-     parser_error(a0, a2, "Call expression is not closed");
- }
- ## Parse the index part of an indexing expression
- # The opening bracket has already been read by the caller.
- # a0 - parser
- # a1 - expression that is being indexed
- # a2 - token location
- # @return statement
- simple_address_index_expression : (a0, a1, a2) {
-     # x0 - index expression
-     allocate(1);
-     # Parse the expression that starts at the next token
-     x0 = expression(a0, parser_lex(a0));
-     # Node layout: (type, location, indexed expression, index)
-     if (parser_lex(a0) == ']') {
-         return(simple_postfix_expression(a0, tuple4(STMT_EXPR_INDEX, a2, a1, x0)));
-     }
-     # The index was not followed by ']'
-     parser_error(a0, a2, "Index expression is not closed");
- }
- ## Parse a constant expression (integer or string)
- # a0 - parser
- # a1 - current token
- # a2 - token location
- # @return statement
- constant_expression : (a0, a1, a2) {
-     # Only TOKEN_INTEGER and TOKEN_STRING are accepted here
-     if (a1 == TOKEN_INTEGER) { return(constant_integer_expression(a0, a2)); }
-     else if (a1 == TOKEN_STRING) { return(constant_string_expression(a0, a2)); }
-     # Every other token is reported as a syntax error
-     parser_error(a0, a2, "Syntax error - none of available expressions matched");
- }
- ## Build an integer expression from the current token value
- # a0 - parser
- # a1 - token location
- # @return statement
- constant_integer_expression : (a0, a1) {
-     # x0 - token value taken from the lexer
-     allocate(1);
-     x0 = lexer_token_value(parser_lexer(a0));
-     # Node layout: (type, location, value)
-     return(tuple3(STMT_EXPR_INTEGER, a1, x0));
- }
- ## Build a string expression from the current token text
- # The node does not hold the text itself, but the value string_buffer_push
- # returns for it.
- # a0 - parser
- # a1 - token location
- # @return statement
- constant_string_expression : (a0, a1) {
-     # x0 - token text of the lexer, afterwards its string buffer offset
-     allocate(1);
-     x0 = lexer_token_text(parser_lexer(a0));
-     # Find the text in the parser's string buffer, or add it there
-     x0 = string_buffer_push(parser_buffer(a0), x0);
-     # Node layout: (type, location, offset)
-     return(tuple3(STMT_EXPR_STRING, a1, x0));
- }
- ### String buffer
- ## String buffer constants
- # Size passed to calloc for the backing buffer. string_buffer_push stops with
- # an error once length + new string length would reach it.
- STRBUF_SIZE => 16384;
- ## String buffer parts
- # Indexes into the tuple created by string_buffer:
- # STRBUF_BUFFER - address of the backing buffer
- # STRBUF_OFFSET - offset that the next pushed string will be given
- # STRBUF_LENGTH - used part of the backing buffer, terminators included
- STRBUF_BUFFER => 0;
- STRBUF_OFFSET => 1;
- STRBUF_LENGTH => 2;
- ## Create an empty string buffer
- # @return buffer as tuple (backing buffer, offset 0, length 0)
- string_buffer : () {
-     # x0 - backing buffer of STRBUF_SIZE
-     allocate(1);
-     x0 = calloc(STRBUF_SIZE);
-     return(tuple3(x0, 0, 0));
- }
- ## Read the offset field of a string buffer
- # a0 - string buffer
- # @return offset
- string_buffer_offset : (a0) {
-     return(a0[STRBUF_OFFSET]);
- }
- ## Read the length field of a string buffer
- # a0 - string buffer
- # @return length
- string_buffer_length : (a0) {
-     return(a0[STRBUF_LENGTH]);
- }
- ## Look a string up in the string buffer
- # a0 - string buffer
- # a1 - string
- # @return offset of the string, or -1 when the buffer does not contain it
- string_buffer_find : (a0, a1) {
-     # x0 - address of the backing buffer
-     # x1 - used length of the backing buffer
-     # x2 - position of the current string inside the backing buffer
-     # x3 - offset that belongs to the current string
-     # x4 - address of the current string
-     allocate(5);
-     x0 = a0[STRBUF_BUFFER];
-     x1 = string_buffer_length(a0);
-     # Walk over the stored strings; an empty buffer never enters the loop
-     x2 = 0;
-     x3 = 0;
-     while (x2 < x1) {
-         x4 = x0 + x2;
-         # A match returns the offset counted so far
-         if (strcmp(x4, a1) == 0) { return(x3); }
-         # The offset grows by stresclen - 1 per string, while the position
-         # grows by strlen + 1 (the string and its terminator)
-         x3 = x3 + stresclen(x4) - 1;
-         x2 = x2 + strlen(x4) + 1;
-     }
-     # Nothing matched
-     return(-1);
- }
- ## Return the offset of a string, adding the string to the buffer if needed
- # Exits with status 1 when the buffer cannot take the string.
- # a0 - string buffer
- # a1 - string
- # @return string offset
- string_buffer_push : (a0, a1) {
-     # x0 - offset of the string (found, or the one given to the new string)
-     # x1 - used length of the buffer before the push
-     # x2 - length of the string
-     allocate(3);
-     x0 = string_buffer_find(a0, a1);
-     # -1 means the string is not stored yet
-     if (x0 == -1) {
-         # The new string gets the current offset of the buffer
-         x0 = a0[STRBUF_OFFSET];
-         x1 = a0[STRBUF_LENGTH];
-         x2 = strlen(a1);
-         # The string plus its terminator has to fit in STRBUF_SIZE.
-         # TODO: when `free` function will be done, reallocate buffer. For now
-         # it can take too much memory for our purposes.
-         if (x1 + x2 >= STRBUF_SIZE) {
-             eputs("ERROR: string buffer is full.\n");
-             exit(1);
-         }
-         # Copy to the first unused position; this strcpy is called with the
-         # string first and the destination second
-         strcpy(a1, a0[STRBUF_BUFFER] + x1);
-         # Advance offset and length (strings are with "")
-         a0[STRBUF_OFFSET] = x0 + stresclen(a1) - 1;
-         a0[STRBUF_LENGTH] = x1 + x2 + 1;
-     }
-     return(x0);
- }
- ## Write the string buffer to an output channel
- # Writes a `strbuf:` label and one `.string` line per stored string. Nothing
- # is written for an empty buffer.
- # a0 - string buffer
- # a1 - output channel
- string_buffer_export : (a0, a1) {
-     # x0 - used length of the backing buffer
-     # x1 - address of the backing buffer
-     # x2 - position of the current string
-     allocate(3);
-     x0 = string_buffer_length(a0);
-     # Empty buffer, nothing to export
-     if (x0 == 0) { return; }
-     x1 = a0[STRBUF_BUFFER];
-     fputs(a1, "strbuf:\n");
-     x2 = 0;
-     while (x2 < x0) {
-         # The stored text is written as is after the directive
-         fputs(a1, ".string ");
-         fputs(a1, x1 + x2);
-         fputc(a1, '\n');
-         # Skip the string and its terminator
-         x2 = x2 + strlen(x1 + x2) + 1;
-     }
- }
- ### Compiler
- ## Compiler parts
- # Indexes into the tuple created by compiler:
- # COM_OCHAN - output channel
- # COM_STRBUF - string buffer
- # COM_LABEL - next label ID
- # COM_SEGMENT - segment that was selected last (one of SEG_*)
- # COM_REGFLAGS - tuple with one usage counter per register
- # COM_LOOP - list of loops
- COM_OCHAN => 0;
- COM_STRBUF => 1;
- COM_LABEL => 2;
- COM_SEGMENT => 3;
- COM_REGFLAGS => 4;
- COM_LOOP => 5;
- ## Segment flags
- # Values stored in COM_SEGMENT, see compiler_set_segment
- SEG_TEXT => 0;
- SEG_DATA => 1;
- SEG_RODATA => 2;
- ## Register usage flags
- # Indexes into both the COM_REGFLAGS tuple and the `registers` list
- REG_EAX => 0;
- REG_EBX => 1;
- REG_ECX => 2;
- REG_EDX => 3;
- REG_ESI => 4;
- REG_EDI => 5;
- # Register list (%ebp used only for syscalls)
- # The first six names are in REG_* order; %ebp has no REG_* constant here
- registers : [ "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp" ];
- ## Create a compiler
- # The new compiler starts with label ID 0, the .text segment selected, six
- # zeroed register counters and a loop list created with list(NULL).
- # a0 - output chan
- # a1 - string buffer
- # @return compiler (tuple indexed by the COM_* constants)
- compiler : (a0, a1) {
-     return(tuple6(
-         a0, a1, 0, SEG_TEXT,
-         tuple6(0,0,0,0,0,0),
-         list(NULL)
-     ));
- }
- ## Read the output channel of a compiler
- # a0 - compiler
- # @return output channel
- compiler_output_chan : (a0) {
-     return(a0[COM_OCHAN]);
- }
- ## Read the current label ID of a compiler
- # a0 - compiler
- # @return label ID
- compiler_label_id : (a0) {
-     return(a0[COM_LABEL]);
- }
- ## Switch the output to another segment, unless it is already selected
- # a0 - compiler
- # a1 - segment (one of SEG_*)
- compiler_set_segment : (a0, a1) {
-     # x0 - compiler output channel
-     allocate(1);
-     # Nothing to write when the segment does not change
-     if (a0[COM_SEGMENT] == a1) { return; }
-     # Remember the new segment
-     a0[COM_SEGMENT] = a1;
-     # Write the directive that belongs to it
-     x0 = compiler_output_chan(a0);
-     if (a1 == SEG_TEXT) { fputs(x0, ".text\n"); }
-     else if (a1 == SEG_DATA) { fputs(x0, ".data\n"); }
-     else if (a1 == SEG_RODATA) { fputs(x0, ".section .rodata\n"); }
- }
- ## Take a register into use
- # Writes a `pushl` for the register when its usage counter is not zero, then
- # increments the counter.
- # a0 - compiler
- # a1 - register (index, see REG_*)
- compiler_allocate_register : (a0, a1) {
-     # x0 - register usage counters
-     # x1 - compiler output channel
-     allocate(2);
-     x0 = a0[COM_REGFLAGS];
-     # The register is already in use, so its value goes to the stack
-     if (x0[a1] != 0) {
-         x1 = compiler_output_chan(a0);
-         fputs(x1, "\tpushl ");
-         fputs(x1, registers[a1]);
-         fputc(x1, '\n');
-     }
-     # One more user
-     x0[a1] = x0[a1] + 1;
- }
- ## Give a register back
- # Writes a `popl` for the register when its usage counter is not one, then
- # decrements the counter. Asserts that the counter is not zero.
- # a0 - compiler
- # a1 - register (index, see REG_*)
- compiler_free_register : (a0, a1) {
-     # x0 - register usage counters
-     # x1 - compiler output channel
-     allocate(2);
-     x0 = a0[COM_REGFLAGS];
-     # A counter of zero means the register was never allocated
-     assert(x0[a1] != 0, "Tried to free not used register.");
-     # Another user is left, so its value comes back from the stack
-     if (x0[a1] != 1) {
-         x1 = compiler_output_chan(a0);
-         fputs(x1, "\tpopl ");
-         fputs(x1, registers[a1]);
-         fputc(x1, '\n');
-     }
-     # One user less
-     x0[a1] = x0[a1] - 1;
- }
- ## Reserve a label ID
- # a0 - compiler
- # @return the label ID from before the increment
- compiler_new_label : (a0) {
-     # x0 - label ID that is handed out
-     allocate(1);
-     x0 = a0[COM_LABEL];
-     # The next caller gets the following ID
-     a0[COM_LABEL] = x0 + 1;
-     return(x0);
- }
- ## Read the value at the head of the loop list
- # a0 - compiler
- # @return loop tuple
- compiler_loop : (a0) {
-     return(list_value(a0[COM_LOOP]));
- }
- ## Add a loop to the loop list
- # a0 - compiler
- # a1 - label to start of loop
- # a2 - label to end of loop
- compiler_push_loop : (a0, a1, a2) {
-     # The loop is stored as the tuple (start label, end label)
-     a0[COM_LOOP] = list_insert(a0[COM_LOOP], tuple2(a1, a2));
- }
- ## Remove the loop at the head of the loop list
- # a0 - compiler
- # a1 - not used
- # a2 - not used
- compiler_pop_loop : (a0, a1, a2) {
-     # A NULL value at the head means there is no loop to pop
-     assert(list_value(a0[COM_LOOP]) != NULL, "Tried to pop loop from empty list.");
-     a0[COM_LOOP] = list_pop(a0[COM_LOOP]);
- }
- ## Print label
- # Writes the label name, which is `_label` followed by the label ID.
- # a0 - output chan
- # a1 - label ID
- fputlabel : (a0, a1) {
- # Printing the `_label` heading
- fputs(a0, "_label");
- # Printing the ID of label (written with fputd)
- fputd(a0, a1);
- }
- ## Print a label definition line
- # a0 - output chan
- # a1 - label ID
- fputlabeldef : (a0, a1) {
-     # Label name: `_label` followed by the ID
-     fputs(a0, "_label");
-     fputd(a0, a1);
-     # A colon and a newline turn the name into a definition
-     fputs(a0, ":\n");
- }
- ## Print compiler error
- # Writes `ERROR: <location>: <message>` and a newline, then returns to the
- # caller; nothing in this function stops the compilation.
- # a0 - compiler (not used here)
- # a1 - statement (its element 1 is printed as the location)
- # a2 - error message
- compiler_error : (a0, a1, a2) {
- eputs("ERROR: ");
- # The location is element 1 of the statement tuple
- fputloc(a1[1], stderr);
- eputs(": ");
- eputs(a2);
- eputc('\n');
- }
- ## Compile a whole AST
- # a0 - compiler
- # a1 - AST (vector of top level statements)
- compiler_compile : (a0, a1) {
-     # x0 - index of the current statement
-     # x1 - current statement
-     allocate(2);
-     # Identifiers that name a macro are replaced before anything is compiled
-     compiler_use_macros(a0, a1);
-     # Compile the top level statements in order
-     x0 = 0;
-     while (x0 < vector_size(a1)) {
-         x1 = vector_get(a1, x0);
-         compile_top_level_statement(a0, x1);
-         x0 = x0 + 1;
-     }
-     # The collected strings are written last, in the .rodata segment
-     compiler_set_segment(a0, SEG_RODATA);
-     string_buffer_export(a0[COM_STRBUF], compiler_output_chan(a0));
- }
- ## Apply every macro of the AST to the statements that come after it
- # a0 - compiler
- # a1 - AST
- compiler_use_macros : (a0, a1) {
-     # x0 - index of the statement after the current one
-     # x1 - current statement
-     # x2 - index of the statement the macro is applied to
-     allocate(3);
-     x0 = 0;
-     while (x0 < vector_size(a1)) {
-         x1 = vector_get(a1, x0);
-         # From here on x0 points at the statement that follows x1
-         x0 = x0 + 1;
-         if (x1[NODE_TYPE] == STMT_MACRO) {
-             # A macro only affects the statements behind it
-             x2 = x0;
-             while (x2 < vector_size(a1)) {
-                 compiler_use_macro(a0, vector_get(a1, x2), x1);
-                 x2 = x2 + 1;
-             }
-         }
-     }
- }
- ## Replace identifiers that name a macro, in the children of a statement
- # Children are either elements of the statement tuple (tuple mode) or the
- # elements of a vector stored in the tuple (vector mode). A child that is an
- # identifier named like the macro is replaced by a2[3]. Every other child is
- # processed recursively.
- # a0 - compiler
- # a1 - statement
- # a2 - macro statement (a2[2] is compared as its name)
- compiler_use_macro : (a0, a1, a2) {
-     # x0 - statement ID while selecting the mode, child index afterwards
-     # x1 - tuple mode: position of the first child; vector mode: position of the vector
-     # x2 - number of children (tuple mode only)
-     # x3 - current child
-     allocate(4);
-     x0 = a1[NODE_TYPE];
-     # Children start at the second element unless a case below says otherwise
-     x1 = 2;
-     ### Selecting the mode. The checks run in a fixed order, first match wins.
-     # Expressions that keep their operands in a vector
-     if (x0 == STMT_EXPR_SYSCALL || x0 == STMT_EXPR_CALL) {
-         goto &compiler_use_macro_vector;
-     }
-     # Expressions with two operands in the tuple
-     if (x0 == STMT_EXPR_READCHAR || x0 == STMT_EXPR_INDEX) {
-         x2 = 2;
-         goto &compiler_use_macro_tuple;
-     }
-     # Unary operators
-     ## From STMT_EXPR_PLUS to STMT_EXPR_ADDROF, and
-     ## from STMT_EXPR_PREDEC to STMT_EXPR_POSTINC
-     if (x0 >= STMT_EXPR_PLUS && x0 <= STMT_EXPR_ADDROF
-         || x0 >= STMT_EXPR_PREDEC && x0 <= STMT_EXPR_POSTINC) {
-         x2 = 1;
-         goto &compiler_use_macro_tuple;
-     }
-     # Binary operators
-     ## From STMT_EXPR_MUL to STMT_EXPR_OR_ASSIGN
-     if (x0 >= STMT_EXPR_MUL && x0 <= STMT_EXPR_OR_ASSIGN) {
-         x2 = 2;
-         goto &compiler_use_macro_tuple;
-     }
-     # Ternary operator
-     if (x0 == STMT_EXPR_TERNARY) {
-         x2 = 3;
-         goto &compiler_use_macro_tuple;
-     }
-     # Statements that keep their children in a vector
-     if (x0 == STMT_IF || x0 == STMT_BLOCK || x0 == STMT_RETURN) {
-         goto &compiler_use_macro_vector;
-     }
-     if (x0 == STMT_ARRAY_DECL) {
-         x1 = 3;
-         goto &compiler_use_macro_vector;
-     }
-     # Statements that keep their children in the tuple
-     if (x0 == STMT_ALLOCATE) {
-         x2 = 1;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_FOR) {
-         x2 = 4;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_DO_WHILE || x0 == STMT_WHILE) {
-         x2 = 2;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_WRITECHAR) {
-         x2 = 3;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_GOTO) {
-         x2 = 1;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_MACRO) {
-         x1 = 3;
-         x2 = 1;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_FUN_DECL) {
-         x1 = 4;
-         x2 = 1;
-         goto &compiler_use_macro_tuple;
-     }
-     if (x0 == STMT_CHAR_ARRAY_DECL || x0 == STMT_INT_ARRAY_DECL || x0 == STMT_VAR_DECL) {
-         x1 = 3;
-         x2 = 1;
-         goto &compiler_use_macro_tuple;
-     }
-     # No case matched: this statement has no children to look at
-     return;
-     label compiler_use_macro_tuple;
-     # Children are a1[x1] ... a1[x1 + x2 - 1]
-     x0 = 0;
-     while (x0 < x2) {
-         x3 = a1[x1 + x0];
-         # Slots holding zero/NULL are skipped
-         if (x3) {
-             if (x3[NODE_TYPE] != STMT_EXPR_IDENTIFIER) {
-                 # Not an identifier: it may have children of its own
-                 compiler_use_macro(a0, x3, a2);
-             } else if (strcmp(x3[2], a2[2]) == 0) {
-                 # Identifier named like the macro: put the macro value in its place
-                 a1[x1 + x0] = a2[3];
-             }
-         }
-         x0 = x0 + 1;
-     }
-     return;
-     label compiler_use_macro_vector;
-     # Children are the elements of the vector a1[x1]
-     x0 = 0;
-     while (x0 < vector_size(a1[x1])) {
-         x3 = vector_get(a1[x1], x0);
-         if (x3[NODE_TYPE] != STMT_EXPR_IDENTIFIER) {
-             # Not an identifier: it may have children of its own
-             compiler_use_macro(a0, x3, a2);
-         } else if (strcmp(x3[2], a2[2]) == 0) {
-             # Identifier named like the macro: put the macro value in its place
-             vector_set(a1[x1], x0, a2[3]);
-         }
-         x0 = x0 + 1;
-     }
-     return;
- }
- ## Compile one top level statement
- # Statement types that are not listed below produce no output here.
- # a0 - compiler
- # a1 - top level statement
- compile_top_level_statement : (a0, a1) {
-     # x0 - statement ID
-     allocate(1);
-     x0 = a1[NODE_TYPE];
-     # Exactly one compile function runs per statement
-     if (x0 == STMT_GLOBAL) { compile_global_statement(a0, a1); return; }
-     if (x0 == STMT_FUN_DECL) { compile_function_declaration(a0, a1); return; }
-     if (x0 == STMT_ARRAY_DECL) { compile_array_declaration(a0, a1); return; }
-     # Typed arrays pass their element size: 1 for char, 4 for int
-     if (x0 == STMT_CHAR_ARRAY_DECL) { compile_type_array_declaration(a0, a1, 1); return; }
-     if (x0 == STMT_INT_ARRAY_DECL) { compile_type_array_declaration(a0, a1, 4); return; }
-     if (x0 == STMT_VAR_DECL) { compile_variable_declaration(a0, a1); return; }
- }
- ## Compile a global statement into a `.global` line
- # a0 - compiler
- # a1 - statement (a1[2] is the vector of identifiers)
- compile_global_statement : (a0, a1) {
-     # x0 - compiler output channel
-     # x1 - index of the current identifier
-     # x2 - vector of identifiers
-     allocate(3);
-     x0 = compiler_output_chan(a0);
-     x2 = a1[2];
-     fputs(x0, ".global ");
-     # Identifiers are separated by commas, with none before the first
-     x1 = 0;
-     while (x1 < vector_size(x2)) {
-         if (x1 != 0) { fputc(x0, ','); }
-         fputs(x0, vector_get(x2, x1));
-         x1 = x1 + 1;
-     }
-     # The newline ends the directive
-     fputc(x0, '\n');
- }
- ## Compile a function declaration
- # a0 - compiler
- # a1 - statement (a1[2] is written as the label, a1[4] is the block)
- compile_function_declaration : (a0, a1) {
-     # x0 - block statement
-     # x1 - compiler output channel
-     allocate(2);
-     x1 = compiler_output_chan(a0);
-     x0 = a1[4];
-     # Functions are written to the .text segment
-     compiler_set_segment(a0, SEG_TEXT);
-     # Function label
-     fputs(x1, a1[2]);
-     fputs(x1, ":\n");
-     # Save the caller's %ebp and set it to the current stack pointer
-     fputs(x1, "\tpushl %ebp\n");
-     fputs(x1, "\tmovl %esp, %ebp\n");
-     # Function body
-     compile_block_statement(a0, x0);
-     # Append `leave` and `ret` unless the block already ends with a return
-     compile_function_declaration_return(x1, x0);
- }
- ## Write `leave` and `ret`, unless the block ends with a return statement
- # Note: the last statement is removed from the block's vector by this check.
- # a0 - compiler output channel
- # a1 - block statement (a1[2] is the vector of statements)
- compile_function_declaration_return : (a0, a1) {
-     # x0 - statements of the block
-     # x1 - last statement of the block
-     allocate(2);
-     x0 = a1[2];
-     # Only a block with statements can end with a return
-     if (vector_size(x0) != 0) {
-         # Popping changes the AST, which is fine as the block is compiled already
-         x1 = vector_pop(x0);
-         if (x1[NODE_TYPE] == STMT_RETURN) { return; }
-     }
-     # Empty block, or the last statement is not a return
-     fputs(a0, "\tleave\n\tret\n");
- }
- ## Compile array declaration
- # a0 - compiler
- # a1 - statement (a1[2] - array name, a1[3] - vector of values)
- compile_array_declaration : (a0, a1) {
- # x0 - compiler output channel
- # x1 - value index
- allocate(2);
- x0 = compiler_output_chan(a0);
- # Array is emitted under the SEG_DATA segment
- compiler_set_segment(a0, SEG_DATA);
- # Label followed by a single .long directive listing every value
- fputs(x0, a1[2]);
- fputs(x0, ":.long ");
- for (x1 = 0; x1 < vector_size(a1[3]); x1++) {
- # Values are comma-separated
- if (x1 > 0) { fputc(x0, ','); }
- compile_simple_value(a0, vector_get(a1[3], x1));
- }
- # Finishing the .long line
- fputc(x0, '\n');
- }
- ## Compile type array declaration (emits a `.comm` directive)
- # a0 - compiler
- # a1 - statement (a1[2] - array name, a1[3] - size expression)
- # a2 - type size (multiplied by the element count to get the .comm size;
- #      the visible caller passes 1 for char arrays and 4 for int arrays)
- compile_type_array_declaration : (a0, a1, a2) {
- # x0 - compiler output channel
- # x1 - size expression
- allocate(2);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Fetching size expression
- x1 = a1[3];
- # Size has to be an integer literal, because it is printed at compile time
- if (x1[NODE_TYPE] != STMT_EXPR_INTEGER) {
- # Reporting against the size expression itself (x1), the same way
- # compile_allocate_statement does; only x0 and x1 are allocated here
- compiler_error(a0, x1, "Only integers can be used for size of array");
- }
- # Changing segment to .data, if needed
- compiler_set_segment(a0, SEG_DATA);
- # Writing .comm directive
- fputs(x0, ".comm ");
- # Writing array name
- fputs(x0, a1[2]);
- # Writing array size (element count * type size)
- fputc(x0, ',');
- fputd(x0, x1[2] * a2);
- # Writing new line to finish directive
- fputc(x0, '\n');
- }
- ## Compile variable declaration (emits `name:.long value`)
- # a0 - compiler
- # a1 - statement (a1[2] - variable name, a1[3] - initial value)
- compile_variable_declaration : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Changing segment to .data, if needed
- compiler_set_segment(a0, SEG_DATA);
- # Creating variable label, directly followed by the .long directive
- fputs(x0, a1[2]);
- fputs(x0, ":.long ");
- # Compile variable value (printed as the operand of .long)
- compile_simple_value(a0, a1[3]);
- # Writing newline to end .long directive
- fputc(x0, '\n');
- }
- ## Compile block statement
- # a0 - compiler
- # a1 - block statement (a1[2] - vector of statements)
- compile_block_statement : (a0, a1) {
- # x0 - statement index
- # x1 - unreachable flag
- # x2 - statement
- # x3 - statement type
- allocate(4);
- x1 = FALSE;
- for (x0 = 0; x0 < vector_size(a1[2]); x0++) {
- x2 = vector_get(a1[2], x0);
- x3 = x2[NODE_TYPE];
- # After an unconditional jump only a label makes code reachable again;
- # anything else is reported as an error
- if (x1) {
- if (x3 != STMT_LABEL) {
- compiler_error(a0, x2, "Unreachable code.");
- } else {
- x1 = FALSE;
- }
- }
- compile_statement(a0, x2);
- # return/goto/break/continue never fall through to the next statement
- if (x3 == STMT_RETURN || x3 == STMT_GOTO || x3 == STMT_BREAK || x3 == STMT_CONTINUE) {
- x1 = TRUE;
- }
- }
- }
- ## Compile statement: dispatches on the node type to the matching routine
- # a0 - compiler
- # a1 - statement
- compile_statement : (a0, a1) {
- # x0 - statement ID
- allocate(1);
- # Fetching statement ID
- x0 = a1[NODE_TYPE];
- # Searching for statement
- if (x0 == STMT_IF) { compile_if_statement(a0, a1); }
- else if (x0 == STMT_FOR) { compile_for_statement(a0, a1); }
- # Both loop kinds share one routine; third argument says whether the
- # condition is checked at the end (do-while) or at the start (while)
- else if (x0 == STMT_DO_WHILE) { compile_while_statement(a0, a1, TRUE); }
- else if (x0 == STMT_WHILE) { compile_while_statement(a0, a1, FALSE); }
- else if (x0 == STMT_ALLOCATE) { compile_allocate_statement(a0, a1); }
- else if (x0 == STMT_ASM) { compile_assembly_statement(a0, a1); }
- else if (x0 == STMT_RETURN) { compile_return_statement(a0, a1); }
- # break and continue share one routine, which picks the label by node type
- else if (x0 == STMT_BREAK || x0 == STMT_CONTINUE) { compile_loop_jump_statement(a0, a1); }
- else if (x0 == STMT_WRITECHAR) { compile_writechar_statement(a0, a1); }
- else if (x0 == STMT_LABEL) { compile_label_statement(a0, a1); }
- else if (x0 == STMT_GOTO) { compile_goto_statement(a0, a1); }
- else if (x0 == STMT_BLOCK) { compile_block_statement(a0, a1); }
- # If no statement type matched, treating the node as an expression
- # (compile_expression reports an error for types it does not know either)
- else { compile_expression(a0, a1); }
- }
- ## Compile if statement
- # a0 - compiler
- # a1 - statement (a1[2] - vector of branches: an entry that is not a block is
- #      a condition followed by its block, a bare block is the else branch)
- compile_if_statement : (a0, a1) {
- # x0 - compiler output channel
- # x1 - exit label
- # x2 - next label
- # x3 - counter
- # x4 - statement
- allocate(5);
- x0 = compiler_output_chan(a0);
- # Every branch leaves through this label
- x1 = compiler_new_label(a0);
- x3 = 0;
- while (x3 < vector_size(a1[2])) {
- # Closing the previous branch before opening the next one
- if (x3 > 0) {
- fputs(x0, "\tjmp ");
- fputlabel(x0, x1);
- fputc(x0, '\n');
- # A failed condition of the previous branch lands here
- fputlabeldef(x0, x2);
- }
- x4 = vector_get(a1[2], x3);
- if (x4[NODE_TYPE] != STMT_BLOCK) {
- # Entry is a condition; its result is tested in %eax
- compile_expression(a0, x4);
- # A failed condition skips to the next branch, or straight to the exit
- # when nothing follows the block of this condition
- if (x3 + 2 < vector_size(a1[2])) {
- x2 = compiler_new_label(a0);
- } else {
- x2 = x1;
- }
- fputs(x0, "\tcmpl $0, %eax\n");
- fputs(x0, "\tje ");
- fputlabel(x0, x2);
- fputc(x0, '\n');
- # The block of this condition is the next entry
- x3 = x3 + 1;
- x4 = vector_get(a1[2], x3);
- }
- # Branch body
- compile_block_statement(a0, x4);
- x3 = x3 + 1;
- }
- # Exit label definition
- fputlabeldef(x0, x1);
- }
- ## Compile for statement
- # a0 - compiler
- # a1 - statement (a1[2] - init expression or null, a1[3] - condition or null,
- #      a1[4] - vector of iteration expressions, a1[5] - body block)
- compile_for_statement : (a0, a1) {
- # x0 - compiler output channel
- # x1 - loop start label
- # x2 - label of the iteration expressions
- # x3 - exit label
- # x4 - counter
- allocate(5);
- x0 = compiler_output_chan(a0);
- x1 = compiler_new_label(a0);
- x2 = compiler_new_label(a0);
- x3 = compiler_new_label(a0);
- # Init expression runs once, before the loop start label
- if (a1[2]) { compile_expression(a0, a1[2]); }
- fputlabeldef(x0, x1);
- # Without a condition the loop can only be left by a jump
- if (a1[3]) {
- compile_expression(a0, a1[3]);
- # Zero in %eax means the condition failed: leaving the loop
- fputs(x0, "\tcmpl $0, %eax\n");
- fputs(x0, "\tje ");
- fputlabel(x0, x3);
- fputc(x0, '\n');
- }
- # Body is compiled with x2/x3 registered as labels of the current loop
- compiler_push_loop(a0, x2, x3);
- compile_block_statement(a0, a1[5]);
- compiler_pop_loop(a0);
- # Iteration expressions, in order
- fputlabeldef(x0, x2);
- for (x4 = 0; x4 < vector_size(a1[4]); x4++) {
- compile_expression(a0, vector_get(a1[4], x4));
- }
- # Back to the condition check
- fputs(x0, "\tjmp ");
- fputlabel(x0, x1);
- fputc(x0, '\n');
- # Exit label
- fputlabeldef(x0, x3);
- }
- ## Compile while statement
- # a0 - compiler
- # a1 - statement (a1[2] - condition, a1[3] - body block)
- # a2 - cond in the end (callers pass TRUE for do-while, FALSE for while)
- compile_while_statement : (a0, a1, a2) {
- # x0 - compiler output channel
- # x1 - loop start label
- # x2 - exit label
- allocate(3);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Getting label IDs
- x1 = compiler_new_label(a0);
- x2 = compiler_new_label(a0);
- # Printing label for "loop start"
- fputlabeldef(x0, x1);
- # Compiling condition if it's in the start
- if (!a2) { compile_while_condition(a0, a1[2], a2, x1, x2); }
- # Pushing loop to list
- # NOTE(review): with the condition in the end, x1 marks the start of the
- # body, so a jump to the first pushed label would re-enter the body
- # without evaluating the condition - confirm this is intended for continue
- compiler_push_loop(a0, x1, x2);
- # Compiling while block
- compile_block_statement(a0, a1[3]);
- # Pop loop from list (takes only the compiler, as in compile_for_statement)
- compiler_pop_loop(a0);
- # Compiling condition if it's in the end
- if (a2) {
- compile_while_condition(a0, a1[2], a2, x1, x2);
- } else {
- # Making next iteration in while loop
- fputs(x0, "\tjmp ");
- fputlabel(x0, x1);
- fputc(x0, '\n');
- }
- # Printing exit label
- fputlabeldef(x0, x2);
- }
- ## Compile while condition and the conditional jump that follows it
- # a0 - compiler
- # a1 - expression
- # a2 - cond in the end
- # a3 - loop label
- # a4 - exit label
- compile_while_condition : (a0, a1, a2, a3, a4) {
- # x0 - compiler output channel
- allocate(1);
- x0 = compiler_output_chan(a0);
- # Condition value is tested against zero in %eax
- compile_expression(a0, a1);
- fputs(x0, "\tcmpl $0, %eax\n");
- if (a2) {
- # Condition in the end: non-zero value goes back to the loop label
- fputs(x0, "\tjne ");
- fputlabel(x0, a3);
- } else {
- # Condition in the start: zero value leaves through the exit label
- fputs(x0, "\tje ");
- fputlabel(x0, a4);
- }
- fputc(x0, '\n');
- }
- ## Compile allocate statement (reserves stack space with `subl $N, %esp`)
- # a0 - compiler
- # a1 - statement (a1[2] - size expression)
- compile_allocate_statement : (a0, a1) {
- # x0 - compiler output channel
- # x1 - size expression
- # x2 - allocate size
- allocate(3);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Fetching size expression
- x1 = a1[2];
- # Expression must be an integer literal, as the size is printed at compile time
- if (x1[NODE_TYPE] != STMT_EXPR_INTEGER) {
- compiler_error(a0, x1, "Size must be integer in allocate statement.");
- }
- # Calculating allocation size (expr value * 4, the same 4-byte stride
- # that fputvar uses for variable offsets)
- x2 = x1[2] * 4;
- # Allocation size must be bigger than zero
- if (x2 <= 0) {
- compiler_error(a0, x1, "Size must be positive integer.");
- }
- # Compiling allocation
- fputs(x0, "\tsubl $");
- fputd(x0, x2);
- fputs(x0, ", %esp\n");
- }
- ## Compile assembly statement: copies its lines to the output
- # a0 - compiler
- # a1 - statement (a1[2] - vector of assembly lines)
- compile_assembly_statement : (a0, a1) {
- # x0 - compiler output channel
- # x1 - line index
- allocate(2);
- x0 = compiler_output_chan(a0);
- x1 = 0;
- while (x1 < vector_size(a1[2])) {
- # Every line is wrapped as "\t<line>\n"
- fputc(x0, '\t');
- fputs(x0, vector_get(a1[2], x1));
- fputc(x0, '\n');
- x1 = x1 + 1;
- }
- }
- ## Compile return statement (optional value, then `leave`/`ret`)
- # a0 - compiler
- # a1 - statement (a1[2] - return array, empty when there is no value)
- compile_return_statement : (a0, a1) {
- # x0 - return array
- allocate(1);
- # Fetching return array
- x0 = a1[2];
- # Compiling return value, if it is there
- if (vector_size(x0) != 0) {
- # Compiling return value
- # NOTE(review): vector_pop presumably removes the element from the
- # return array, so this statement node is modified by compiling it
- compile_expression(a0, vector_pop(x0));
- }
- # Printing the epilogue: tearing down the frame and returning
- fputs(compiler_output_chan(a0), "\tleave\n\tret\n");
- }
- ## Compile break/continue statement as a jump to a label of the current loop
- # a0 - compiler
- # a1 - statement
- compile_loop_jump_statement : (a0, a1) {
- # x0 - compiler output channel
- # x1 - loop tuple
- # x2 - label
- allocate(3);
- x0 = compiler_output_chan(a0);
- x1 = compiler_loop(a0);
- # Without a current loop there is nothing to jump to
- if (!x1) {
- compiler_error(a0, a1, "Tried to break/continue with no loop there.");
- return;
- }
- # Second tuple entry serves break, first one serves continue
- if (a1[NODE_TYPE] == STMT_BREAK) {
- x2 = x1[1];
- } else {
- x2 = x1[0];
- }
- fputs(x0, "\tjmp ");
- fputlabel(x0, x2);
- fputc(x0, '\n');
- }
- ## Compile writechar statement: stores one byte at (base address + index)
- # a0 - compiler
- # a1 - statement (a1[2] - base address, a1[3] - index, a1[4] - value)
- compile_writechar_statement : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling base address expression
- compile_expression(a0, a1[2]);
- # Storing base address in %edx and reserving it
- compiler_allocate_register(a0, REG_EDX);
- fputs(x0, "\tmovl %eax, %edx\n");
- # Compiling index expression
- compile_expression(a0, a1[3]);
- # Adding index to base address (index is a byte offset, it is not scaled)
- fputs(x0, "\taddl %eax, %edx\n");
- # Compiling value expression
- compile_expression(a0, a1[4]);
- # Storing the low byte of the value (%al) to address and freeing %edx
- fputs(x0, "\tmovb %al, (%edx)\n");
- compiler_free_register(a0, REG_EDX);
- }
- ## Compile label statement (emits `name:`)
- # a0 - compiler
- # a1 - statement (a1[2] - label name)
- compile_label_statement : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting compiler output channel
- x0 = compiler_output_chan(a0);
- # Printing label definition; the name is written as is
- fputs(x0, a1[2]);
- fputs(x0, ":\n");
- }
- ## Compile goto statement (indirect jump to a computed address)
- # a0 - compiler
- # a1 - statement (a1[2] - expression giving the target address)
- compile_goto_statement : (a0, a1) {
- # Compile goto expression
- compile_expression(a0, a1[2]);
- # Printing indirect jump through %eax, where the target is taken from
- fputs(compiler_output_chan(a0), "\tjmp *%eax\n");
- }
- ## Compile expression: dispatches on the node type to the matching routine
- # a0 - compiler
- # a1 - statement
- compile_expression : (a0, a1) {
- # x0 - statement ID
- allocate(1);
- # Fetching statement ID (through NODE_TYPE, like every other routine does,
- # instead of a hard-coded index)
- x0 = a1[NODE_TYPE];
- # Looking for expressions
- ## Constant
- if (x0 == STMT_EXPR_INTEGER) { compile_integer_expression(a0, a1); }
- else if (x0 == STMT_EXPR_STRING) { compile_string_expression(a0, a1); }
- ## Postfix
- else if (x0 == STMT_EXPR_CALL) { compile_address_call_expression(a0, a1); }
- else if (x0 == STMT_EXPR_INDEX) { compile_address_index_expression(a0, a1); }
- ## Simple
- else if (x0 == STMT_EXPR_IDENTIFIER) { compile_simple_identifier_expression(a0, a1); }
- else if (x0 == STMT_EXPR_ARGUMENT) { compile_simple_argument_expression(a0, a1); }
- else if (x0 == STMT_EXPR_VARIABLE) { compile_simple_variable_expression(a0, a1); }
- else if (x0 == STMT_EXPR_SYSCALL) { compile_simple_syscall_expression(a0, a1); }
- else if (x0 == STMT_EXPR_READCHAR) { compile_simple_readchar_expression(a0, a1); }
- ## Prefix
- else if (x0 == STMT_EXPR_PLUS) { compile_prefix_plus_expression(a0, a1); }
- else if (x0 == STMT_EXPR_MINUS) { compile_prefix_minus_expression(a0, a1); }
- else if (x0 == STMT_EXPR_LOGICAL_NOT) { compile_prefix_logical_not_expression(a0, a1); }
- else if (x0 == STMT_EXPR_NOT) { compile_prefix_not_expression(a0, a1); }
- else if (x0 == STMT_EXPR_DEREF) { compile_prefix_deref_expression(a0, a1); }
- else if (x0 == STMT_EXPR_ADDROF) { compile_prefix_addrof_expression(a0, a1); }
- ## Arithmetic and bitwise operations
- ## (range check: relies on these IDs being contiguous from MUL to OR)
- else if (x0 >= STMT_EXPR_MUL && x0 <= STMT_EXPR_OR) {
- compile_bitwise_arithmetic_expression(a0, x0, a1[2], a1[3]);
- }
- ## Relational
- else if (x0 == STMT_EXPR_LESS) { compile_comparison_expression(a0, a1); }
- else if (x0 == STMT_EXPR_GREATER) { compile_comparison_expression(a0, a1); }
- else if (x0 == STMT_EXPR_LE) { compile_comparison_expression(a0, a1); }
- else if (x0 == STMT_EXPR_GE) { compile_comparison_expression(a0, a1); }
- ## Equality
- else if (x0 == STMT_EXPR_EQ) { compile_comparison_expression(a0, a1); }
- else if (x0 == STMT_EXPR_NE) { compile_comparison_expression(a0, a1); }
- ## Logical operations (third argument: FALSE for AND, TRUE for OR)
- else if (x0 == STMT_EXPR_LOGICAL_AND) { compile_logical_expression(a0, a1, FALSE); }
- else if (x0 == STMT_EXPR_LOGICAL_OR) { compile_logical_expression(a0, a1, TRUE); }
- ## Ternary
- else if (x0 == STMT_EXPR_TERNARY) { compile_ternary_expression(a0, a1); }
- ## Assignment and {post,pre}{dec,inc}
- ## (range check: relies on these IDs being contiguous from ASSIGN to POSTINC)
- else if (x0 >= STMT_EXPR_ASSIGN && x0 <= STMT_EXPR_POSTINC) {
- compile_assignment_expression(a0, a1);
- } else {
- compiler_error(a0, a1, "This expression can't be compiled.");
- }
- }
- ## Compile call expression
- # a0 - compiler
- # a1 - statement (a1[2] - call vector: entry 0 is the expression to call,
- #      entries 1.. are the arguments)
- compile_address_call_expression : (a0, a1) {
- # x0 - compiler output channel
- # x1 - call vector
- # x2 - call vector size
- # x3 - argument index
- # x4 - expression to call
- allocate(5);
- x0 = compiler_output_chan(a0);
- x1 = a1[2];
- x2 = vector_size(x1);
- x4 = vector_get(x1, 0);
- # %ebx is reserved first, because a computed call target is kept there
- compiler_allocate_register(a0, REG_EBX);
- # A plain identifier is called by name; anything else is evaluated now
- # and its value is parked in %ebx
- if (x4[NODE_TYPE] != STMT_EXPR_IDENTIFIER) {
- compile_expression(a0, x4);
- fputs(x0, "\tmovl %eax, %ebx\n");
- }
- # Reserving the remaining registers
- compiler_allocate_register(a0, REG_ECX);
- compiler_allocate_register(a0, REG_EDX);
- compiler_allocate_register(a0, REG_ESI);
- compiler_allocate_register(a0, REG_EDI);
- # One 4-byte stack slot per argument
- if (x2 > 1) {
- fputs(x0, "\tsubl $");
- fputd(x0, 4 * (x2 - 1));
- fputs(x0, ", %esp\n");
- }
- # Argument number k (vector entry k + 1) is stored at 4*k(%esp)
- for (x3 = 1; x3 < x2; x3++) {
- compile_expression(a0, vector_get(x1, x3));
- fputs(x0, "\tmovl %eax, ");
- fputd(x0, 4 * (x3 - 1));
- fputs(x0, "(%esp)\n");
- }
- # The call itself: indirect through %ebx, or direct by name
- if (x4[NODE_TYPE] != STMT_EXPR_IDENTIFIER) {
- fputs(x0, "\tcall *%ebx\n");
- } else {
- fputs(x0, "\tcall ");
- fputs(x0, x4[2]);
- fputc(x0, '\n');
- }
- # Dropping the argument slots
- if (x2 > 1) {
- fputs(x0, "\taddl $");
- fputd(x0, 4 * (x2 - 1));
- fputs(x0, ", %esp\n");
- }
- # Releasing registers in the reverse order of their reservation
- compiler_free_register(a0, REG_EDI);
- compiler_free_register(a0, REG_ESI);
- compiler_free_register(a0, REG_EDX);
- compiler_free_register(a0, REG_ECX);
- compiler_free_register(a0, REG_EBX);
- }
- ## Compile address index expression (loads the value of the indexed element)
- # a0 - compiler
- # a1 - statement
- compile_address_index_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling address of the element (left in %eax)
- compile_address_index_address(a0, a1);
- # Loading value from address
- fputs(x0, "\tmovl (%eax), %eax\n");
- }
- ## Compile address from address index expression
- ## (leaves the address of the indexed 4-byte element in %eax)
- # a0 - compiler
- # a1 - statement (a1[2] - indexed expression, a1[3] - index expression)
- compile_address_index_address : (a0, a1) {
- # x0 - compiler output channel
- # x1 - first operand
- # x2 - first operand type
- allocate(3);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Fetching first operand
- x1 = a1[2];
- # Fetching first operand type
- x2 = x1[NODE_TYPE];
- # Checking if first operand is another address index
- if (x2 == STMT_EXPR_INDEX) {
- # Compiling address of the inner element
- compile_address_index_address(a0, x1);
- # Loading the value stored there into %edx; it is the base address
- compiler_allocate_register(a0, REG_EDX);
- fputs(x0, "\tmovl (%eax), %edx\n");
- } else {
- # If indexed expression is not identifier, compiling it and moving to %edx
- # (an identifier needs no register: its name is used as displacement)
- if (x2 != STMT_EXPR_IDENTIFIER) {
- # Compiling expression
- compile_expression(a0, x1);
- # Moving value to %edx
- compiler_allocate_register(a0, REG_EDX);
- fputs(x0, "\tmovl %eax, %edx\n");
- }
- }
- # Compiling second operand (the index, left in %eax)
- compile_expression(a0, a1[3]);
- # Calculating address: base + index * 4
- fputs(x0, "\tleal ");
- if (x2 == STMT_EXPR_IDENTIFIER) {
- fputs(x0, x1[2]);
- fputs(x0, "(,%eax,4), %eax\n");
- } else {
- # Base is in %edx, which was reserved in one of the branches above
- fputs(x0, "(%edx,%eax,4), %eax\n");
- compiler_free_register(a0, REG_EDX);
- }
- }
- ## Compile identifier expression (emits `movl name, %eax`)
- # a0 - compiler
- # a1 - statement (a1[2] - identifier name)
- compile_simple_identifier_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Printing instruction; the name is used as a memory operand
- fputs(x0, "\tmovl ");
- fputs(x0, a1[2]);
- fputs(x0, ", %eax\n");
- }
- ## Compile argument expression (loads a function argument into %eax)
- # a0 - compiler
- # a1 - statement (a1[2] - argument number)
- compile_simple_argument_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Printing instruction; fputarg prints the %ebp-relative operand
- fputs(x0, "\tmovl ");
- fputarg(x0, a1[2]);
- fputs(x0, ", %eax\n");
- }
- ## Print argument to output channel as an %ebp-relative operand
- # a0 - output channel
- # a1 - number
- fputarg : (a0, a1) {
- # Print offset to %ebp: argument 0 is 8(%ebp), each next one 4 bytes higher
- fputd(a0, 4 * (a1 + 2));
- fputs(a0, "(%ebp)");
- }
- ## Compile variable expression (loads a local variable into %eax)
- # a0 - compiler
- # a1 - statement (a1[2] - variable number)
- compile_simple_variable_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Printing instruction; fputvar prints the %ebp-relative operand
- fputs(x0, "\tmovl ");
- fputvar(x0, a1[2]);
- fputs(x0, ", %eax\n");
- }
- ## Print variable to output channel as an %ebp-relative operand
- # a0 - output channel
- # a1 - number
- fputvar : (a0, a1) {
- # Print offset to %ebp: variable 0 is -4(%ebp), each next one 4 bytes lower
- fputc(a0, '-');
- fputd(a0, 4 * (a1 + 1));
- fputs(a0, "(%ebp)");
- }
- ## Compile syscall expression (arguments into registers, then `int $0x80`)
- # a0 - compiler
- # a1 - statement (a1[2] - vector of arguments)
- compile_simple_syscall_expression : (a0, a1) {
- # x0 - compiler output channel
- # x1 - arguments
- # x2 - argument count
- # x3 - argument index
- allocate(4);
- x0 = compiler_output_chan(a0);
- x1 = a1[2];
- x2 = vector_size(x1);
- # Reserving one register per argument after the first; a seventh
- # argument needs %ebp, which is saved by a plain push
- if (x2 > 1) { compiler_allocate_register(a0, REG_EBX); }
- if (x2 > 2) { compiler_allocate_register(a0, REG_ECX); }
- if (x2 > 3) { compiler_allocate_register(a0, REG_EDX); }
- if (x2 > 4) { compiler_allocate_register(a0, REG_ESI); }
- if (x2 > 5) { compiler_allocate_register(a0, REG_EDI); }
- if (x2 > 6) { fputs(x0, "\tpushl %ebp\n"); }
- for (x3 = 0; x3 < x2; x3++) {
- # Value of the argument is in %eax after this
- compile_expression(a0, vector_get(x1, x3));
- if (x3 != 0) {
- # Arguments after the first go to the register named by registers[x3]
- fputs(x0, "\tmovl %eax, ");
- fputs(x0, registers[x3]);
- fputc(x0, '\n');
- } else if (x2 > 1) {
- # First argument stays in %eax; it is pushed when more arguments
- # follow, because compiling them overwrites %eax
- fputs(x0, "\tpushl %eax\n");
- }
- }
- # Getting the first argument back
- if (x2 > 1) { fputs(x0, "\tpopl %eax\n"); }
- # Calling syscall
- fputs(x0, "\tint $0x80\n");
- # Undoing the reservations in reverse order
- if (x2 > 6) { fputs(x0, "\tpopl %ebp\n"); }
- if (x2 > 5) { compiler_free_register(a0, REG_EDI); }
- if (x2 > 4) { compiler_free_register(a0, REG_ESI); }
- if (x2 > 3) { compiler_free_register(a0, REG_EDX); }
- if (x2 > 2) { compiler_free_register(a0, REG_ECX); }
- if (x2 > 1) { compiler_free_register(a0, REG_EBX); }
- }
- ## Compile readchar expression: loads one byte from (base address + index)
- # a0 - compiler
- # a1 - statement (a1[2] - base address, a1[3] - index)
- compile_simple_readchar_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling base address expression
- compile_expression(a0, a1[2]);
- # Storing base address in %edx and reserving it
- compiler_allocate_register(a0, REG_EDX);
- fputs(x0, "\tmovl %eax, %edx\n");
- # Compiling index expression
- compile_expression(a0, a1[3]);
- # Adding index to base address (index is a byte offset, it is not scaled)
- fputs(x0, "\taddl %eax, %edx\n");
- # Loading byte from address, sign-extended to 32 bits, and freeing %edx
- fputs(x0, "\tmovsbl (%edx), %eax\n");
- compiler_free_register(a0, REG_EDX);
- }
- ## Compile plus prefix expression
- # a0 - compiler
- # a1 - statement (a1[2] - operand, as in the other prefix expressions)
- compile_prefix_plus_expression : (a0, a1) {
- # Unary plus does not change the value, but the operand still has to be
- # compiled, otherwise nothing is emitted and %eax keeps a stale value
- compile_expression(a0, a1[2]);
- }
- ## Compile minus prefix expression (arithmetic negation of the operand)
- # a0 - compiler
- # a1 - statement (a1[2] - operand)
- compile_prefix_minus_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling expression to negate
- compile_expression(a0, a1[2]);
- # Negating the value in %eax in place
- fputs(x0, "\tnegl %eax\n");
- }
- ## Compile logical not expression (1 when the operand is zero, 0 otherwise)
- # a0 - compiler
- # a1 - statement (a1[2] - operand)
- compile_prefix_logical_not_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling expression to negate
- compile_expression(a0, a1[2]);
- # Comparing with zero
- fputs(x0, "\tcmpl $0, %eax\n");
- # Setting %al from the equal flag and widening it to the whole %eax
- fputs(x0, "\tsete %al\n");
- fputs(x0, "\tmovzbl %al, %eax\n");
- }
- ## Compile not expression (bitwise complement of the operand)
- # a0 - compiler
- # a1 - statement (a1[2] - operand)
- compile_prefix_not_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling expression to bitwise negate
- compile_expression(a0, a1[2]);
- # Bitwise negating the value in %eax in place
- fputs(x0, "\tnotl %eax\n");
- }
- ## Compile dereference expression (loads the 32-bit value at an address)
- # a0 - compiler
- # a1 - statement (a1[2] - expression giving the address)
- compile_prefix_deref_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling expression to dereference
- compile_expression(a0, a1[2]);
- # Dereferencing: replacing the address in %eax with the value stored there
- fputs(x0, "\tmovl (%eax), %eax\n");
- }
- ## Compile addressof expression
- # a0 - compiler
- # a1 - statement (a1[2] - expression whose address is taken)
- compile_prefix_addrof_expression : (a0, a1) {
- # x0 - compiler output channel
- # x1 - expression
- # x2 - expression type
- allocate(3);
- x0 = compiler_output_chan(a0);
- x1 = a1[2];
- x2 = x1[NODE_TYPE];
- if (x2 == STMT_EXPR_IDENTIFIER) {
- # Address of an identifier is its name used as an immediate
- fputs(x0, "\tmovl $");
- fputs(x0, x1[2]);
- fputs(x0, ", %eax\n");
- } else if (x2 == STMT_EXPR_ARGUMENT) {
- # Address of an argument is computed from its %ebp-relative operand
- fputs(x0, "\tleal ");
- fputarg(x0, x1[2]);
- fputs(x0, ", %eax\n");
- } else if (x2 == STMT_EXPR_VARIABLE) {
- # Same for a local variable
- fputs(x0, "\tleal ");
- fputvar(x0, x1[2]);
- fputs(x0, ", %eax\n");
- } else {
- # Nothing else is supported here
- compiler_error(a0, a1, "Can't compile addressof for this expression.");
- }
- }
- ## Compile bitwise arithmetic expression (result is left in %eax)
- # a0 - compiler
- # a1 - operation type (one of the STMT_EXPR_MUL..STMT_EXPR_OR IDs handled below)
- # a2 - first operand
- # a3 - second operand
- compile_bitwise_arithmetic_expression : (a0, a1, a2, a3) {
- # x0 - compiler output channel
- allocate(1);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Compiling first operand
- compile_expression(a0, a2);
- # Reserving %ebx
- compiler_allocate_register(a0, REG_EBX);
- # Parking first operand while the second one is compiled
- if (a1 == STMT_EXPR_SUB) {
- # SUB needs the first operand back in %eax, so it goes to the stack
- fputs(x0, "\tpushl %eax\n");
- } else if (a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) {
- # divl uses %edx as well, reserving it
- compiler_allocate_register(a0, REG_EDX);
- # DIV and MOD need the first operand back in %eax, so it goes to the stack
- fputs(x0, "\tpushl %eax\n");
- } else {
- # Every other operation keeps the first operand in %ebx
- fputs(x0, "\tmovl %eax, %ebx\n");
- }
- # Compiling second operand
- compile_expression(a0, a3);
- # Placing operands where the instruction expects them
- if (a1 == STMT_EXPR_SHL || a1 == STMT_EXPR_SHR) {
- # Shift count has to be in %cl, so second operand goes to %ecx
- compiler_allocate_register(a0, REG_ECX);
- fputs(x0, "\tmovl %eax, %ecx\n");
- # Moving value to be shifted to %eax
- fputs(x0, "\tmovl %ebx, %eax\n");
- } else if (a1 == STMT_EXPR_SUB) {
- # Second operand goes to %ebx
- fputs(x0, "\tmovl %eax, %ebx\n");
- # And first operand is restored to %eax
- fputs(x0, "\tpopl %eax\n");
- } else if (a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) {
- # Second operand (the divisor) goes to %ebx
- fputs(x0, "\tmovl %eax, %ebx\n");
- # And first operand is restored to %eax
- fputs(x0, "\tpopl %eax\n");
- # Clear %edx for division (tab-prefixed like every other instruction)
- fputs(x0, "\tmovl $0, %edx\n");
- }
- # Compiling operation
- fputc(x0, '\t');
- if (a1 == STMT_EXPR_MUL) { fputs(x0, "imull %ebx, %eax"); }
- else if (a1 == STMT_EXPR_DIV) { fputs(x0, "divl %ebx"); }
- # MOD takes the value divl leaves in %edx
- else if (a1 == STMT_EXPR_MOD) { fputs(x0, "divl %ebx\n\tmovl %edx, %eax"); }
- else if (a1 == STMT_EXPR_ADD) { fputs(x0, "addl %ebx, %eax"); }
- else if (a1 == STMT_EXPR_SUB) { fputs(x0, "subl %ebx, %eax"); }
- else if (a1 == STMT_EXPR_SHL) { fputs(x0, "shll %cl, %eax"); }
- else if (a1 == STMT_EXPR_SHR) { fputs(x0, "shrl %cl, %eax"); }
- else if (a1 == STMT_EXPR_AND) { fputs(x0, "andl %ebx, %eax"); }
- else if (a1 == STMT_EXPR_XOR) { fputs(x0, "xorl %ebx, %eax"); }
- else if (a1 == STMT_EXPR_OR) { fputs(x0, "orl %ebx, %eax"); }
- fputc(x0, '\n');
- # Freeing used registers
- if (a1 == STMT_EXPR_SHL || a1 == STMT_EXPR_SHR) { compiler_free_register(a0, REG_ECX); }
- else if (a1 == STMT_EXPR_DIV || a1 == STMT_EXPR_MOD) { compiler_free_register(a0, REG_EDX); }
- compiler_free_register(a0, REG_EBX);
- }
- ## Compile comparison expression (leaves 1 or 0 in %eax)
- # a0 - compiler
- # a1 - statement (a1[2] - first operand, a1[3] - second operand)
- compile_comparison_expression : (a0, a1) {
- # x0 - compiler output channel
- # x1 - statement ID
- allocate(2);
- x0 = compiler_output_chan(a0);
- x1 = a1[NODE_TYPE];
- # TODO: make support of chained equations and comparisons.
- # First operand is kept in %edx while the second one is compiled
- compile_expression(a0, a1[2]);
- compiler_allocate_register(a0, REG_EDX);
- fputs(x0, "\tmovl %eax, %edx\n");
- compile_expression(a0, a1[3]);
- # Flags describe the first operand relative to the second one
- fputs(x0, "\tcmpl %eax, %edx\n");
- compiler_free_register(a0, REG_EDX);
- # Picking the set instruction that matches the operator
- fputc(x0, '\t');
- if (x1 == STMT_EXPR_EQ) { fputs(x0, "sete"); }
- else if (x1 == STMT_EXPR_NE) { fputs(x0, "setne"); }
- else if (x1 == STMT_EXPR_LESS) { fputs(x0, "setl"); }
- else if (x1 == STMT_EXPR_GREATER) { fputs(x0, "setg"); }
- else if (x1 == STMT_EXPR_LE) { fputs(x0, "setle"); }
- else if (x1 == STMT_EXPR_GE) { fputs(x0, "setge"); }
- # Operand of the set instruction, then widening %al to the whole %eax
- fputs(x0, " %al\n\tmovzbl %al, %eax\n");
- }
- ## Compile logical expression (short-circuit; leaves 1 or 0 in %eax)
- # a0 - compiler
- # a1 - statement (a1[2] - first operand, a1[3] - second operand)
- # a2 - AND/OR (FALSE for AND, TRUE for OR)
- compile_logical_expression : (a0, a1, a2) {
- # x0 - compiler output channel
- # x1 - label the operands jump to when they decide the result
- # x2 - exit label
- allocate(3);
- x0 = compiler_output_chan(a0);
- x1 = compiler_new_label(a0);
- x2 = compiler_new_label(a0);
- # Each operand jumps to x1 as soon as it decides the result
- # (zero operand for AND, non-zero operand for OR)
- compile_logical_expression_operand(a0, a1[2], a2, x1);
- compile_logical_expression_operand(a0, a1[3], a2, x1);
- # No operand jumped: AND gives 1, OR gives 0
- fputs(x0, a2 ? "\tmovl $0, %eax\n" : "\tmovl $1, %eax\n");
- # Skipping the other result
- fputs(x0, "\tjmp ");
- fputlabel(x0, x2);
- fputc(x0, '\n');
- # Some operand jumped: AND gives 0, OR gives 1
- fputlabeldef(x0, x1);
- fputs(x0, a2 ? "\tmovl $1, %eax\n" : "\tmovl $0, %eax\n");
- # Exit label
- fputlabeldef(x0, x2);
- }
- ## Compile logical expression operand
- # a0 - compiler
- # a1 - expression
- # a2 - AND/OR (FALSE for AND, TRUE for OR)
- # a3 - first label (jump target when this operand decides the result)
- compile_logical_expression_operand : (a0, a1, a2, a3) {
- # x0 - compiler output channel
- allocate(1);
- x0 = compiler_output_chan(a0);
- # Operand that is a logical expression of the same kind is flattened:
- # its own operands are compiled against the same label
- if (a1[NODE_TYPE] - STMT_EXPR_LOGICAL_AND == a2) {
- compile_logical_expression_operand(a0, a1[2], a2, a3);
- compile_logical_expression_operand(a0, a1[3], a2, a3);
- return;
- }
- # Any other expression is compiled the usual way and tested against zero
- compile_expression(a0, a1);
- fputs(x0, "\tcmpl $0, %eax\n");
- # OR jumps on a non-zero value, AND jumps on zero
- fputs(x0, a2 ? "\tjne " : "\tje ");
- fputlabel(x0, a3);
- fputc(x0, '\n');
- }
- ## Compile ternary expression (only the selected value is evaluated)
- # a0 - compiler
- # a1 - statement (a1[2] - condition, a1[3] - true value, a1[4] - false value)
- compile_ternary_expression : (a0, a1) {
- # x0 - compiler output channel
- # x1 - label of the false value
- # x2 - exit label
- allocate(3);
- # Getting output channel
- x0 = compiler_output_chan(a0);
- # Getting label IDs
- x1 = compiler_new_label(a0);
- x2 = compiler_new_label(a0);
- # Compiling condition
- compile_expression(a0, a1[2]);
- # Comparing condition with zero
- fputs(x0, "\tcmpl $0, %eax\n");
- # If equal to zero, jump to first label (false value)
- fputs(x0, "\tje ");
- fputlabel(x0, x1);
- fputc(x0, '\n');
- # Compiling true value
- compile_expression(a0, a1[3]);
- # Skipping the false value once the true value is computed
- fputs(x0, "\tjmp ");
- fputlabel(x0, x2);
- fputc(x0, '\n');
- # Printing label for false value
- fputlabeldef(x0, x1);
- # Compiling false value
- compile_expression(a0, a1[4]);
- # Printing exit label
- fputlabeldef(x0, x2);
- }
- ## Compile assignment expression
- # Handles plain assignment, pre/post increment and decrement, and compound
- # assignment. The kind of assignment is read from a1[NODE_TYPE]; the
- # function takes no separate type argument.
- # Steps, in emission order:
- #   1. for an indexed or dereferenced target, compute its address into %ebx
- #   2. compute the value to store (into %eax, or into %edx for post-inc/dec)
- #   3. emit "movl <source register>, <target>"
- #   4. release the registers reserved in steps 1 and 2
- # a0 - compiler
- # a1 - statement (a1[2] - target operand, a1[3] - right-hand side when used)
- compile_assignment_expression : (a0, a1) {
- # x0 - compiler output channel
- # x1 - statement ID
- # x2 - first operand
- # x3 - first operand's ID
- allocate(4);
- # Getting compiler output channel
- x0 = compiler_output_chan(a0);
- # Fetching assignment type
- x1 = a1[NODE_TYPE];
- # Fetching first operand
- x2 = a1[2];
- # Fetching its ID
- x3 = x2[NODE_TYPE];
- # If the target is an indexed address or a dereference, its address is
- # compiled first and copied from %eax to %ebx, so that the value compiled
- # afterwards can use %eax.
- if (x3 == STMT_EXPR_INDEX) {
- compile_address_index_address(a0, x2);
- compiler_allocate_register(a0, REG_EBX);
- fputs(x0, "\tmovl %eax, %ebx\n");
- } else if (x3 == STMT_EXPR_DEREF) {
- # For a dereference the address is the value of the inner expression x2[2]
- compile_expression(a0, x2[2]);
- compiler_allocate_register(a0, REG_EBX);
- fputs(x0, "\tmovl %eax, %ebx\n");
- }
- # Compiling expression
- if (x1 == STMT_EXPR_ASSIGN) {
- # If we have simple assignment, compile expression
- compile_expression(a0, a1[3]);
- } else if (x1 >= STMT_EXPR_PREDEC && x1 <= STMT_EXPR_POSTINC) {
- # NOTE(review): the range test assumes the four inc/dec IDs are contiguous,
- # with PREDEC lowest and POSTINC highest - confirm against their definition.
- # Compile expression to increment/decrement (current value of the target).
- # NOTE(review): for an indexed/dereferenced target the operand is compiled
- # here a second time, after its address was already compiled above - verify
- # this is acceptable for operands with side effects.
- compile_expression(a0, x2);
- if (x1 == STMT_EXPR_PREDEC) { fputs(x0, "\tsubl $1, %eax\n"); }
- else if (x1 == STMT_EXPR_PREINC) { fputs(x0, "\taddl $1, %eax\n"); }
- else {
- # Post-increment/decrement: %eax keeps the old value, the new value is
- # built separately in %edx.
- # Reserving %edx for our purposes
- compiler_allocate_register(a0, REG_EDX);
- # Storing new value in %edx: "leal 1(%eax), %edx" or "leal -1(%eax), %edx"
- fputs(x0, "\tleal ");
- if (x1 == STMT_EXPR_POSTDEC) { fputc(x0, '-'); }
- fputs(x0, "1(%eax), %edx\n");
- }
- } else {
- # If we have complex assignment, calculate value to set.
- # The ID passed on is x1 shifted from the *_ASSIGN range to the plain
- # operator range (presumably both ranges list the operators in the same
- # order - confirm against the ID definitions).
- compile_bitwise_arithmetic_expression(
- a0, x1 - STMT_EXPR_MUL_ASSIGN + STMT_EXPR_MUL, x2, a1[3]
- );
- }
- # Compiling assignment
- fputs(x0, "\tmovl ");
- # If we had post{dec,inc} we need to load value from %edx
- if (x1 == STMT_EXPR_POSTDEC || x1 == STMT_EXPR_POSTINC) {
- fputs(x0, "%edx");
- } else {
- fputs(x0, "%eax");
- }
- # Compiling where to move
- fputs(x0, ", ");
- # NOTE(review): an unsupported target is only reported here, after the
- # value code and the beginning of the movl line have already been written.
- if (x3 == STMT_EXPR_IDENTIFIER) { fputs(x0, x2[2]); }
- else if (x3 == STMT_EXPR_ARGUMENT) { fputarg(x0, x2[2]); }
- else if (x3 == STMT_EXPR_VARIABLE) { fputvar(x0, x2[2]); }
- else if (x3 == STMT_EXPR_INDEX ||
- x3 == STMT_EXPR_DEREF) { fputs(x0, "(%ebx)"); }
- else { compiler_error(a0, a1, "Can't use this as assignable operand."); }
- fputc(x0, '\n');
- # Freeing used registers (same conditions under which they were reserved)
- if (x1 == STMT_EXPR_POSTDEC || x1 == STMT_EXPR_POSTINC) { compiler_free_register(a0, REG_EDX); }
- if (x3 == STMT_EXPR_INDEX || x3 == STMT_EXPR_DEREF) { compiler_free_register(a0, REG_EBX); }
- }
- ## Compile integer expression
- # Emits a single instruction of the form "movl $<a1[2]>, %eax".
- # a0 - compiler
- # a1 - statement (a1[2] - the value written after the '$')
- compile_integer_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting compiler output channel
- x0 = compiler_output_chan(a0);
- # Just setting value to %eax: the line is written in three pieces
- fputs(x0, "\tmovl $");
- # (fputd presumably prints a1[2] as a decimal number - confirm)
- fputd(x0, a1[2]);
- fputs(x0, ", %eax\n");
- }
- ## Compile string expression
- # Emits a single instruction of the form "movl $strbuf+<a1[2]>, %eax".
- # a0 - compiler
- # a1 - statement (a1[2] - number written after "strbuf+"; presumably the
- #      offset of the string inside the string buffer - confirm)
- compile_string_expression : (a0, a1) {
- # x0 - compiler output channel
- allocate(1);
- # Getting compiler output channel
- x0 = compiler_output_chan(a0);
- # Just setting value to %eax: the line is written in three pieces
- fputs(x0, "\tmovl $strbuf+");
- fputd(x0, a1[2]);
- fputs(x0, ", %eax\n");
- }
- ## Compile simple value
- # Writes an expression as bare operand text to the compiler output channel
- # (no instruction and no newline are written here):
- #   STMT_EXPR_INTEGER    -> a1[2] printed with fputd
- #   STMT_EXPR_STRING     -> "strbuf+" followed by a1[2] printed with fputd
- #   STMT_EXPR_IDENTIFIER -> a1[2] printed with fputs
- # Every other statement ID is reported through compiler_error.
- # a0 - compiler
- # a1 - statement
- compile_simple_value : (a0, a1) {
- # x0 - output channel of the compiler
- # x1 - ID (node type) of the statement
- allocate(2);
- x0 = compiler_output_chan(a0);
- x1 = a1[NODE_TYPE];
- # TODO: check if statement can be precalculated, e.g. `2 + 2`
- # Exactly one branch of the chain below runs
- if (x1 == STMT_EXPR_INTEGER) {
- # Integer: the number itself
- fputd(x0, a1[2]);
- } else if (x1 == STMT_EXPR_STRING) {
- # String: "strbuf+" and the number stored in the statement
- fputs(x0, "strbuf+");
- fputd(x0, a1[2]);
- } else if (x1 == STMT_EXPR_IDENTIFIER) {
- # Identifier: its text
- fputs(x0, a1[2]);
- } else {
- # Nothing else has a simple operand form
- compiler_error(a0, a1, "Expression can't be compiled as simple.");
- }
- }
- ## Print usage
- # Writes the line "usage: <a0> [option]... <file>" using eputs
- # (presumably the error-output counterpart of fputs - confirm).
- # a0 - program name
- usage : (a0) {
- eputs("usage: ");
- eputs(a0);
- eputs(" [option]... <file>\n");
- }
- ## Main function
- # Drives the whole pipeline: input file -> lexer -> parser -> AST -> compiler.
- # Only a1[1] is used as the input file; any further arguments are ignored.
- # The output file name is fixed to "output.S".
- # a0 - argc
- # a1 - argv
- # @return exit code (1 when no input file was given, 0 otherwise)
- main : (a0, a1) {
- # x0 - input channel
- # x1 - output channel
- # x2 - lexer
- # x3 - string buffer
- # x4 - parser
- # x5 - AST
- # x6 - compiler
- allocate(7);
- # If no additional args - printing usage
- if (a0 < 2) { usage(a1[0]); return(1); }
- # TODO: implement option parser
- # (until then a1[1] is always taken as the file, although the usage text
- # mentions options)
- # Creating input channel
- # NOTE(review): no check of the result is visible here - confirm how a
- # missing or unreadable file is handled inside input_chan_file.
- x0 = input_chan_file(a1[1]);
- # Creating output channel
- x1 = output_chan_file("output.S");
- # Creating lexer (the file name is passed along with the channel)
- x2 = lexer(x0, a1[1]);
- # Creating string buffer (shared by the parser and the compiler below)
- x3 = string_buffer();
- # Creating parser
- x4 = parser(x2, x3);
- # Parsing code to AST
- x5 = parser_parse(x4);
- # Creating compiler
- x6 = compiler(x1, x3);
- # Compiling AST
- compiler_compile(x6, x5);
- # NOTE(review): the channels are not explicitly closed or flushed in this
- # function - verify that this happens elsewhere (e.g. on exit).
- return(0);
- }
- # Entry point of the entire program
- # Initializes I/O, calls main with argc/argv taken from the initial stack
- # and passes main's return value to exit.
- global _start;
- _start : () {
- # Initializing I/O at the start
- initialize_io();
- # argc is located in 4(%esp), argv is located in 8(%esp)
- # The function declares no parameters but still refers to a0: &a0 is passed
- # as argv and the word read from (&a0 - 4) as argc.
- # NOTE(review): this relies on where a0 sits in the frame relative to the
- # initial process stack, and on "- 4" being a byte offset - confirm against
- # the code generated for argument access.
- exit(main(*(&a0 - 4), &a0));
- }
|