parser.c 430 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556
  1. /*
  2. * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
  3. * implemented on top of the SAX interfaces
  4. *
  5. * References:
  6. * The XML specification:
  7. * http://www.w3.org/TR/REC-xml
  8. * Original 1.0 version:
  9. * http://www.w3.org/TR/1998/REC-xml-19980210
  10. * XML second edition working draft
  11. * http://www.w3.org/TR/2000/WD-xml-2e-20000814
  12. *
  13. * Okay this is a big file, the parser core is around 7000 lines, then it
  14. * is followed by the progressive parser top routines, then the various
  15. * high level APIs to call the parser and a few miscellaneous functions.
  16. * A number of helper functions and deprecated ones have been moved to
  17. * parserInternals.c to reduce this file size.
  18. * As much as possible the functions are associated with their relative
  19. * production in the XML specification. A few productions defining the
  20. * different ranges of characters are actually implemented either in
  21. * parserInternals.h or parserInternals.c
  22. * The DOM tree build is realized from the default SAX callbacks in
  23. * the module SAX.c.
  24. * The routines doing the validation checks are in valid.c and called either
  25. * from the SAX callbacks or as standalone functions using a preparsed
  26. * document.
  27. *
  28. * See Copyright for the status of this software.
  29. *
  30. * daniel@veillard.com
  31. */
  32. /* To avoid EBCDIC trouble when parsing on zOS */
  33. #if defined(__MVS__)
  34. #pragma convert("ISO8859-1")
  35. #endif
  36. #define IN_LIBXML
  37. #include "libxml.h"
  38. #if defined(_WIN32) && !defined (__CYGWIN__)
  39. #define XML_DIR_SEP '\\'
  40. #else
  41. #define XML_DIR_SEP '/'
  42. #endif
  43. #include <stdlib.h>
  44. #include <limits.h>
  45. #include <string.h>
  46. #include <stdarg.h>
  47. #include <stddef.h>
  48. #include <libxml/xmlmemory.h>
  49. #include <libxml/threads.h>
  50. #include <libxml/globals.h>
  51. #include <libxml/tree.h>
  52. #include <libxml/parser.h>
  53. #include <libxml/parserInternals.h>
  54. #include <libxml/valid.h>
  55. #include <libxml/entities.h>
  56. #include <libxml/xmlerror.h>
  57. #include <libxml/encoding.h>
  58. #include <libxml/xmlIO.h>
  59. #include <libxml/uri.h>
  60. #ifdef LIBXML_CATALOG_ENABLED
  61. #include <libxml/catalog.h>
  62. #endif
  63. #ifdef LIBXML_SCHEMAS_ENABLED
  64. #include <libxml/xmlschemastypes.h>
  65. #include <libxml/relaxng.h>
  66. #endif
  67. #ifdef HAVE_CTYPE_H
  68. #include <ctype.h>
  69. #endif
  70. #ifdef HAVE_STDLIB_H
  71. #include <stdlib.h>
  72. #endif
  73. #ifdef HAVE_SYS_STAT_H
  74. #include <sys/stat.h>
  75. #endif
  76. #ifdef HAVE_FCNTL_H
  77. #include <fcntl.h>
  78. #endif
  79. #ifdef HAVE_UNISTD_H
  80. #include <unistd.h>
  81. #endif
  82. #include "buf.h"
  83. #include "enc.h"
  84. struct _xmlStartTag {
  85. const xmlChar *prefix;
  86. const xmlChar *URI;
  87. int line;
  88. int nsNr;
  89. };
  90. static void
  91. xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
  92. static xmlParserCtxtPtr
  93. xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  94. const xmlChar *base, xmlParserCtxtPtr pctx);
  95. static void xmlHaltParser(xmlParserCtxtPtr ctxt);
  96. static int
  97. xmlParseElementStart(xmlParserCtxtPtr ctxt);
  98. static void
  99. xmlParseElementEnd(xmlParserCtxtPtr ctxt);
  100. /************************************************************************
  101. * *
  102. * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
  103. * *
  104. ************************************************************************/
  /*
   * Entity size thresholds. xmlParserEntityCheck() accepts any entity whose
   * size is below XML_PARSER_BIG_ENTITY without further ratio checks.
   */
  #define XML_PARSER_BIG_ENTITY   1000
  #define XML_PARSER_LOT_ENTITY   5000

  /*
   * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
   * entity replacement over the size in bytes of the input indicates an
   * exponential behaviour. A value of 10 corresponds to at least 3 entity
   * replacements per byte of input.
   */
  #define XML_PARSER_NON_LINEAR   10
  114. /*
  115. * xmlParserEntityCheck
  116. *
  117. * Function to check non-linear entity expansion behaviour
  118. * This is here to detect and stop exponential linear entity expansion
  119. * This is not a limitation of the parser but a safety
  120. * boundary feature. It can be disabled with the XML_PARSE_HUGE
  121. * parser option.
  122. */
  123. static int
  124. xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
  125. xmlEntityPtr ent, size_t replacement)
  126. {
  127. size_t consumed = 0;
  128. int i;
  129. if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
  130. return (0);
  131. if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
  132. return (1);
  133. /*
  134. * This may look absurd but is needed to detect
  135. * entities problems
  136. */
  137. if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  138. (ent->content != NULL) && (ent->checked == 0) &&
  139. (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
  140. unsigned long oldnbent = ctxt->nbentities, diff;
  141. xmlChar *rep;
  142. ent->checked = 1;
  143. ++ctxt->depth;
  144. rep = xmlStringDecodeEntities(ctxt, ent->content,
  145. XML_SUBSTITUTE_REF, 0, 0, 0);
  146. --ctxt->depth;
  147. if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
  148. ent->content[0] = 0;
  149. }
  150. diff = ctxt->nbentities - oldnbent + 1;
  151. if (diff > INT_MAX / 2)
  152. diff = INT_MAX / 2;
  153. ent->checked = diff * 2;
  154. if (rep != NULL) {
  155. if (xmlStrchr(rep, '<'))
  156. ent->checked |= 1;
  157. xmlFree(rep);
  158. rep = NULL;
  159. }
  160. }
  161. /*
  162. * Prevent entity exponential check, not just replacement while
  163. * parsing the DTD
  164. * The check is potentially costly so do that only once in a thousand
  165. */
  166. if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
  167. (ctxt->nbentities % 1024 == 0)) {
  168. for (i = 0;i < ctxt->inputNr;i++) {
  169. consumed += ctxt->inputTab[i]->consumed +
  170. (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
  171. }
  172. if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
  173. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  174. ctxt->instate = XML_PARSER_EOF;
  175. return (1);
  176. }
  177. consumed = 0;
  178. }
  179. if (replacement != 0) {
  180. if (replacement < XML_MAX_TEXT_LENGTH)
  181. return(0);
  182. /*
  183. * If the volume of entity copy reaches 10 times the
  184. * amount of parsed data and over the large text threshold
  185. * then that's very likely to be an abuse.
  186. */
  187. if (ctxt->input != NULL) {
  188. consumed = ctxt->input->consumed +
  189. (ctxt->input->cur - ctxt->input->base);
  190. }
  191. consumed += ctxt->sizeentities;
  192. if (replacement < XML_PARSER_NON_LINEAR * consumed)
  193. return(0);
  194. } else if (size != 0) {
  195. /*
  196. * Do the check based on the replacement size of the entity
  197. */
  198. if (size < XML_PARSER_BIG_ENTITY)
  199. return(0);
  200. /*
  201. * A limit on the amount of text data reasonably used
  202. */
  203. if (ctxt->input != NULL) {
  204. consumed = ctxt->input->consumed +
  205. (ctxt->input->cur - ctxt->input->base);
  206. }
  207. consumed += ctxt->sizeentities;
  208. if ((size < XML_PARSER_NON_LINEAR * consumed) &&
  209. (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
  210. return (0);
  211. } else if (ent != NULL) {
  212. /*
  213. * use the number of parsed entities in the replacement
  214. */
  215. size = ent->checked / 2;
  216. /*
  217. * The amount of data parsed counting entities size only once
  218. */
  219. if (ctxt->input != NULL) {
  220. consumed = ctxt->input->consumed +
  221. (ctxt->input->cur - ctxt->input->base);
  222. }
  223. consumed += ctxt->sizeentities;
  224. /*
  225. * Check the density of entities for the amount of data
  226. * knowing an entity reference will take at least 3 bytes
  227. */
  228. if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
  229. return (0);
  230. } else {
  231. /*
  232. * strange we got no data for checking
  233. */
  234. if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
  235. (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
  236. (ctxt->nbentities <= 10000))
  237. return (0);
  238. }
  239. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  240. return (1);
  241. }
  /**
   * xmlParserMaxDepth:
   *
   * Arbitrary depth limit for the XML documents that we allow to
   * process. This is not a limitation of the parser but a safety
   * boundary feature. It can be disabled with the XML_PARSE_HUGE
   * parser option.
   */
  unsigned int xmlParserMaxDepth = 256;

  #define SAX2                         1
  #define XML_PARSER_BIG_BUFFER_SIZE   300
  #define XML_PARSER_BUFFER_SIZE       100
  #define SAX_COMPAT_MODE  BAD_CAST "SAX compatibility mode document"

  /**
   * XML_PARSER_CHUNK_SIZE
   *
   * When calling GROW, this is the minimal amount of data the parser
   * expects to have received. It is not a hard limit but an optimization
   * when reading strings like Names. It is not strictly needed as long as
   * the available input characters are followed by 0, which should be
   * provided by the I/O level.
   */
  #define XML_PARSER_CHUNK_SIZE        100
  /*
   * List of XML prefixed PI allowed by W3C specs.
   *
   * The array is terminated by a NULL entry. Both the strings and the
   * pointer slots are const: the table is only ever read, so nothing in
   * it needs to be writable.
   */
  static const char *const xmlW3CPIs[] = {
      "xml-stylesheet",
      "xml-model",
      NULL
  };
  273. /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
  274. static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
  275. const xmlChar **str);
  276. static xmlParserErrors
  277. xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
  278. xmlSAXHandlerPtr sax,
  279. void *user_data, int depth, const xmlChar *URL,
  280. const xmlChar *ID, xmlNodePtr *list);
  281. static int
  282. xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
  283. const char *encoding);
  284. #ifdef LIBXML_LEGACY_ENABLED
  285. static void
  286. xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
  287. xmlNodePtr lastNode);
  288. #endif /* LIBXML_LEGACY_ENABLED */
  289. static xmlParserErrors
  290. xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
  291. const xmlChar *string, void *user_data, xmlNodePtr *lst);
  292. static int
  293. xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
  294. /************************************************************************
  295. * *
  296. * Some factorized error routines *
  297. * *
  298. ************************************************************************/
  299. /**
  300. * xmlErrAttributeDup:
  301. * @ctxt: an XML parser context
  302. * @prefix: the attribute prefix
  303. * @localname: the attribute localname
  304. *
  305. * Handle a redefinition of attribute error
  306. */
  307. static void
  308. xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
  309. const xmlChar * localname)
  310. {
  311. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  312. (ctxt->instate == XML_PARSER_EOF))
  313. return;
  314. if (ctxt != NULL)
  315. ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
  316. if (prefix == NULL)
  317. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  318. XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
  319. (const char *) localname, NULL, NULL, 0, 0,
  320. "Attribute %s redefined\n", localname);
  321. else
  322. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  323. XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
  324. (const char *) prefix, (const char *) localname,
  325. NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
  326. localname);
  327. if (ctxt != NULL) {
  328. ctxt->wellFormed = 0;
  329. if (ctxt->recovery == 0)
  330. ctxt->disableSAX = 1;
  331. }
  332. }
  333. /**
  334. * xmlFatalErr:
  335. * @ctxt: an XML parser context
  336. * @error: the error number
  337. * @extra: extra information string
  338. *
  339. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  340. */
  341. static void
  342. xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
  343. {
  344. const char *errmsg;
  345. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  346. (ctxt->instate == XML_PARSER_EOF))
  347. return;
  348. switch (error) {
  349. case XML_ERR_INVALID_HEX_CHARREF:
  350. errmsg = "CharRef: invalid hexadecimal value";
  351. break;
  352. case XML_ERR_INVALID_DEC_CHARREF:
  353. errmsg = "CharRef: invalid decimal value";
  354. break;
  355. case XML_ERR_INVALID_CHARREF:
  356. errmsg = "CharRef: invalid value";
  357. break;
  358. case XML_ERR_INTERNAL_ERROR:
  359. errmsg = "internal error";
  360. break;
  361. case XML_ERR_PEREF_AT_EOF:
  362. errmsg = "PEReference at end of document";
  363. break;
  364. case XML_ERR_PEREF_IN_PROLOG:
  365. errmsg = "PEReference in prolog";
  366. break;
  367. case XML_ERR_PEREF_IN_EPILOG:
  368. errmsg = "PEReference in epilog";
  369. break;
  370. case XML_ERR_PEREF_NO_NAME:
  371. errmsg = "PEReference: no name";
  372. break;
  373. case XML_ERR_PEREF_SEMICOL_MISSING:
  374. errmsg = "PEReference: expecting ';'";
  375. break;
  376. case XML_ERR_ENTITY_LOOP:
  377. errmsg = "Detected an entity reference loop";
  378. break;
  379. case XML_ERR_ENTITY_NOT_STARTED:
  380. errmsg = "EntityValue: \" or ' expected";
  381. break;
  382. case XML_ERR_ENTITY_PE_INTERNAL:
  383. errmsg = "PEReferences forbidden in internal subset";
  384. break;
  385. case XML_ERR_ENTITY_NOT_FINISHED:
  386. errmsg = "EntityValue: \" or ' expected";
  387. break;
  388. case XML_ERR_ATTRIBUTE_NOT_STARTED:
  389. errmsg = "AttValue: \" or ' expected";
  390. break;
  391. case XML_ERR_LT_IN_ATTRIBUTE:
  392. errmsg = "Unescaped '<' not allowed in attributes values";
  393. break;
  394. case XML_ERR_LITERAL_NOT_STARTED:
  395. errmsg = "SystemLiteral \" or ' expected";
  396. break;
  397. case XML_ERR_LITERAL_NOT_FINISHED:
  398. errmsg = "Unfinished System or Public ID \" or ' expected";
  399. break;
  400. case XML_ERR_MISPLACED_CDATA_END:
  401. errmsg = "Sequence ']]>' not allowed in content";
  402. break;
  403. case XML_ERR_URI_REQUIRED:
  404. errmsg = "SYSTEM or PUBLIC, the URI is missing";
  405. break;
  406. case XML_ERR_PUBID_REQUIRED:
  407. errmsg = "PUBLIC, the Public Identifier is missing";
  408. break;
  409. case XML_ERR_HYPHEN_IN_COMMENT:
  410. errmsg = "Comment must not contain '--' (double-hyphen)";
  411. break;
  412. case XML_ERR_PI_NOT_STARTED:
  413. errmsg = "xmlParsePI : no target name";
  414. break;
  415. case XML_ERR_RESERVED_XML_NAME:
  416. errmsg = "Invalid PI name";
  417. break;
  418. case XML_ERR_NOTATION_NOT_STARTED:
  419. errmsg = "NOTATION: Name expected here";
  420. break;
  421. case XML_ERR_NOTATION_NOT_FINISHED:
  422. errmsg = "'>' required to close NOTATION declaration";
  423. break;
  424. case XML_ERR_VALUE_REQUIRED:
  425. errmsg = "Entity value required";
  426. break;
  427. case XML_ERR_URI_FRAGMENT:
  428. errmsg = "Fragment not allowed";
  429. break;
  430. case XML_ERR_ATTLIST_NOT_STARTED:
  431. errmsg = "'(' required to start ATTLIST enumeration";
  432. break;
  433. case XML_ERR_NMTOKEN_REQUIRED:
  434. errmsg = "NmToken expected in ATTLIST enumeration";
  435. break;
  436. case XML_ERR_ATTLIST_NOT_FINISHED:
  437. errmsg = "')' required to finish ATTLIST enumeration";
  438. break;
  439. case XML_ERR_MIXED_NOT_STARTED:
  440. errmsg = "MixedContentDecl : '|' or ')*' expected";
  441. break;
  442. case XML_ERR_PCDATA_REQUIRED:
  443. errmsg = "MixedContentDecl : '#PCDATA' expected";
  444. break;
  445. case XML_ERR_ELEMCONTENT_NOT_STARTED:
  446. errmsg = "ContentDecl : Name or '(' expected";
  447. break;
  448. case XML_ERR_ELEMCONTENT_NOT_FINISHED:
  449. errmsg = "ContentDecl : ',' '|' or ')' expected";
  450. break;
  451. case XML_ERR_PEREF_IN_INT_SUBSET:
  452. errmsg =
  453. "PEReference: forbidden within markup decl in internal subset";
  454. break;
  455. case XML_ERR_GT_REQUIRED:
  456. errmsg = "expected '>'";
  457. break;
  458. case XML_ERR_CONDSEC_INVALID:
  459. errmsg = "XML conditional section '[' expected";
  460. break;
  461. case XML_ERR_EXT_SUBSET_NOT_FINISHED:
  462. errmsg = "Content error in the external subset";
  463. break;
  464. case XML_ERR_CONDSEC_INVALID_KEYWORD:
  465. errmsg =
  466. "conditional section INCLUDE or IGNORE keyword expected";
  467. break;
  468. case XML_ERR_CONDSEC_NOT_FINISHED:
  469. errmsg = "XML conditional section not closed";
  470. break;
  471. case XML_ERR_XMLDECL_NOT_STARTED:
  472. errmsg = "Text declaration '<?xml' required";
  473. break;
  474. case XML_ERR_XMLDECL_NOT_FINISHED:
  475. errmsg = "parsing XML declaration: '?>' expected";
  476. break;
  477. case XML_ERR_EXT_ENTITY_STANDALONE:
  478. errmsg = "external parsed entities cannot be standalone";
  479. break;
  480. case XML_ERR_ENTITYREF_SEMICOL_MISSING:
  481. errmsg = "EntityRef: expecting ';'";
  482. break;
  483. case XML_ERR_DOCTYPE_NOT_FINISHED:
  484. errmsg = "DOCTYPE improperly terminated";
  485. break;
  486. case XML_ERR_LTSLASH_REQUIRED:
  487. errmsg = "EndTag: '</' not found";
  488. break;
  489. case XML_ERR_EQUAL_REQUIRED:
  490. errmsg = "expected '='";
  491. break;
  492. case XML_ERR_STRING_NOT_CLOSED:
  493. errmsg = "String not closed expecting \" or '";
  494. break;
  495. case XML_ERR_STRING_NOT_STARTED:
  496. errmsg = "String not started expecting ' or \"";
  497. break;
  498. case XML_ERR_ENCODING_NAME:
  499. errmsg = "Invalid XML encoding name";
  500. break;
  501. case XML_ERR_STANDALONE_VALUE:
  502. errmsg = "standalone accepts only 'yes' or 'no'";
  503. break;
  504. case XML_ERR_DOCUMENT_EMPTY:
  505. errmsg = "Document is empty";
  506. break;
  507. case XML_ERR_DOCUMENT_END:
  508. errmsg = "Extra content at the end of the document";
  509. break;
  510. case XML_ERR_NOT_WELL_BALANCED:
  511. errmsg = "chunk is not well balanced";
  512. break;
  513. case XML_ERR_EXTRA_CONTENT:
  514. errmsg = "extra content at the end of well balanced chunk";
  515. break;
  516. case XML_ERR_VERSION_MISSING:
  517. errmsg = "Malformed declaration expecting version";
  518. break;
  519. case XML_ERR_NAME_TOO_LONG:
  520. errmsg = "Name too long use XML_PARSE_HUGE option";
  521. break;
  522. #if 0
  523. case:
  524. errmsg = "";
  525. break;
  526. #endif
  527. default:
  528. errmsg = "Unregistered error message";
  529. }
  530. if (ctxt != NULL)
  531. ctxt->errNo = error;
  532. if (info == NULL) {
  533. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  534. XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
  535. errmsg);
  536. } else {
  537. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  538. XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
  539. errmsg, info);
  540. }
  541. if (ctxt != NULL) {
  542. ctxt->wellFormed = 0;
  543. if (ctxt->recovery == 0)
  544. ctxt->disableSAX = 1;
  545. }
  546. }
  547. /**
  548. * xmlFatalErrMsg:
  549. * @ctxt: an XML parser context
  550. * @error: the error number
  551. * @msg: the error message
  552. *
  553. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  554. */
  555. static void LIBXML_ATTR_FORMAT(3,0)
  556. xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  557. const char *msg)
  558. {
  559. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  560. (ctxt->instate == XML_PARSER_EOF))
  561. return;
  562. if (ctxt != NULL)
  563. ctxt->errNo = error;
  564. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  565. XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
  566. if (ctxt != NULL) {
  567. ctxt->wellFormed = 0;
  568. if (ctxt->recovery == 0)
  569. ctxt->disableSAX = 1;
  570. }
  571. }
  572. /**
  573. * xmlWarningMsg:
  574. * @ctxt: an XML parser context
  575. * @error: the error number
  576. * @msg: the error message
  577. * @str1: extra data
  578. * @str2: extra data
  579. *
  580. * Handle a warning.
  581. */
  582. static void LIBXML_ATTR_FORMAT(3,0)
  583. xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  584. const char *msg, const xmlChar *str1, const xmlChar *str2)
  585. {
  586. xmlStructuredErrorFunc schannel = NULL;
  587. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  588. (ctxt->instate == XML_PARSER_EOF))
  589. return;
  590. if ((ctxt != NULL) && (ctxt->sax != NULL) &&
  591. (ctxt->sax->initialized == XML_SAX2_MAGIC))
  592. schannel = ctxt->sax->serror;
  593. if (ctxt != NULL) {
  594. __xmlRaiseError(schannel,
  595. (ctxt->sax) ? ctxt->sax->warning : NULL,
  596. ctxt->userData,
  597. ctxt, NULL, XML_FROM_PARSER, error,
  598. XML_ERR_WARNING, NULL, 0,
  599. (const char *) str1, (const char *) str2, NULL, 0, 0,
  600. msg, (const char *) str1, (const char *) str2);
  601. } else {
  602. __xmlRaiseError(schannel, NULL, NULL,
  603. ctxt, NULL, XML_FROM_PARSER, error,
  604. XML_ERR_WARNING, NULL, 0,
  605. (const char *) str1, (const char *) str2, NULL, 0, 0,
  606. msg, (const char *) str1, (const char *) str2);
  607. }
  608. }
  609. /**
  610. * xmlValidityError:
  611. * @ctxt: an XML parser context
  612. * @error: the error number
  613. * @msg: the error message
  614. * @str1: extra data
  615. *
  616. * Handle a validity error.
  617. */
  618. static void LIBXML_ATTR_FORMAT(3,0)
  619. xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  620. const char *msg, const xmlChar *str1, const xmlChar *str2)
  621. {
  622. xmlStructuredErrorFunc schannel = NULL;
  623. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  624. (ctxt->instate == XML_PARSER_EOF))
  625. return;
  626. if (ctxt != NULL) {
  627. ctxt->errNo = error;
  628. if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
  629. schannel = ctxt->sax->serror;
  630. }
  631. if (ctxt != NULL) {
  632. __xmlRaiseError(schannel,
  633. ctxt->vctxt.error, ctxt->vctxt.userData,
  634. ctxt, NULL, XML_FROM_DTD, error,
  635. XML_ERR_ERROR, NULL, 0, (const char *) str1,
  636. (const char *) str2, NULL, 0, 0,
  637. msg, (const char *) str1, (const char *) str2);
  638. ctxt->valid = 0;
  639. } else {
  640. __xmlRaiseError(schannel, NULL, NULL,
  641. ctxt, NULL, XML_FROM_DTD, error,
  642. XML_ERR_ERROR, NULL, 0, (const char *) str1,
  643. (const char *) str2, NULL, 0, 0,
  644. msg, (const char *) str1, (const char *) str2);
  645. }
  646. }
  647. /**
  648. * xmlFatalErrMsgInt:
  649. * @ctxt: an XML parser context
  650. * @error: the error number
  651. * @msg: the error message
  652. * @val: an integer value
  653. *
  654. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  655. */
  656. static void LIBXML_ATTR_FORMAT(3,0)
  657. xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  658. const char *msg, int val)
  659. {
  660. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  661. (ctxt->instate == XML_PARSER_EOF))
  662. return;
  663. if (ctxt != NULL)
  664. ctxt->errNo = error;
  665. __xmlRaiseError(NULL, NULL, NULL,
  666. ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  667. NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
  668. if (ctxt != NULL) {
  669. ctxt->wellFormed = 0;
  670. if (ctxt->recovery == 0)
  671. ctxt->disableSAX = 1;
  672. }
  673. }
  674. /**
  675. * xmlFatalErrMsgStrIntStr:
  676. * @ctxt: an XML parser context
  677. * @error: the error number
  678. * @msg: the error message
  679. * @str1: an string info
  680. * @val: an integer value
  681. * @str2: an string info
  682. *
  683. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  684. */
  685. static void LIBXML_ATTR_FORMAT(3,0)
  686. xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  687. const char *msg, const xmlChar *str1, int val,
  688. const xmlChar *str2)
  689. {
  690. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  691. (ctxt->instate == XML_PARSER_EOF))
  692. return;
  693. if (ctxt != NULL)
  694. ctxt->errNo = error;
  695. __xmlRaiseError(NULL, NULL, NULL,
  696. ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  697. NULL, 0, (const char *) str1, (const char *) str2,
  698. NULL, val, 0, msg, str1, val, str2);
  699. if (ctxt != NULL) {
  700. ctxt->wellFormed = 0;
  701. if (ctxt->recovery == 0)
  702. ctxt->disableSAX = 1;
  703. }
  704. }
  705. /**
  706. * xmlFatalErrMsgStr:
  707. * @ctxt: an XML parser context
  708. * @error: the error number
  709. * @msg: the error message
  710. * @val: a string value
  711. *
  712. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  713. */
  714. static void LIBXML_ATTR_FORMAT(3,0)
  715. xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  716. const char *msg, const xmlChar * val)
  717. {
  718. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  719. (ctxt->instate == XML_PARSER_EOF))
  720. return;
  721. if (ctxt != NULL)
  722. ctxt->errNo = error;
  723. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
  724. XML_FROM_PARSER, error, XML_ERR_FATAL,
  725. NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
  726. val);
  727. if (ctxt != NULL) {
  728. ctxt->wellFormed = 0;
  729. if (ctxt->recovery == 0)
  730. ctxt->disableSAX = 1;
  731. }
  732. }
  733. /**
  734. * xmlErrMsgStr:
  735. * @ctxt: an XML parser context
  736. * @error: the error number
  737. * @msg: the error message
  738. * @val: a string value
  739. *
  740. * Handle a non fatal parser error
  741. */
  742. static void LIBXML_ATTR_FORMAT(3,0)
  743. xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  744. const char *msg, const xmlChar * val)
  745. {
  746. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  747. (ctxt->instate == XML_PARSER_EOF))
  748. return;
  749. if (ctxt != NULL)
  750. ctxt->errNo = error;
  751. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
  752. XML_FROM_PARSER, error, XML_ERR_ERROR,
  753. NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
  754. val);
  755. }
  756. /**
  757. * xmlNsErr:
  758. * @ctxt: an XML parser context
  759. * @error: the error number
  760. * @msg: the message
  761. * @info1: extra information string
  762. * @info2: extra information string
  763. *
  764. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  765. */
  766. static void LIBXML_ATTR_FORMAT(3,0)
  767. xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  768. const char *msg,
  769. const xmlChar * info1, const xmlChar * info2,
  770. const xmlChar * info3)
  771. {
  772. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  773. (ctxt->instate == XML_PARSER_EOF))
  774. return;
  775. if (ctxt != NULL)
  776. ctxt->errNo = error;
  777. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
  778. XML_ERR_ERROR, NULL, 0, (const char *) info1,
  779. (const char *) info2, (const char *) info3, 0, 0, msg,
  780. info1, info2, info3);
  781. if (ctxt != NULL)
  782. ctxt->nsWellFormed = 0;
  783. }
  784. /**
  785. * xmlNsWarn
  786. * @ctxt: an XML parser context
  787. * @error: the error number
  788. * @msg: the message
  789. * @info1: extra information string
  790. * @info2: extra information string
  791. *
  792. * Handle a namespace warning error
  793. */
  794. static void LIBXML_ATTR_FORMAT(3,0)
  795. xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  796. const char *msg,
  797. const xmlChar * info1, const xmlChar * info2,
  798. const xmlChar * info3)
  799. {
  800. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  801. (ctxt->instate == XML_PARSER_EOF))
  802. return;
  803. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
  804. XML_ERR_WARNING, NULL, 0, (const char *) info1,
  805. (const char *) info2, (const char *) info3, 0, 0, msg,
  806. info1, info2, info3);
  807. }
  808. /************************************************************************
  809. * *
  810. * Library wide options *
  811. * *
  812. ************************************************************************/
  813. /**
  814. * xmlHasFeature:
  815. * @feature: the feature to be examined
  816. *
  817. * Examines if the library has been compiled with a given feature.
  818. *
  819. * Returns a non-zero value if the feature exists, otherwise zero.
  820. * Returns zero (0) if the feature does not exist or an unknown
  821. * feature is requested, non-zero otherwise.
  822. */
  823. int
  824. xmlHasFeature(xmlFeature feature)
  825. {
  826. switch (feature) {
  827. case XML_WITH_THREAD:
  828. #ifdef LIBXML_THREAD_ENABLED
  829. return(1);
  830. #else
  831. return(0);
  832. #endif
  833. case XML_WITH_TREE:
  834. #ifdef LIBXML_TREE_ENABLED
  835. return(1);
  836. #else
  837. return(0);
  838. #endif
  839. case XML_WITH_OUTPUT:
  840. #ifdef LIBXML_OUTPUT_ENABLED
  841. return(1);
  842. #else
  843. return(0);
  844. #endif
  845. case XML_WITH_PUSH:
  846. #ifdef LIBXML_PUSH_ENABLED
  847. return(1);
  848. #else
  849. return(0);
  850. #endif
  851. case XML_WITH_READER:
  852. #ifdef LIBXML_READER_ENABLED
  853. return(1);
  854. #else
  855. return(0);
  856. #endif
  857. case XML_WITH_PATTERN:
  858. #ifdef LIBXML_PATTERN_ENABLED
  859. return(1);
  860. #else
  861. return(0);
  862. #endif
  863. case XML_WITH_WRITER:
  864. #ifdef LIBXML_WRITER_ENABLED
  865. return(1);
  866. #else
  867. return(0);
  868. #endif
  869. case XML_WITH_SAX1:
  870. #ifdef LIBXML_SAX1_ENABLED
  871. return(1);
  872. #else
  873. return(0);
  874. #endif
  875. case XML_WITH_FTP:
  876. #ifdef LIBXML_FTP_ENABLED
  877. return(1);
  878. #else
  879. return(0);
  880. #endif
  881. case XML_WITH_HTTP:
  882. #ifdef LIBXML_HTTP_ENABLED
  883. return(1);
  884. #else
  885. return(0);
  886. #endif
  887. case XML_WITH_VALID:
  888. #ifdef LIBXML_VALID_ENABLED
  889. return(1);
  890. #else
  891. return(0);
  892. #endif
  893. case XML_WITH_HTML:
  894. #ifdef LIBXML_HTML_ENABLED
  895. return(1);
  896. #else
  897. return(0);
  898. #endif
  899. case XML_WITH_LEGACY:
  900. #ifdef LIBXML_LEGACY_ENABLED
  901. return(1);
  902. #else
  903. return(0);
  904. #endif
  905. case XML_WITH_C14N:
  906. #ifdef LIBXML_C14N_ENABLED
  907. return(1);
  908. #else
  909. return(0);
  910. #endif
  911. case XML_WITH_CATALOG:
  912. #ifdef LIBXML_CATALOG_ENABLED
  913. return(1);
  914. #else
  915. return(0);
  916. #endif
  917. case XML_WITH_XPATH:
  918. #ifdef LIBXML_XPATH_ENABLED
  919. return(1);
  920. #else
  921. return(0);
  922. #endif
  923. case XML_WITH_XPTR:
  924. #ifdef LIBXML_XPTR_ENABLED
  925. return(1);
  926. #else
  927. return(0);
  928. #endif
  929. case XML_WITH_XINCLUDE:
  930. #ifdef LIBXML_XINCLUDE_ENABLED
  931. return(1);
  932. #else
  933. return(0);
  934. #endif
  935. case XML_WITH_ICONV:
  936. #ifdef LIBXML_ICONV_ENABLED
  937. return(1);
  938. #else
  939. return(0);
  940. #endif
  941. case XML_WITH_ISO8859X:
  942. #ifdef LIBXML_ISO8859X_ENABLED
  943. return(1);
  944. #else
  945. return(0);
  946. #endif
  947. case XML_WITH_UNICODE:
  948. #ifdef LIBXML_UNICODE_ENABLED
  949. return(1);
  950. #else
  951. return(0);
  952. #endif
  953. case XML_WITH_REGEXP:
  954. #ifdef LIBXML_REGEXP_ENABLED
  955. return(1);
  956. #else
  957. return(0);
  958. #endif
  959. case XML_WITH_AUTOMATA:
  960. #ifdef LIBXML_AUTOMATA_ENABLED
  961. return(1);
  962. #else
  963. return(0);
  964. #endif
  965. case XML_WITH_EXPR:
  966. #ifdef LIBXML_EXPR_ENABLED
  967. return(1);
  968. #else
  969. return(0);
  970. #endif
  971. case XML_WITH_SCHEMAS:
  972. #ifdef LIBXML_SCHEMAS_ENABLED
  973. return(1);
  974. #else
  975. return(0);
  976. #endif
  977. case XML_WITH_SCHEMATRON:
  978. #ifdef LIBXML_SCHEMATRON_ENABLED
  979. return(1);
  980. #else
  981. return(0);
  982. #endif
  983. case XML_WITH_MODULES:
  984. #ifdef LIBXML_MODULES_ENABLED
  985. return(1);
  986. #else
  987. return(0);
  988. #endif
  989. case XML_WITH_DEBUG:
  990. #ifdef LIBXML_DEBUG_ENABLED
  991. return(1);
  992. #else
  993. return(0);
  994. #endif
  995. case XML_WITH_DEBUG_MEM:
  996. #ifdef DEBUG_MEMORY_LOCATION
  997. return(1);
  998. #else
  999. return(0);
  1000. #endif
  1001. case XML_WITH_DEBUG_RUN:
  1002. #ifdef LIBXML_DEBUG_RUNTIME
  1003. return(1);
  1004. #else
  1005. return(0);
  1006. #endif
  1007. case XML_WITH_ZLIB:
  1008. #ifdef LIBXML_ZLIB_ENABLED
  1009. return(1);
  1010. #else
  1011. return(0);
  1012. #endif
  1013. case XML_WITH_LZMA:
  1014. #ifdef LIBXML_LZMA_ENABLED
  1015. return(1);
  1016. #else
  1017. return(0);
  1018. #endif
  1019. case XML_WITH_ICU:
  1020. #ifdef LIBXML_ICU_ENABLED
  1021. return(1);
  1022. #else
  1023. return(0);
  1024. #endif
  1025. default:
  1026. break;
  1027. }
  1028. return(0);
  1029. }
  1030. /************************************************************************
  1031. * *
  1032. * SAX2 defaulted attributes handling *
  1033. * *
  1034. ************************************************************************/
  1035. /**
  1036. * xmlDetectSAX2:
  1037. * @ctxt: an XML parser context
  1038. *
  1039. * Do the SAX2 detection and specific initialization
  1040. */
  1041. static void
  1042. xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
  1043. xmlSAXHandlerPtr sax;
  1044. if (ctxt == NULL) return;
  1045. sax = ctxt->sax;
  1046. #ifdef LIBXML_SAX1_ENABLED
  1047. if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
  1048. ((sax->startElementNs != NULL) ||
  1049. (sax->endElementNs != NULL) ||
  1050. ((sax->startElement == NULL) && (sax->endElement == NULL))))
  1051. ctxt->sax2 = 1;
  1052. #else
  1053. ctxt->sax2 = 1;
  1054. #endif /* LIBXML_SAX1_ENABLED */
  1055. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  1056. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  1057. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  1058. if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
  1059. (ctxt->str_xml_ns == NULL)) {
  1060. xmlErrMemory(ctxt, NULL);
  1061. }
  1062. }
  1063. typedef struct _xmlDefAttrs xmlDefAttrs;
  1064. typedef xmlDefAttrs *xmlDefAttrsPtr;
  1065. struct _xmlDefAttrs {
  1066. int nbAttrs; /* number of defaulted attributes on that element */
  1067. int maxAttrs; /* the size of the array */
  1068. #if __STDC_VERSION__ >= 199901L
  1069. /* Using a C99 flexible array member avoids UBSan errors. */
  1070. const xmlChar *values[]; /* array of localname/prefix/values/external */
  1071. #else
  1072. const xmlChar *values[5];
  1073. #endif
  1074. };
  1075. /**
  1076. * xmlAttrNormalizeSpace:
  1077. * @src: the source string
  1078. * @dst: the target string
  1079. *
  1080. * Normalize the space in non CDATA attribute values:
  1081. * If the attribute type is not CDATA, then the XML processor MUST further
  1082. * process the normalized attribute value by discarding any leading and
  1083. * trailing space (#x20) characters, and by replacing sequences of space
  1084. * (#x20) characters by a single space (#x20) character.
  1085. * Note that the size of dst need to be at least src, and if one doesn't need
  1086. * to preserve dst (and it doesn't come from a dictionary or read-only) then
  1087. * passing src as dst is just fine.
  1088. *
  1089. * Returns a pointer to the normalized value (dst) or NULL if no conversion
  1090. * is needed.
  1091. */
  1092. static xmlChar *
  1093. xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
  1094. {
  1095. if ((src == NULL) || (dst == NULL))
  1096. return(NULL);
  1097. while (*src == 0x20) src++;
  1098. while (*src != 0) {
  1099. if (*src == 0x20) {
  1100. while (*src == 0x20) src++;
  1101. if (*src != 0)
  1102. *dst++ = 0x20;
  1103. } else {
  1104. *dst++ = *src++;
  1105. }
  1106. }
  1107. *dst = 0;
  1108. if (dst == src)
  1109. return(NULL);
  1110. return(dst);
  1111. }
  1112. /**
  1113. * xmlAttrNormalizeSpace2:
  1114. * @src: the source string
  1115. *
  1116. * Normalize the space in non CDATA attribute values, a slightly more complex
  1117. * front end to avoid allocation problems when running on attribute values
  1118. * coming from the input.
  1119. *
  1120. * Returns a pointer to the normalized value (dst) or NULL if no conversion
  1121. * is needed.
  1122. */
  1123. static const xmlChar *
  1124. xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
  1125. {
  1126. int i;
  1127. int remove_head = 0;
  1128. int need_realloc = 0;
  1129. const xmlChar *cur;
  1130. if ((ctxt == NULL) || (src == NULL) || (len == NULL))
  1131. return(NULL);
  1132. i = *len;
  1133. if (i <= 0)
  1134. return(NULL);
  1135. cur = src;
  1136. while (*cur == 0x20) {
  1137. cur++;
  1138. remove_head++;
  1139. }
  1140. while (*cur != 0) {
  1141. if (*cur == 0x20) {
  1142. cur++;
  1143. if ((*cur == 0x20) || (*cur == 0)) {
  1144. need_realloc = 1;
  1145. break;
  1146. }
  1147. } else
  1148. cur++;
  1149. }
  1150. if (need_realloc) {
  1151. xmlChar *ret;
  1152. ret = xmlStrndup(src + remove_head, i - remove_head + 1);
  1153. if (ret == NULL) {
  1154. xmlErrMemory(ctxt, NULL);
  1155. return(NULL);
  1156. }
  1157. xmlAttrNormalizeSpace(ret, ret);
  1158. *len = (int) strlen((const char *)ret);
  1159. return(ret);
  1160. } else if (remove_head) {
  1161. *len -= remove_head;
  1162. memmove(src, src + remove_head, 1 + *len);
  1163. return(src);
  1164. }
  1165. return(NULL);
  1166. }
  1167. /**
  1168. * xmlAddDefAttrs:
  1169. * @ctxt: an XML parser context
  1170. * @fullname: the element fullname
  1171. * @fullattr: the attribute fullname
  1172. * @value: the attribute value
  1173. *
  1174. * Add a defaulted attribute for an element
  1175. */
  1176. static void
  1177. xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
  1178. const xmlChar *fullname,
  1179. const xmlChar *fullattr,
  1180. const xmlChar *value) {
  1181. xmlDefAttrsPtr defaults;
  1182. int len;
  1183. const xmlChar *name;
  1184. const xmlChar *prefix;
  1185. /*
  1186. * Allows to detect attribute redefinitions
  1187. */
  1188. if (ctxt->attsSpecial != NULL) {
  1189. if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
  1190. return;
  1191. }
  1192. if (ctxt->attsDefault == NULL) {
  1193. ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
  1194. if (ctxt->attsDefault == NULL)
  1195. goto mem_error;
  1196. }
  1197. /*
  1198. * split the element name into prefix:localname , the string found
  1199. * are within the DTD and then not associated to namespace names.
  1200. */
  1201. name = xmlSplitQName3(fullname, &len);
  1202. if (name == NULL) {
  1203. name = xmlDictLookup(ctxt->dict, fullname, -1);
  1204. prefix = NULL;
  1205. } else {
  1206. name = xmlDictLookup(ctxt->dict, name, -1);
  1207. prefix = xmlDictLookup(ctxt->dict, fullname, len);
  1208. }
  1209. /*
  1210. * make sure there is some storage
  1211. */
  1212. defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
  1213. if (defaults == NULL) {
  1214. defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
  1215. (4 * 5) * sizeof(const xmlChar *));
  1216. if (defaults == NULL)
  1217. goto mem_error;
  1218. defaults->nbAttrs = 0;
  1219. defaults->maxAttrs = 4;
  1220. if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
  1221. defaults, NULL) < 0) {
  1222. xmlFree(defaults);
  1223. goto mem_error;
  1224. }
  1225. } else if (defaults->nbAttrs >= defaults->maxAttrs) {
  1226. xmlDefAttrsPtr temp;
  1227. temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
  1228. (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
  1229. if (temp == NULL)
  1230. goto mem_error;
  1231. defaults = temp;
  1232. defaults->maxAttrs *= 2;
  1233. if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
  1234. defaults, NULL) < 0) {
  1235. xmlFree(defaults);
  1236. goto mem_error;
  1237. }
  1238. }
  1239. /*
  1240. * Split the element name into prefix:localname , the string found
  1241. * are within the DTD and hen not associated to namespace names.
  1242. */
  1243. name = xmlSplitQName3(fullattr, &len);
  1244. if (name == NULL) {
  1245. name = xmlDictLookup(ctxt->dict, fullattr, -1);
  1246. prefix = NULL;
  1247. } else {
  1248. name = xmlDictLookup(ctxt->dict, name, -1);
  1249. prefix = xmlDictLookup(ctxt->dict, fullattr, len);
  1250. }
  1251. defaults->values[5 * defaults->nbAttrs] = name;
  1252. defaults->values[5 * defaults->nbAttrs + 1] = prefix;
  1253. /* intern the string and precompute the end */
  1254. len = xmlStrlen(value);
  1255. value = xmlDictLookup(ctxt->dict, value, len);
  1256. defaults->values[5 * defaults->nbAttrs + 2] = value;
  1257. defaults->values[5 * defaults->nbAttrs + 3] = value + len;
  1258. if (ctxt->external)
  1259. defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
  1260. else
  1261. defaults->values[5 * defaults->nbAttrs + 4] = NULL;
  1262. defaults->nbAttrs++;
  1263. return;
  1264. mem_error:
  1265. xmlErrMemory(ctxt, NULL);
  1266. return;
  1267. }
  1268. /**
  1269. * xmlAddSpecialAttr:
  1270. * @ctxt: an XML parser context
  1271. * @fullname: the element fullname
  1272. * @fullattr: the attribute fullname
  1273. * @type: the attribute type
  1274. *
  1275. * Register this attribute type
  1276. */
  1277. static void
  1278. xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
  1279. const xmlChar *fullname,
  1280. const xmlChar *fullattr,
  1281. int type)
  1282. {
  1283. if (ctxt->attsSpecial == NULL) {
  1284. ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
  1285. if (ctxt->attsSpecial == NULL)
  1286. goto mem_error;
  1287. }
  1288. if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
  1289. return;
  1290. xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
  1291. (void *) (ptrdiff_t) type);
  1292. return;
  1293. mem_error:
  1294. xmlErrMemory(ctxt, NULL);
  1295. return;
  1296. }
  1297. /**
  1298. * xmlCleanSpecialAttrCallback:
  1299. *
  1300. * Removes CDATA attributes from the special attribute table
  1301. */
  1302. static void
  1303. xmlCleanSpecialAttrCallback(void *payload, void *data,
  1304. const xmlChar *fullname, const xmlChar *fullattr,
  1305. const xmlChar *unused ATTRIBUTE_UNUSED) {
  1306. xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
  1307. if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
  1308. xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
  1309. }
  1310. }
  1311. /**
  1312. * xmlCleanSpecialAttr:
  1313. * @ctxt: an XML parser context
  1314. *
  1315. * Trim the list of attributes defined to remove all those of type
  1316. * CDATA as they are not special. This call should be done when finishing
  1317. * to parse the DTD and before starting to parse the document root.
  1318. */
  1319. static void
  1320. xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
  1321. {
  1322. if (ctxt->attsSpecial == NULL)
  1323. return;
  1324. xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
  1325. if (xmlHashSize(ctxt->attsSpecial) == 0) {
  1326. xmlHashFree(ctxt->attsSpecial, NULL);
  1327. ctxt->attsSpecial = NULL;
  1328. }
  1329. return;
  1330. }
  1331. /**
  1332. * xmlCheckLanguageID:
  1333. * @lang: pointer to the string value
  1334. *
  1335. * Checks that the value conforms to the LanguageID production:
  1336. *
  1337. * NOTE: this is somewhat deprecated, those productions were removed from
  1338. * the XML Second edition.
  1339. *
  1340. * [33] LanguageID ::= Langcode ('-' Subcode)*
  1341. * [34] Langcode ::= ISO639Code | IanaCode | UserCode
  1342. * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
  1343. * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
  1344. * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
  1345. * [38] Subcode ::= ([a-z] | [A-Z])+
  1346. *
  1347. * The current REC reference the successors of RFC 1766, currently 5646
  1348. *
  1349. * http://www.rfc-editor.org/rfc/rfc5646.txt
  1350. * langtag = language
  1351. * ["-" script]
  1352. * ["-" region]
  1353. * *("-" variant)
  1354. * *("-" extension)
  1355. * ["-" privateuse]
  1356. * language = 2*3ALPHA ; shortest ISO 639 code
  1357. * ["-" extlang] ; sometimes followed by
  1358. * ; extended language subtags
  1359. * / 4ALPHA ; or reserved for future use
  1360. * / 5*8ALPHA ; or registered language subtag
  1361. *
  1362. * extlang = 3ALPHA ; selected ISO 639 codes
  1363. * *2("-" 3ALPHA) ; permanently reserved
  1364. *
  1365. * script = 4ALPHA ; ISO 15924 code
  1366. *
  1367. * region = 2ALPHA ; ISO 3166-1 code
  1368. * / 3DIGIT ; UN M.49 code
  1369. *
  1370. * variant = 5*8alphanum ; registered variants
  1371. * / (DIGIT 3alphanum)
  1372. *
  1373. * extension = singleton 1*("-" (2*8alphanum))
  1374. *
  1375. * ; Single alphanumerics
  1376. * ; "x" reserved for private use
  1377. * singleton = DIGIT ; 0 - 9
  1378. * / %x41-57 ; A - W
  1379. * / %x59-5A ; Y - Z
  1380. * / %x61-77 ; a - w
  1381. * / %x79-7A ; y - z
  1382. *
  1383. * it sounds right to still allow Irregular i-xxx IANA and user codes too
  1384. * The parser below doesn't try to cope with extension or privateuse
  1385. * that could be added but that's not interoperable anyway
  1386. *
  1387. * Returns 1 if correct 0 otherwise
  1388. **/
  1389. int
  1390. xmlCheckLanguageID(const xmlChar * lang)
  1391. {
  1392. const xmlChar *cur = lang, *nxt;
  1393. if (cur == NULL)
  1394. return (0);
  1395. if (((cur[0] == 'i') && (cur[1] == '-')) ||
  1396. ((cur[0] == 'I') && (cur[1] == '-')) ||
  1397. ((cur[0] == 'x') && (cur[1] == '-')) ||
  1398. ((cur[0] == 'X') && (cur[1] == '-'))) {
  1399. /*
  1400. * Still allow IANA code and user code which were coming
  1401. * from the previous version of the XML-1.0 specification
  1402. * it's deprecated but we should not fail
  1403. */
  1404. cur += 2;
  1405. while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
  1406. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1407. cur++;
  1408. return(cur[0] == 0);
  1409. }
  1410. nxt = cur;
  1411. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1412. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1413. nxt++;
  1414. if (nxt - cur >= 4) {
  1415. /*
  1416. * Reserved
  1417. */
  1418. if ((nxt - cur > 8) || (nxt[0] != 0))
  1419. return(0);
  1420. return(1);
  1421. }
  1422. if (nxt - cur < 2)
  1423. return(0);
  1424. /* we got an ISO 639 code */
  1425. if (nxt[0] == 0)
  1426. return(1);
  1427. if (nxt[0] != '-')
  1428. return(0);
  1429. nxt++;
  1430. cur = nxt;
  1431. /* now we can have extlang or script or region or variant */
  1432. if ((nxt[0] >= '0') && (nxt[0] <= '9'))
  1433. goto region_m49;
  1434. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1435. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1436. nxt++;
  1437. if (nxt - cur == 4)
  1438. goto script;
  1439. if (nxt - cur == 2)
  1440. goto region;
  1441. if ((nxt - cur >= 5) && (nxt - cur <= 8))
  1442. goto variant;
  1443. if (nxt - cur != 3)
  1444. return(0);
  1445. /* we parsed an extlang */
  1446. if (nxt[0] == 0)
  1447. return(1);
  1448. if (nxt[0] != '-')
  1449. return(0);
  1450. nxt++;
  1451. cur = nxt;
  1452. /* now we can have script or region or variant */
  1453. if ((nxt[0] >= '0') && (nxt[0] <= '9'))
  1454. goto region_m49;
  1455. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1456. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1457. nxt++;
  1458. if (nxt - cur == 2)
  1459. goto region;
  1460. if ((nxt - cur >= 5) && (nxt - cur <= 8))
  1461. goto variant;
  1462. if (nxt - cur != 4)
  1463. return(0);
  1464. /* we parsed a script */
  1465. script:
  1466. if (nxt[0] == 0)
  1467. return(1);
  1468. if (nxt[0] != '-')
  1469. return(0);
  1470. nxt++;
  1471. cur = nxt;
  1472. /* now we can have region or variant */
  1473. if ((nxt[0] >= '0') && (nxt[0] <= '9'))
  1474. goto region_m49;
  1475. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1476. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1477. nxt++;
  1478. if ((nxt - cur >= 5) && (nxt - cur <= 8))
  1479. goto variant;
  1480. if (nxt - cur != 2)
  1481. return(0);
  1482. /* we parsed a region */
  1483. region:
  1484. if (nxt[0] == 0)
  1485. return(1);
  1486. if (nxt[0] != '-')
  1487. return(0);
  1488. nxt++;
  1489. cur = nxt;
  1490. /* now we can just have a variant */
  1491. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1492. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1493. nxt++;
  1494. if ((nxt - cur < 5) || (nxt - cur > 8))
  1495. return(0);
  1496. /* we parsed a variant */
  1497. variant:
  1498. if (nxt[0] == 0)
  1499. return(1);
  1500. if (nxt[0] != '-')
  1501. return(0);
  1502. /* extensions and private use subtags not checked */
  1503. return (1);
  1504. region_m49:
  1505. if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
  1506. ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
  1507. nxt += 3;
  1508. goto region;
  1509. }
  1510. return(0);
  1511. }
  1512. /************************************************************************
  1513. * *
  1514. * Parser stacks related functions and macros *
  1515. * *
  1516. ************************************************************************/
  1517. static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
  1518. const xmlChar ** str);
  1519. #ifdef SAX2
  1520. /**
  1521. * nsPush:
  1522. * @ctxt: an XML parser context
  1523. * @prefix: the namespace prefix or NULL
  1524. * @URL: the namespace name
  1525. *
  1526. * Pushes a new parser namespace on top of the ns stack
  1527. *
  1528. * Returns -1 in case of error, -2 if the namespace should be discarded
  1529. * and the index in the stack otherwise.
  1530. */
  1531. static int
  1532. nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
  1533. {
  1534. if (ctxt->options & XML_PARSE_NSCLEAN) {
  1535. int i;
  1536. for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
  1537. if (ctxt->nsTab[i] == prefix) {
  1538. /* in scope */
  1539. if (ctxt->nsTab[i + 1] == URL)
  1540. return(-2);
  1541. /* out of scope keep it */
  1542. break;
  1543. }
  1544. }
  1545. }
  1546. if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
  1547. ctxt->nsMax = 10;
  1548. ctxt->nsNr = 0;
  1549. ctxt->nsTab = (const xmlChar **)
  1550. xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
  1551. if (ctxt->nsTab == NULL) {
  1552. xmlErrMemory(ctxt, NULL);
  1553. ctxt->nsMax = 0;
  1554. return (-1);
  1555. }
  1556. } else if (ctxt->nsNr >= ctxt->nsMax) {
  1557. const xmlChar ** tmp;
  1558. ctxt->nsMax *= 2;
  1559. tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
  1560. ctxt->nsMax * sizeof(ctxt->nsTab[0]));
  1561. if (tmp == NULL) {
  1562. xmlErrMemory(ctxt, NULL);
  1563. ctxt->nsMax /= 2;
  1564. return (-1);
  1565. }
  1566. ctxt->nsTab = tmp;
  1567. }
  1568. ctxt->nsTab[ctxt->nsNr++] = prefix;
  1569. ctxt->nsTab[ctxt->nsNr++] = URL;
  1570. return (ctxt->nsNr);
  1571. }
  1572. /**
  1573. * nsPop:
  1574. * @ctxt: an XML parser context
  1575. * @nr: the number to pop
  1576. *
  1577. * Pops the top @nr parser prefix/namespace from the ns stack
  1578. *
  1579. * Returns the number of namespaces removed
  1580. */
  1581. static int
  1582. nsPop(xmlParserCtxtPtr ctxt, int nr)
  1583. {
  1584. int i;
  1585. if (ctxt->nsTab == NULL) return(0);
  1586. if (ctxt->nsNr < nr) {
  1587. xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
  1588. nr = ctxt->nsNr;
  1589. }
  1590. if (ctxt->nsNr <= 0)
  1591. return (0);
  1592. for (i = 0;i < nr;i++) {
  1593. ctxt->nsNr--;
  1594. ctxt->nsTab[ctxt->nsNr] = NULL;
  1595. }
  1596. return(nr);
  1597. }
  1598. #endif
  1599. static int
  1600. xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
  1601. const xmlChar **atts;
  1602. int *attallocs;
  1603. int maxatts;
  1604. if (ctxt->atts == NULL) {
  1605. maxatts = 55; /* allow for 10 attrs by default */
  1606. atts = (const xmlChar **)
  1607. xmlMalloc(maxatts * sizeof(xmlChar *));
  1608. if (atts == NULL) goto mem_error;
  1609. ctxt->atts = atts;
  1610. attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
  1611. if (attallocs == NULL) goto mem_error;
  1612. ctxt->attallocs = attallocs;
  1613. ctxt->maxatts = maxatts;
  1614. } else if (nr + 5 > ctxt->maxatts) {
  1615. maxatts = (nr + 5) * 2;
  1616. atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
  1617. maxatts * sizeof(const xmlChar *));
  1618. if (atts == NULL) goto mem_error;
  1619. ctxt->atts = atts;
  1620. attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
  1621. (maxatts / 5) * sizeof(int));
  1622. if (attallocs == NULL) goto mem_error;
  1623. ctxt->attallocs = attallocs;
  1624. ctxt->maxatts = maxatts;
  1625. }
  1626. return(ctxt->maxatts);
  1627. mem_error:
  1628. xmlErrMemory(ctxt, NULL);
  1629. return(-1);
  1630. }
  1631. /**
  1632. * inputPush:
  1633. * @ctxt: an XML parser context
  1634. * @value: the parser input
  1635. *
  1636. * Pushes a new parser input on top of the input stack
  1637. *
  1638. * Returns -1 in case of error, the index in the stack otherwise
  1639. */
  1640. int
  1641. inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
  1642. {
  1643. if ((ctxt == NULL) || (value == NULL))
  1644. return(-1);
  1645. if (ctxt->inputNr >= ctxt->inputMax) {
  1646. ctxt->inputMax *= 2;
  1647. ctxt->inputTab =
  1648. (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
  1649. ctxt->inputMax *
  1650. sizeof(ctxt->inputTab[0]));
  1651. if (ctxt->inputTab == NULL) {
  1652. xmlErrMemory(ctxt, NULL);
  1653. xmlFreeInputStream(value);
  1654. ctxt->inputMax /= 2;
  1655. value = NULL;
  1656. return (-1);
  1657. }
  1658. }
  1659. ctxt->inputTab[ctxt->inputNr] = value;
  1660. ctxt->input = value;
  1661. return (ctxt->inputNr++);
  1662. }
  1663. /**
  1664. * inputPop:
  1665. * @ctxt: an XML parser context
  1666. *
  1667. * Pops the top parser input from the input stack
  1668. *
  1669. * Returns the input just removed
  1670. */
  1671. xmlParserInputPtr
  1672. inputPop(xmlParserCtxtPtr ctxt)
  1673. {
  1674. xmlParserInputPtr ret;
  1675. if (ctxt == NULL)
  1676. return(NULL);
  1677. if (ctxt->inputNr <= 0)
  1678. return (NULL);
  1679. ctxt->inputNr--;
  1680. if (ctxt->inputNr > 0)
  1681. ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
  1682. else
  1683. ctxt->input = NULL;
  1684. ret = ctxt->inputTab[ctxt->inputNr];
  1685. ctxt->inputTab[ctxt->inputNr] = NULL;
  1686. return (ret);
  1687. }
  1688. /**
  1689. * nodePush:
  1690. * @ctxt: an XML parser context
  1691. * @value: the element node
  1692. *
  1693. * Pushes a new element node on top of the node stack
  1694. *
  1695. * Returns -1 in case of error, the index in the stack otherwise
  1696. */
  1697. int
  1698. nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
  1699. {
  1700. if (ctxt == NULL) return(0);
  1701. if (ctxt->nodeNr >= ctxt->nodeMax) {
  1702. xmlNodePtr *tmp;
  1703. tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
  1704. ctxt->nodeMax * 2 *
  1705. sizeof(ctxt->nodeTab[0]));
  1706. if (tmp == NULL) {
  1707. xmlErrMemory(ctxt, NULL);
  1708. return (-1);
  1709. }
  1710. ctxt->nodeTab = tmp;
  1711. ctxt->nodeMax *= 2;
  1712. }
  1713. if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
  1714. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  1715. xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
  1716. "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
  1717. xmlParserMaxDepth);
  1718. xmlHaltParser(ctxt);
  1719. return(-1);
  1720. }
  1721. ctxt->nodeTab[ctxt->nodeNr] = value;
  1722. ctxt->node = value;
  1723. return (ctxt->nodeNr++);
  1724. }
  1725. /**
  1726. * nodePop:
  1727. * @ctxt: an XML parser context
  1728. *
  1729. * Pops the top element node from the node stack
  1730. *
  1731. * Returns the node just removed
  1732. */
  1733. xmlNodePtr
  1734. nodePop(xmlParserCtxtPtr ctxt)
  1735. {
  1736. xmlNodePtr ret;
  1737. if (ctxt == NULL) return(NULL);
  1738. if (ctxt->nodeNr <= 0)
  1739. return (NULL);
  1740. ctxt->nodeNr--;
  1741. if (ctxt->nodeNr > 0)
  1742. ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
  1743. else
  1744. ctxt->node = NULL;
  1745. ret = ctxt->nodeTab[ctxt->nodeNr];
  1746. ctxt->nodeTab[ctxt->nodeNr] = NULL;
  1747. return (ret);
  1748. }
  1749. /**
  1750. * nameNsPush:
  1751. * @ctxt: an XML parser context
  1752. * @value: the element name
  1753. * @prefix: the element prefix
  1754. * @URI: the element namespace name
  1755. * @line: the current line number for error messages
  1756. * @nsNr: the number of namespaces pushed on the namespace table
  1757. *
  1758. * Pushes a new element name/prefix/URL on top of the name stack
  1759. *
  1760. * Returns -1 in case of error, the index in the stack otherwise
  1761. */
  1762. static int
  1763. nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
  1764. const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
  1765. {
  1766. xmlStartTag *tag;
  1767. if (ctxt->nameNr >= ctxt->nameMax) {
  1768. const xmlChar * *tmp;
  1769. xmlStartTag *tmp2;
  1770. ctxt->nameMax *= 2;
  1771. tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
  1772. ctxt->nameMax *
  1773. sizeof(ctxt->nameTab[0]));
  1774. if (tmp == NULL) {
  1775. ctxt->nameMax /= 2;
  1776. goto mem_error;
  1777. }
  1778. ctxt->nameTab = tmp;
  1779. tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
  1780. ctxt->nameMax *
  1781. sizeof(ctxt->pushTab[0]));
  1782. if (tmp2 == NULL) {
  1783. ctxt->nameMax /= 2;
  1784. goto mem_error;
  1785. }
  1786. ctxt->pushTab = tmp2;
  1787. } else if (ctxt->pushTab == NULL) {
  1788. ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
  1789. sizeof(ctxt->pushTab[0]));
  1790. if (ctxt->pushTab == NULL)
  1791. goto mem_error;
  1792. }
  1793. ctxt->nameTab[ctxt->nameNr] = value;
  1794. ctxt->name = value;
  1795. tag = &ctxt->pushTab[ctxt->nameNr];
  1796. tag->prefix = prefix;
  1797. tag->URI = URI;
  1798. tag->line = line;
  1799. tag->nsNr = nsNr;
  1800. return (ctxt->nameNr++);
  1801. mem_error:
  1802. xmlErrMemory(ctxt, NULL);
  1803. return (-1);
  1804. }
  1805. #ifdef LIBXML_PUSH_ENABLED
  1806. /**
  1807. * nameNsPop:
  1808. * @ctxt: an XML parser context
  1809. *
  1810. * Pops the top element/prefix/URI name from the name stack
  1811. *
  1812. * Returns the name just removed
  1813. */
  1814. static const xmlChar *
  1815. nameNsPop(xmlParserCtxtPtr ctxt)
  1816. {
  1817. const xmlChar *ret;
  1818. if (ctxt->nameNr <= 0)
  1819. return (NULL);
  1820. ctxt->nameNr--;
  1821. if (ctxt->nameNr > 0)
  1822. ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
  1823. else
  1824. ctxt->name = NULL;
  1825. ret = ctxt->nameTab[ctxt->nameNr];
  1826. ctxt->nameTab[ctxt->nameNr] = NULL;
  1827. return (ret);
  1828. }
  1829. #endif /* LIBXML_PUSH_ENABLED */
  1830. /**
  1831. * namePush:
  1832. * @ctxt: an XML parser context
  1833. * @value: the element name
  1834. *
  1835. * Pushes a new element name on top of the name stack
  1836. *
  1837. * Returns -1 in case of error, the index in the stack otherwise
  1838. */
  1839. int
  1840. namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
  1841. {
  1842. if (ctxt == NULL) return (-1);
  1843. if (ctxt->nameNr >= ctxt->nameMax) {
  1844. const xmlChar * *tmp;
  1845. tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
  1846. ctxt->nameMax * 2 *
  1847. sizeof(ctxt->nameTab[0]));
  1848. if (tmp == NULL) {
  1849. goto mem_error;
  1850. }
  1851. ctxt->nameTab = tmp;
  1852. ctxt->nameMax *= 2;
  1853. }
  1854. ctxt->nameTab[ctxt->nameNr] = value;
  1855. ctxt->name = value;
  1856. return (ctxt->nameNr++);
  1857. mem_error:
  1858. xmlErrMemory(ctxt, NULL);
  1859. return (-1);
  1860. }
  1861. /**
  1862. * namePop:
  1863. * @ctxt: an XML parser context
  1864. *
  1865. * Pops the top element name from the name stack
  1866. *
  1867. * Returns the name just removed
  1868. */
  1869. const xmlChar *
  1870. namePop(xmlParserCtxtPtr ctxt)
  1871. {
  1872. const xmlChar *ret;
  1873. if ((ctxt == NULL) || (ctxt->nameNr <= 0))
  1874. return (NULL);
  1875. ctxt->nameNr--;
  1876. if (ctxt->nameNr > 0)
  1877. ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
  1878. else
  1879. ctxt->name = NULL;
  1880. ret = ctxt->nameTab[ctxt->nameNr];
  1881. ctxt->nameTab[ctxt->nameNr] = NULL;
  1882. return (ret);
  1883. }
  1884. static int spacePush(xmlParserCtxtPtr ctxt, int val) {
  1885. if (ctxt->spaceNr >= ctxt->spaceMax) {
  1886. int *tmp;
  1887. ctxt->spaceMax *= 2;
  1888. tmp = (int *) xmlRealloc(ctxt->spaceTab,
  1889. ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
  1890. if (tmp == NULL) {
  1891. xmlErrMemory(ctxt, NULL);
  1892. ctxt->spaceMax /=2;
  1893. return(-1);
  1894. }
  1895. ctxt->spaceTab = tmp;
  1896. }
  1897. ctxt->spaceTab[ctxt->spaceNr] = val;
  1898. ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
  1899. return(ctxt->spaceNr++);
  1900. }
  1901. static int spacePop(xmlParserCtxtPtr ctxt) {
  1902. int ret;
  1903. if (ctxt->spaceNr <= 0) return(0);
  1904. ctxt->spaceNr--;
  1905. if (ctxt->spaceNr > 0)
  1906. ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
  1907. else
  1908. ctxt->space = &ctxt->spaceTab[0];
  1909. ret = ctxt->spaceTab[ctxt->spaceNr];
  1910. ctxt->spaceTab[ctxt->spaceNr] = -1;
  1911. return(ret);
  1912. }
  1913. /*
  1914. * Macros for accessing the content. Those should be used only by the parser,
  1915. * and not exported.
  1916. *
  1917. * Dirty macros, i.e. one often need to make assumption on the context to
  1918. * use them
  1919. *
  1920. * CUR_PTR return the current pointer to the xmlChar to be parsed.
  1921. * To be used with extreme caution since operations consuming
  1922. * characters may move the input buffer to a different location !
  1923. * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
  1924. * This should be used internally by the parser
  1925. * only to compare to ASCII values otherwise it would break when
  1926. * running with UTF-8 encoding.
  1927. * RAW same as CUR but in the input buffer, bypass any token
  1928. * extraction that may have been done
  1929. * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
  1930. * to compare on ASCII based substring.
  1931. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
  1932. * strings without newlines within the parser.
  1933. * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
  1934. * defined char within the parser.
  1935. * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
  1936. *
  1937. * NEXT Skip to the next character, this does the proper decoding
  1938. * in UTF-8 mode. It also pop-up unfinished entities on the fly.
  1939. * NEXTL(l) Skip the current unicode character of l xmlChars long.
  1940. * CUR_CHAR(l) returns the current unicode character (int), set l
  1941. * to the number of xmlChars used for the encoding [0-5].
  1942. * CUR_SCHAR same but operate on a string instead of the context
  1943. * COPY_BUF copy the current unicode char to the target buffer, increment
  1944. * the index
  1945. * GROW, SHRINK handling of input buffers
  1946. */
  1947. #define RAW (*ctxt->input->cur)
  1948. #define CUR (*ctxt->input->cur)
  1949. #define NXT(val) ctxt->input->cur[(val)]
  1950. #define CUR_PTR ctxt->input->cur
  1951. #define BASE_PTR ctxt->input->base
  1952. #define CMP4( s, c1, c2, c3, c4 ) \
  1953. ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
  1954. ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
  1955. #define CMP5( s, c1, c2, c3, c4, c5 ) \
  1956. ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
  1957. #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  1958. ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
  1959. #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  1960. ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
  1961. #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  1962. ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
  1963. #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  1964. ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
  1965. ((unsigned char *) s)[ 8 ] == c9 )
  1966. #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  1967. ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
  1968. ((unsigned char *) s)[ 9 ] == c10 )
  1969. #define SKIP(val) do { \
  1970. ctxt->input->cur += (val),ctxt->input->col+=(val); \
  1971. if (*ctxt->input->cur == 0) \
  1972. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  1973. } while (0)
  1974. #define SKIPL(val) do { \
  1975. int skipl; \
  1976. for(skipl=0; skipl<val; skipl++) { \
  1977. if (*(ctxt->input->cur) == '\n') { \
  1978. ctxt->input->line++; ctxt->input->col = 1; \
  1979. } else ctxt->input->col++; \
  1980. ctxt->input->cur++; \
  1981. } \
  1982. if (*ctxt->input->cur == 0) \
  1983. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  1984. } while (0)
  1985. #define SHRINK if ((ctxt->progressive == 0) && \
  1986. (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
  1987. (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
  1988. xmlSHRINK (ctxt);
  1989. static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
  1990. xmlParserInputShrink(ctxt->input);
  1991. if (*ctxt->input->cur == 0)
  1992. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  1993. }
  1994. #define GROW if ((ctxt->progressive == 0) && \
  1995. (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
  1996. xmlGROW (ctxt);
  1997. static void xmlGROW (xmlParserCtxtPtr ctxt) {
  1998. ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
  1999. ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
  2000. if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
  2001. (curBase > XML_MAX_LOOKUP_LIMIT)) &&
  2002. ((ctxt->input->buf) &&
  2003. (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
  2004. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  2005. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
  2006. xmlHaltParser(ctxt);
  2007. return;
  2008. }
  2009. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2010. if ((ctxt->input->cur > ctxt->input->end) ||
  2011. (ctxt->input->cur < ctxt->input->base)) {
  2012. xmlHaltParser(ctxt);
  2013. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
  2014. return;
  2015. }
  2016. if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
  2017. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2018. }
  2019. #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
  2020. #define NEXT xmlNextChar(ctxt)
  2021. #define NEXT1 { \
  2022. ctxt->input->col++; \
  2023. ctxt->input->cur++; \
  2024. if (*ctxt->input->cur == 0) \
  2025. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  2026. }
  2027. #define NEXTL(l) do { \
  2028. if (*(ctxt->input->cur) == '\n') { \
  2029. ctxt->input->line++; ctxt->input->col = 1; \
  2030. } else ctxt->input->col++; \
  2031. ctxt->input->cur += l; \
  2032. } while (0)
  2033. #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
  2034. #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
  2035. #define COPY_BUF(l,b,i,v) \
  2036. if (l == 1) b[i++] = (xmlChar) v; \
  2037. else i += xmlCopyCharMultiByte(&b[i],v)
  2038. /**
  2039. * xmlSkipBlankChars:
  2040. * @ctxt: the XML parser context
  2041. *
  2042. * skip all blanks character found at that point in the input streams.
  2043. * It pops up finished entities in the process if allowable at that point.
  2044. *
  2045. * Returns the number of space chars skipped
  2046. */
  2047. int
  2048. xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
  2049. int res = 0;
  2050. /*
  2051. * It's Okay to use CUR/NEXT here since all the blanks are on
  2052. * the ASCII range.
  2053. */
  2054. if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
  2055. (ctxt->instate == XML_PARSER_START)) {
  2056. const xmlChar *cur;
  2057. /*
  2058. * if we are in the document content, go really fast
  2059. */
  2060. cur = ctxt->input->cur;
  2061. while (IS_BLANK_CH(*cur)) {
  2062. if (*cur == '\n') {
  2063. ctxt->input->line++; ctxt->input->col = 1;
  2064. } else {
  2065. ctxt->input->col++;
  2066. }
  2067. cur++;
  2068. res++;
  2069. if (*cur == 0) {
  2070. ctxt->input->cur = cur;
  2071. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2072. cur = ctxt->input->cur;
  2073. }
  2074. }
  2075. ctxt->input->cur = cur;
  2076. } else {
  2077. int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
  2078. while (1) {
  2079. if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
  2080. NEXT;
  2081. } else if (CUR == '%') {
  2082. /*
  2083. * Need to handle support of entities branching here
  2084. */
  2085. if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
  2086. break;
  2087. xmlParsePEReference(ctxt);
  2088. } else if (CUR == 0) {
  2089. if (ctxt->inputNr <= 1)
  2090. break;
  2091. xmlPopInput(ctxt);
  2092. } else {
  2093. break;
  2094. }
  2095. /*
  2096. * Also increase the counter when entering or exiting a PERef.
  2097. * The spec says: "When a parameter-entity reference is recognized
  2098. * in the DTD and included, its replacement text MUST be enlarged
  2099. * by the attachment of one leading and one following space (#x20)
  2100. * character."
  2101. */
  2102. res++;
  2103. }
  2104. }
  2105. return(res);
  2106. }
  2107. /************************************************************************
  2108. * *
  2109. * Commodity functions to handle entities *
  2110. * *
  2111. ************************************************************************/
  2112. /**
  2113. * xmlPopInput:
  2114. * @ctxt: an XML parser context
  2115. *
  2116. * xmlPopInput: the current input pointed by ctxt->input came to an end
  2117. * pop it and return the next char.
  2118. *
  2119. * Returns the current xmlChar in the parser context
  2120. */
  2121. xmlChar
  2122. xmlPopInput(xmlParserCtxtPtr ctxt) {
  2123. if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
  2124. if (xmlParserDebugEntities)
  2125. xmlGenericError(xmlGenericErrorContext,
  2126. "Popping input %d\n", ctxt->inputNr);
  2127. if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
  2128. (ctxt->instate != XML_PARSER_EOF))
  2129. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  2130. "Unfinished entity outside the DTD");
  2131. xmlFreeInputStream(inputPop(ctxt));
  2132. if (*ctxt->input->cur == 0)
  2133. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2134. return(CUR);
  2135. }
  2136. /**
  2137. * xmlPushInput:
  2138. * @ctxt: an XML parser context
  2139. * @input: an XML parser input fragment (entity, XML fragment ...).
  2140. *
  2141. * xmlPushInput: switch to a new input stream which is stacked on top
  2142. * of the previous one(s).
  2143. * Returns -1 in case of error or the index in the input stack
  2144. */
  2145. int
  2146. xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
  2147. int ret;
  2148. if (input == NULL) return(-1);
  2149. if (xmlParserDebugEntities) {
  2150. if ((ctxt->input != NULL) && (ctxt->input->filename))
  2151. xmlGenericError(xmlGenericErrorContext,
  2152. "%s(%d): ", ctxt->input->filename,
  2153. ctxt->input->line);
  2154. xmlGenericError(xmlGenericErrorContext,
  2155. "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
  2156. }
  2157. if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  2158. (ctxt->inputNr > 1024)) {
  2159. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  2160. while (ctxt->inputNr > 1)
  2161. xmlFreeInputStream(inputPop(ctxt));
  2162. return(-1);
  2163. }
  2164. ret = inputPush(ctxt, input);
  2165. if (ctxt->instate == XML_PARSER_EOF)
  2166. return(-1);
  2167. GROW;
  2168. return(ret);
  2169. }
  2170. /**
  2171. * xmlParseCharRef:
  2172. * @ctxt: an XML parser context
  2173. *
  2174. * parse Reference declarations
  2175. *
  2176. * [66] CharRef ::= '&#' [0-9]+ ';' |
  2177. * '&#x' [0-9a-fA-F]+ ';'
  2178. *
  2179. * [ WFC: Legal Character ]
  2180. * Characters referred to using character references must match the
  2181. * production for Char.
  2182. *
  2183. * Returns the value parsed (as an int), 0 in case of error
  2184. */
  2185. int
  2186. xmlParseCharRef(xmlParserCtxtPtr ctxt) {
  2187. int val = 0;
  2188. int count = 0;
  2189. /*
  2190. * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
  2191. */
  2192. if ((RAW == '&') && (NXT(1) == '#') &&
  2193. (NXT(2) == 'x')) {
  2194. SKIP(3);
  2195. GROW;
  2196. while (RAW != ';') { /* loop blocked by count */
  2197. if (count++ > 20) {
  2198. count = 0;
  2199. GROW;
  2200. if (ctxt->instate == XML_PARSER_EOF)
  2201. return(0);
  2202. }
  2203. if ((RAW >= '0') && (RAW <= '9'))
  2204. val = val * 16 + (CUR - '0');
  2205. else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
  2206. val = val * 16 + (CUR - 'a') + 10;
  2207. else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
  2208. val = val * 16 + (CUR - 'A') + 10;
  2209. else {
  2210. xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
  2211. val = 0;
  2212. break;
  2213. }
  2214. if (val > 0x110000)
  2215. val = 0x110000;
  2216. NEXT;
  2217. count++;
  2218. }
  2219. if (RAW == ';') {
  2220. /* on purpose to avoid reentrancy problems with NEXT and SKIP */
  2221. ctxt->input->col++;
  2222. ctxt->input->cur++;
  2223. }
  2224. } else if ((RAW == '&') && (NXT(1) == '#')) {
  2225. SKIP(2);
  2226. GROW;
  2227. while (RAW != ';') { /* loop blocked by count */
  2228. if (count++ > 20) {
  2229. count = 0;
  2230. GROW;
  2231. if (ctxt->instate == XML_PARSER_EOF)
  2232. return(0);
  2233. }
  2234. if ((RAW >= '0') && (RAW <= '9'))
  2235. val = val * 10 + (CUR - '0');
  2236. else {
  2237. xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
  2238. val = 0;
  2239. break;
  2240. }
  2241. if (val > 0x110000)
  2242. val = 0x110000;
  2243. NEXT;
  2244. count++;
  2245. }
  2246. if (RAW == ';') {
  2247. /* on purpose to avoid reentrancy problems with NEXT and SKIP */
  2248. ctxt->input->col++;
  2249. ctxt->input->cur++;
  2250. }
  2251. } else {
  2252. xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
  2253. }
  2254. /*
  2255. * [ WFC: Legal Character ]
  2256. * Characters referred to using character references must match the
  2257. * production for Char.
  2258. */
  2259. if (val >= 0x110000) {
  2260. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2261. "xmlParseCharRef: character reference out of bounds\n",
  2262. val);
  2263. } else if (IS_CHAR(val)) {
  2264. return(val);
  2265. } else {
  2266. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2267. "xmlParseCharRef: invalid xmlChar value %d\n",
  2268. val);
  2269. }
  2270. return(0);
  2271. }
  2272. /**
  2273. * xmlParseStringCharRef:
  2274. * @ctxt: an XML parser context
  2275. * @str: a pointer to an index in the string
  2276. *
  2277. * parse Reference declarations, variant parsing from a string rather
  2278. * than an an input flow.
  2279. *
  2280. * [66] CharRef ::= '&#' [0-9]+ ';' |
  2281. * '&#x' [0-9a-fA-F]+ ';'
  2282. *
  2283. * [ WFC: Legal Character ]
  2284. * Characters referred to using character references must match the
  2285. * production for Char.
  2286. *
  2287. * Returns the value parsed (as an int), 0 in case of error, str will be
  2288. * updated to the current value of the index
  2289. */
  2290. static int
  2291. xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
  2292. const xmlChar *ptr;
  2293. xmlChar cur;
  2294. int val = 0;
  2295. if ((str == NULL) || (*str == NULL)) return(0);
  2296. ptr = *str;
  2297. cur = *ptr;
  2298. if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
  2299. ptr += 3;
  2300. cur = *ptr;
  2301. while (cur != ';') { /* Non input consuming loop */
  2302. if ((cur >= '0') && (cur <= '9'))
  2303. val = val * 16 + (cur - '0');
  2304. else if ((cur >= 'a') && (cur <= 'f'))
  2305. val = val * 16 + (cur - 'a') + 10;
  2306. else if ((cur >= 'A') && (cur <= 'F'))
  2307. val = val * 16 + (cur - 'A') + 10;
  2308. else {
  2309. xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
  2310. val = 0;
  2311. break;
  2312. }
  2313. if (val > 0x110000)
  2314. val = 0x110000;
  2315. ptr++;
  2316. cur = *ptr;
  2317. }
  2318. if (cur == ';')
  2319. ptr++;
  2320. } else if ((cur == '&') && (ptr[1] == '#')){
  2321. ptr += 2;
  2322. cur = *ptr;
  2323. while (cur != ';') { /* Non input consuming loops */
  2324. if ((cur >= '0') && (cur <= '9'))
  2325. val = val * 10 + (cur - '0');
  2326. else {
  2327. xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
  2328. val = 0;
  2329. break;
  2330. }
  2331. if (val > 0x110000)
  2332. val = 0x110000;
  2333. ptr++;
  2334. cur = *ptr;
  2335. }
  2336. if (cur == ';')
  2337. ptr++;
  2338. } else {
  2339. xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
  2340. return(0);
  2341. }
  2342. *str = ptr;
  2343. /*
  2344. * [ WFC: Legal Character ]
  2345. * Characters referred to using character references must match the
  2346. * production for Char.
  2347. */
  2348. if (val >= 0x110000) {
  2349. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2350. "xmlParseStringCharRef: character reference out of bounds\n",
  2351. val);
  2352. } else if (IS_CHAR(val)) {
  2353. return(val);
  2354. } else {
  2355. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2356. "xmlParseStringCharRef: invalid xmlChar value %d\n",
  2357. val);
  2358. }
  2359. return(0);
  2360. }
  2361. /**
  2362. * xmlParserHandlePEReference:
  2363. * @ctxt: the parser context
  2364. *
  2365. * [69] PEReference ::= '%' Name ';'
  2366. *
  2367. * [ WFC: No Recursion ]
  2368. * A parsed entity must not contain a recursive
  2369. * reference to itself, either directly or indirectly.
  2370. *
  2371. * [ WFC: Entity Declared ]
  2372. * In a document without any DTD, a document with only an internal DTD
  2373. * subset which contains no parameter entity references, or a document
  2374. * with "standalone='yes'", ... ... The declaration of a parameter
  2375. * entity must precede any reference to it...
  2376. *
  2377. * [ VC: Entity Declared ]
  2378. * In a document with an external subset or external parameter entities
  2379. * with "standalone='no'", ... ... The declaration of a parameter entity
  2380. * must precede any reference to it...
  2381. *
  2382. * [ WFC: In DTD ]
  2383. * Parameter-entity references may only appear in the DTD.
  2384. * NOTE: misleading but this is handled.
  2385. *
  2386. * A PEReference may have been detected in the current input stream
  2387. * the handling is done accordingly to
  2388. * http://www.w3.org/TR/REC-xml#entproc
  2389. * i.e.
  2390. * - Included in literal in entity values
  2391. * - Included as Parameter Entity reference within DTDs
  2392. */
  2393. void
  2394. xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
  2395. switch(ctxt->instate) {
  2396. case XML_PARSER_CDATA_SECTION:
  2397. return;
  2398. case XML_PARSER_COMMENT:
  2399. return;
  2400. case XML_PARSER_START_TAG:
  2401. return;
  2402. case XML_PARSER_END_TAG:
  2403. return;
  2404. case XML_PARSER_EOF:
  2405. xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
  2406. return;
  2407. case XML_PARSER_PROLOG:
  2408. case XML_PARSER_START:
  2409. case XML_PARSER_MISC:
  2410. xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
  2411. return;
  2412. case XML_PARSER_ENTITY_DECL:
  2413. case XML_PARSER_CONTENT:
  2414. case XML_PARSER_ATTRIBUTE_VALUE:
  2415. case XML_PARSER_PI:
  2416. case XML_PARSER_SYSTEM_LITERAL:
  2417. case XML_PARSER_PUBLIC_LITERAL:
  2418. /* we just ignore it there */
  2419. return;
  2420. case XML_PARSER_EPILOG:
  2421. xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
  2422. return;
  2423. case XML_PARSER_ENTITY_VALUE:
  2424. /*
  2425. * NOTE: in the case of entity values, we don't do the
  2426. * substitution here since we need the literal
  2427. * entity value to be able to save the internal
  2428. * subset of the document.
  2429. * This will be handled by xmlStringDecodeEntities
  2430. */
  2431. return;
  2432. case XML_PARSER_DTD:
  2433. /*
  2434. * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
  2435. * In the internal DTD subset, parameter-entity references
  2436. * can occur only where markup declarations can occur, not
  2437. * within markup declarations.
  2438. * In that case this is handled in xmlParseMarkupDecl
  2439. */
  2440. if ((ctxt->external == 0) && (ctxt->inputNr == 1))
  2441. return;
  2442. if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
  2443. return;
  2444. break;
  2445. case XML_PARSER_IGNORE:
  2446. return;
  2447. }
  2448. xmlParsePEReference(ctxt);
  2449. }
  2450. /*
  2451. * Macro used to grow the current buffer.
  2452. * buffer##_size is expected to be a size_t
  2453. * mem_error: is expected to handle memory allocation failures
  2454. */
  2455. #define growBuffer(buffer, n) { \
  2456. xmlChar *tmp; \
  2457. size_t new_size = buffer##_size * 2 + n; \
  2458. if (new_size < buffer##_size) goto mem_error; \
  2459. tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
  2460. if (tmp == NULL) goto mem_error; \
  2461. buffer = tmp; \
  2462. buffer##_size = new_size; \
  2463. }
  2464. /**
  2465. * xmlStringLenDecodeEntities:
  2466. * @ctxt: the parser context
  2467. * @str: the input string
  2468. * @len: the string length
  2469. * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
  2470. * @end: an end marker xmlChar, 0 if none
  2471. * @end2: an end marker xmlChar, 0 if none
  2472. * @end3: an end marker xmlChar, 0 if none
  2473. *
  2474. * Takes a entity string content and process to do the adequate substitutions.
  2475. *
  2476. * [67] Reference ::= EntityRef | CharRef
  2477. *
  2478. * [69] PEReference ::= '%' Name ';'
  2479. *
  2480. * Returns A newly allocated string with the substitution done. The caller
  2481. * must deallocate it !
  2482. */
  2483. xmlChar *
  2484. xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
  2485. int what, xmlChar end, xmlChar end2, xmlChar end3) {
  2486. xmlChar *buffer = NULL;
  2487. size_t buffer_size = 0;
  2488. size_t nbchars = 0;
  2489. xmlChar *current = NULL;
  2490. xmlChar *rep = NULL;
  2491. const xmlChar *last;
  2492. xmlEntityPtr ent;
  2493. int c,l;
  2494. if ((ctxt == NULL) || (str == NULL) || (len < 0))
  2495. return(NULL);
  2496. last = str + len;
  2497. if (((ctxt->depth > 40) &&
  2498. ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  2499. (ctxt->depth > 1024)) {
  2500. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  2501. return(NULL);
  2502. }
  2503. /*
  2504. * allocate a translation buffer.
  2505. */
  2506. buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
  2507. buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
  2508. if (buffer == NULL) goto mem_error;
  2509. /*
  2510. * OK loop until we reach one of the ending char or a size limit.
  2511. * we are operating on already parsed values.
  2512. */
  2513. if (str < last)
  2514. c = CUR_SCHAR(str, l);
  2515. else
  2516. c = 0;
  2517. while ((c != 0) && (c != end) && /* non input consuming loop */
  2518. (c != end2) && (c != end3) &&
  2519. (ctxt->instate != XML_PARSER_EOF)) {
  2520. if (c == 0) break;
  2521. if ((c == '&') && (str[1] == '#')) {
  2522. int val = xmlParseStringCharRef(ctxt, &str);
  2523. if (val == 0)
  2524. goto int_error;
  2525. COPY_BUF(0,buffer,nbchars,val);
  2526. if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
  2527. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2528. }
  2529. } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
  2530. if (xmlParserDebugEntities)
  2531. xmlGenericError(xmlGenericErrorContext,
  2532. "String decoding Entity Reference: %.30s\n",
  2533. str);
  2534. ent = xmlParseStringEntityRef(ctxt, &str);
  2535. xmlParserEntityCheck(ctxt, 0, ent, 0);
  2536. if (ent != NULL)
  2537. ctxt->nbentities += ent->checked / 2;
  2538. if ((ent != NULL) &&
  2539. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  2540. if (ent->content != NULL) {
  2541. COPY_BUF(0,buffer,nbchars,ent->content[0]);
  2542. if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
  2543. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2544. }
  2545. } else {
  2546. xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
  2547. "predefined entity has no content\n");
  2548. goto int_error;
  2549. }
  2550. } else if ((ent != NULL) && (ent->content != NULL)) {
  2551. ctxt->depth++;
  2552. rep = xmlStringDecodeEntities(ctxt, ent->content, what,
  2553. 0, 0, 0);
  2554. ctxt->depth--;
  2555. if (rep == NULL) {
  2556. ent->content[0] = 0;
  2557. goto int_error;
  2558. }
  2559. current = rep;
  2560. while (*current != 0) { /* non input consuming loop */
  2561. buffer[nbchars++] = *current++;
  2562. if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
  2563. if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
  2564. goto int_error;
  2565. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2566. }
  2567. }
  2568. xmlFree(rep);
  2569. rep = NULL;
  2570. } else if (ent != NULL) {
  2571. int i = xmlStrlen(ent->name);
  2572. const xmlChar *cur = ent->name;
  2573. buffer[nbchars++] = '&';
  2574. if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
  2575. growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
  2576. }
  2577. for (;i > 0;i--)
  2578. buffer[nbchars++] = *cur++;
  2579. buffer[nbchars++] = ';';
  2580. }
  2581. } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
  2582. if (xmlParserDebugEntities)
  2583. xmlGenericError(xmlGenericErrorContext,
  2584. "String decoding PE Reference: %.30s\n", str);
  2585. ent = xmlParseStringPEReference(ctxt, &str);
  2586. xmlParserEntityCheck(ctxt, 0, ent, 0);
  2587. if (ent != NULL)
  2588. ctxt->nbentities += ent->checked / 2;
  2589. if (ent != NULL) {
  2590. if (ent->content == NULL) {
  2591. /*
  2592. * Note: external parsed entities will not be loaded,
  2593. * it is not required for a non-validating parser to
  2594. * complete external PEReferences coming from the
  2595. * internal subset
  2596. */
  2597. if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
  2598. ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
  2599. (ctxt->validate != 0)) {
  2600. xmlLoadEntityContent(ctxt, ent);
  2601. } else {
  2602. xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
  2603. "not validating will not read content for PE entity %s\n",
  2604. ent->name, NULL);
  2605. }
  2606. }
  2607. ctxt->depth++;
  2608. rep = xmlStringDecodeEntities(ctxt, ent->content, what,
  2609. 0, 0, 0);
  2610. ctxt->depth--;
  2611. if (rep == NULL) {
  2612. if (ent->content != NULL)
  2613. ent->content[0] = 0;
  2614. goto int_error;
  2615. }
  2616. current = rep;
  2617. while (*current != 0) { /* non input consuming loop */
  2618. buffer[nbchars++] = *current++;
  2619. if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
  2620. if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
  2621. goto int_error;
  2622. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2623. }
  2624. }
  2625. xmlFree(rep);
  2626. rep = NULL;
  2627. }
  2628. } else {
  2629. COPY_BUF(l,buffer,nbchars,c);
  2630. str += l;
  2631. if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
  2632. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2633. }
  2634. }
  2635. if (str < last)
  2636. c = CUR_SCHAR(str, l);
  2637. else
  2638. c = 0;
  2639. }
  2640. buffer[nbchars] = 0;
  2641. return(buffer);
  2642. mem_error:
  2643. xmlErrMemory(ctxt, NULL);
  2644. int_error:
  2645. if (rep != NULL)
  2646. xmlFree(rep);
  2647. if (buffer != NULL)
  2648. xmlFree(buffer);
  2649. return(NULL);
  2650. }
  2651. /**
  2652. * xmlStringDecodeEntities:
  2653. * @ctxt: the parser context
  2654. * @str: the input string
  2655. * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
  2656. * @end: an end marker xmlChar, 0 if none
  2657. * @end2: an end marker xmlChar, 0 if none
  2658. * @end3: an end marker xmlChar, 0 if none
  2659. *
  2660. * Takes a entity string content and process to do the adequate substitutions.
  2661. *
  2662. * [67] Reference ::= EntityRef | CharRef
  2663. *
  2664. * [69] PEReference ::= '%' Name ';'
  2665. *
  2666. * Returns A newly allocated string with the substitution done. The caller
  2667. * must deallocate it !
  2668. */
  2669. xmlChar *
  2670. xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
  2671. xmlChar end, xmlChar end2, xmlChar end3) {
  2672. if ((ctxt == NULL) || (str == NULL)) return(NULL);
  2673. return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
  2674. end, end2, end3));
  2675. }
  2676. /************************************************************************
  2677. * *
  2678. * Commodity functions, cleanup needed ? *
  2679. * *
  2680. ************************************************************************/
  2681. /**
  2682. * areBlanks:
  2683. * @ctxt: an XML parser context
  2684. * @str: a xmlChar *
  2685. * @len: the size of @str
  2686. * @blank_chars: we know the chars are blanks
  2687. *
  2688. * Is this a sequence of blank chars that one can ignore ?
  2689. *
  2690. * Returns 1 if ignorable 0 otherwise.
  2691. */
  2692. static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
  2693. int blank_chars) {
  2694. int i, ret;
  2695. xmlNodePtr lastChild;
  2696. /*
  2697. * Don't spend time trying to differentiate them, the same callback is
  2698. * used !
  2699. */
  2700. if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
  2701. return(0);
  2702. /*
  2703. * Check for xml:space value.
  2704. */
  2705. if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
  2706. (*(ctxt->space) == -2))
  2707. return(0);
  2708. /*
  2709. * Check that the string is made of blanks
  2710. */
  2711. if (blank_chars == 0) {
  2712. for (i = 0;i < len;i++)
  2713. if (!(IS_BLANK_CH(str[i]))) return(0);
  2714. }
  2715. /*
  2716. * Look if the element is mixed content in the DTD if available
  2717. */
  2718. if (ctxt->node == NULL) return(0);
  2719. if (ctxt->myDoc != NULL) {
  2720. ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
  2721. if (ret == 0) return(1);
  2722. if (ret == 1) return(0);
  2723. }
  2724. /*
  2725. * Otherwise, heuristic :-\
  2726. */
  2727. if ((RAW != '<') && (RAW != 0xD)) return(0);
  2728. if ((ctxt->node->children == NULL) &&
  2729. (RAW == '<') && (NXT(1) == '/')) return(0);
  2730. lastChild = xmlGetLastChild(ctxt->node);
  2731. if (lastChild == NULL) {
  2732. if ((ctxt->node->type != XML_ELEMENT_NODE) &&
  2733. (ctxt->node->content != NULL)) return(0);
  2734. } else if (xmlNodeIsText(lastChild))
  2735. return(0);
  2736. else if ((ctxt->node->children != NULL) &&
  2737. (xmlNodeIsText(ctxt->node->children)))
  2738. return(0);
  2739. return(1);
  2740. }
  2741. /************************************************************************
  2742. * *
  2743. * Extra stuff for namespace support *
  2744. * Relates to http://www.w3.org/TR/WD-xml-names *
  2745. * *
  2746. ************************************************************************/
  2747. /**
  2748. * xmlSplitQName:
  2749. * @ctxt: an XML parser context
  2750. * @name: an XML parser context
  2751. * @prefix: a xmlChar **
  2752. *
  2753. * parse an UTF8 encoded XML qualified name string
  2754. *
  2755. * [NS 5] QName ::= (Prefix ':')? LocalPart
  2756. *
  2757. * [NS 6] Prefix ::= NCName
  2758. *
  2759. * [NS 7] LocalPart ::= NCName
  2760. *
  2761. * Returns the local part, and prefix is updated
  2762. * to get the Prefix if any.
  2763. */
  2764. xmlChar *
  2765. xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
  2766. xmlChar buf[XML_MAX_NAMELEN + 5];
  2767. xmlChar *buffer = NULL;
  2768. int len = 0;
  2769. int max = XML_MAX_NAMELEN;
  2770. xmlChar *ret = NULL;
  2771. const xmlChar *cur = name;
  2772. int c;
  2773. if (prefix == NULL) return(NULL);
  2774. *prefix = NULL;
  2775. if (cur == NULL) return(NULL);
  2776. #ifndef XML_XML_NAMESPACE
  2777. /* xml: prefix is not really a namespace */
  2778. if ((cur[0] == 'x') && (cur[1] == 'm') &&
  2779. (cur[2] == 'l') && (cur[3] == ':'))
  2780. return(xmlStrdup(name));
  2781. #endif
  2782. /* nasty but well=formed */
  2783. if (cur[0] == ':')
  2784. return(xmlStrdup(name));
  2785. c = *cur++;
  2786. while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
  2787. buf[len++] = c;
  2788. c = *cur++;
  2789. }
  2790. if (len >= max) {
  2791. /*
  2792. * Okay someone managed to make a huge name, so he's ready to pay
  2793. * for the processing speed.
  2794. */
  2795. max = len * 2;
  2796. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  2797. if (buffer == NULL) {
  2798. xmlErrMemory(ctxt, NULL);
  2799. return(NULL);
  2800. }
  2801. memcpy(buffer, buf, len);
  2802. while ((c != 0) && (c != ':')) { /* tested bigname.xml */
  2803. if (len + 10 > max) {
  2804. xmlChar *tmp;
  2805. max *= 2;
  2806. tmp = (xmlChar *) xmlRealloc(buffer,
  2807. max * sizeof(xmlChar));
  2808. if (tmp == NULL) {
  2809. xmlFree(buffer);
  2810. xmlErrMemory(ctxt, NULL);
  2811. return(NULL);
  2812. }
  2813. buffer = tmp;
  2814. }
  2815. buffer[len++] = c;
  2816. c = *cur++;
  2817. }
  2818. buffer[len] = 0;
  2819. }
  2820. if ((c == ':') && (*cur == 0)) {
  2821. if (buffer != NULL)
  2822. xmlFree(buffer);
  2823. *prefix = NULL;
  2824. return(xmlStrdup(name));
  2825. }
  2826. if (buffer == NULL)
  2827. ret = xmlStrndup(buf, len);
  2828. else {
  2829. ret = buffer;
  2830. buffer = NULL;
  2831. max = XML_MAX_NAMELEN;
  2832. }
  2833. if (c == ':') {
  2834. c = *cur;
  2835. *prefix = ret;
  2836. if (c == 0) {
  2837. return(xmlStrndup(BAD_CAST "", 0));
  2838. }
  2839. len = 0;
  2840. /*
  2841. * Check that the first character is proper to start
  2842. * a new name
  2843. */
  2844. if (!(((c >= 0x61) && (c <= 0x7A)) ||
  2845. ((c >= 0x41) && (c <= 0x5A)) ||
  2846. (c == '_') || (c == ':'))) {
  2847. int l;
  2848. int first = CUR_SCHAR(cur, l);
  2849. if (!IS_LETTER(first) && (first != '_')) {
  2850. xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
  2851. "Name %s is not XML Namespace compliant\n",
  2852. name);
  2853. }
  2854. }
  2855. cur++;
  2856. while ((c != 0) && (len < max)) { /* tested bigname2.xml */
  2857. buf[len++] = c;
  2858. c = *cur++;
  2859. }
  2860. if (len >= max) {
  2861. /*
  2862. * Okay someone managed to make a huge name, so he's ready to pay
  2863. * for the processing speed.
  2864. */
  2865. max = len * 2;
  2866. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  2867. if (buffer == NULL) {
  2868. xmlErrMemory(ctxt, NULL);
  2869. return(NULL);
  2870. }
  2871. memcpy(buffer, buf, len);
  2872. while (c != 0) { /* tested bigname2.xml */
  2873. if (len + 10 > max) {
  2874. xmlChar *tmp;
  2875. max *= 2;
  2876. tmp = (xmlChar *) xmlRealloc(buffer,
  2877. max * sizeof(xmlChar));
  2878. if (tmp == NULL) {
  2879. xmlErrMemory(ctxt, NULL);
  2880. xmlFree(buffer);
  2881. return(NULL);
  2882. }
  2883. buffer = tmp;
  2884. }
  2885. buffer[len++] = c;
  2886. c = *cur++;
  2887. }
  2888. buffer[len] = 0;
  2889. }
  2890. if (buffer == NULL)
  2891. ret = xmlStrndup(buf, len);
  2892. else {
  2893. ret = buffer;
  2894. }
  2895. }
  2896. return(ret);
  2897. }
  2898. /************************************************************************
  2899. * *
  2900. * The parser itself *
  2901. * Relates to http://www.w3.org/TR/REC-xml *
  2902. * *
  2903. ************************************************************************/
  2904. /************************************************************************
  2905. * *
  2906. * Routines to parse Name, NCName and NmToken *
  2907. * *
  2908. ************************************************************************/
  2909. #ifdef DEBUG
  2910. static unsigned long nbParseName = 0;
  2911. static unsigned long nbParseNmToken = 0;
  2912. static unsigned long nbParseNCName = 0;
  2913. static unsigned long nbParseNCNameComplex = 0;
  2914. static unsigned long nbParseNameComplex = 0;
  2915. static unsigned long nbParseStringName = 0;
  2916. #endif
  2917. /*
  2918. * The two following functions are related to the change of accepted
  2919. * characters for Name and NmToken in the Revision 5 of XML-1.0
  2920. * They correspond to the modified production [4] and the new production [4a]
  2921. * changes in that revision. Also note that the macros used for the
  2922. * productions Letter, Digit, CombiningChar and Extender are not needed
  2923. * anymore.
  2924. * We still keep compatibility to pre-revision5 parsing semantic if the
  2925. * new XML_PARSE_OLD10 option is given to the parser.
  2926. */
  2927. static int
  2928. xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
  2929. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2930. /*
  2931. * Use the new checks of production [4] [4a] amd [5] of the
  2932. * Update 5 of XML-1.0
  2933. */
  2934. if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2935. (((c >= 'a') && (c <= 'z')) ||
  2936. ((c >= 'A') && (c <= 'Z')) ||
  2937. (c == '_') || (c == ':') ||
  2938. ((c >= 0xC0) && (c <= 0xD6)) ||
  2939. ((c >= 0xD8) && (c <= 0xF6)) ||
  2940. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2941. ((c >= 0x370) && (c <= 0x37D)) ||
  2942. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2943. ((c >= 0x200C) && (c <= 0x200D)) ||
  2944. ((c >= 0x2070) && (c <= 0x218F)) ||
  2945. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2946. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2947. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2948. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2949. ((c >= 0x10000) && (c <= 0xEFFFF))))
  2950. return(1);
  2951. } else {
  2952. if (IS_LETTER(c) || (c == '_') || (c == ':'))
  2953. return(1);
  2954. }
  2955. return(0);
  2956. }
  2957. static int
  2958. xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
  2959. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2960. /*
  2961. * Use the new checks of production [4] [4a] amd [5] of the
  2962. * Update 5 of XML-1.0
  2963. */
  2964. if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2965. (((c >= 'a') && (c <= 'z')) ||
  2966. ((c >= 'A') && (c <= 'Z')) ||
  2967. ((c >= '0') && (c <= '9')) || /* !start */
  2968. (c == '_') || (c == ':') ||
  2969. (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
  2970. ((c >= 0xC0) && (c <= 0xD6)) ||
  2971. ((c >= 0xD8) && (c <= 0xF6)) ||
  2972. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2973. ((c >= 0x300) && (c <= 0x36F)) || /* !start */
  2974. ((c >= 0x370) && (c <= 0x37D)) ||
  2975. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2976. ((c >= 0x200C) && (c <= 0x200D)) ||
  2977. ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
  2978. ((c >= 0x2070) && (c <= 0x218F)) ||
  2979. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2980. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2981. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2982. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2983. ((c >= 0x10000) && (c <= 0xEFFFF))))
  2984. return(1);
  2985. } else {
  2986. if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
  2987. (c == '.') || (c == '-') ||
  2988. (c == '_') || (c == ':') ||
  2989. (IS_COMBINING(c)) ||
  2990. (IS_EXTENDER(c)))
  2991. return(1);
  2992. }
  2993. return(0);
  2994. }
  2995. static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
  2996. int *len, int *alloc, int normalize);
  2997. static const xmlChar *
  2998. xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
  2999. int len = 0, l;
  3000. int c;
  3001. int count = 0;
  3002. #ifdef DEBUG
  3003. nbParseNameComplex++;
  3004. #endif
  3005. /*
  3006. * Handler for more complex cases
  3007. */
  3008. GROW;
  3009. if (ctxt->instate == XML_PARSER_EOF)
  3010. return(NULL);
  3011. c = CUR_CHAR(l);
  3012. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  3013. /*
  3014. * Use the new checks of production [4] [4a] amd [5] of the
  3015. * Update 5 of XML-1.0
  3016. */
  3017. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3018. (!(((c >= 'a') && (c <= 'z')) ||
  3019. ((c >= 'A') && (c <= 'Z')) ||
  3020. (c == '_') || (c == ':') ||
  3021. ((c >= 0xC0) && (c <= 0xD6)) ||
  3022. ((c >= 0xD8) && (c <= 0xF6)) ||
  3023. ((c >= 0xF8) && (c <= 0x2FF)) ||
  3024. ((c >= 0x370) && (c <= 0x37D)) ||
  3025. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  3026. ((c >= 0x200C) && (c <= 0x200D)) ||
  3027. ((c >= 0x2070) && (c <= 0x218F)) ||
  3028. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  3029. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  3030. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  3031. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  3032. ((c >= 0x10000) && (c <= 0xEFFFF))))) {
  3033. return(NULL);
  3034. }
  3035. len += l;
  3036. NEXTL(l);
  3037. c = CUR_CHAR(l);
  3038. while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  3039. (((c >= 'a') && (c <= 'z')) ||
  3040. ((c >= 'A') && (c <= 'Z')) ||
  3041. ((c >= '0') && (c <= '9')) || /* !start */
  3042. (c == '_') || (c == ':') ||
  3043. (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
  3044. ((c >= 0xC0) && (c <= 0xD6)) ||
  3045. ((c >= 0xD8) && (c <= 0xF6)) ||
  3046. ((c >= 0xF8) && (c <= 0x2FF)) ||
  3047. ((c >= 0x300) && (c <= 0x36F)) || /* !start */
  3048. ((c >= 0x370) && (c <= 0x37D)) ||
  3049. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  3050. ((c >= 0x200C) && (c <= 0x200D)) ||
  3051. ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
  3052. ((c >= 0x2070) && (c <= 0x218F)) ||
  3053. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  3054. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  3055. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  3056. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  3057. ((c >= 0x10000) && (c <= 0xEFFFF))
  3058. )) {
  3059. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3060. count = 0;
  3061. GROW;
  3062. if (ctxt->instate == XML_PARSER_EOF)
  3063. return(NULL);
  3064. }
  3065. len += l;
  3066. NEXTL(l);
  3067. c = CUR_CHAR(l);
  3068. }
  3069. } else {
  3070. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3071. (!IS_LETTER(c) && (c != '_') &&
  3072. (c != ':'))) {
  3073. return(NULL);
  3074. }
  3075. len += l;
  3076. NEXTL(l);
  3077. c = CUR_CHAR(l);
  3078. while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
  3079. ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
  3080. (c == '.') || (c == '-') ||
  3081. (c == '_') || (c == ':') ||
  3082. (IS_COMBINING(c)) ||
  3083. (IS_EXTENDER(c)))) {
  3084. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3085. count = 0;
  3086. GROW;
  3087. if (ctxt->instate == XML_PARSER_EOF)
  3088. return(NULL);
  3089. }
  3090. len += l;
  3091. NEXTL(l);
  3092. c = CUR_CHAR(l);
  3093. }
  3094. }
  3095. if ((len > XML_MAX_NAME_LENGTH) &&
  3096. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3097. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
  3098. return(NULL);
  3099. }
  3100. if (ctxt->input->cur - ctxt->input->base < len) {
  3101. /*
  3102. * There were a couple of bugs where PERefs lead to to a change
  3103. * of the buffer. Check the buffer size to avoid passing an invalid
  3104. * pointer to xmlDictLookup.
  3105. */
  3106. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  3107. "unexpected change of input buffer");
  3108. return (NULL);
  3109. }
  3110. if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
  3111. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
  3112. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
  3113. }
  3114. /**
  3115. * xmlParseName:
  3116. * @ctxt: an XML parser context
  3117. *
  3118. * parse an XML name.
  3119. *
  3120. * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
  3121. * CombiningChar | Extender
  3122. *
  3123. * [5] Name ::= (Letter | '_' | ':') (NameChar)*
  3124. *
  3125. * [6] Names ::= Name (#x20 Name)*
  3126. *
  3127. * Returns the Name parsed or NULL
  3128. */
  3129. const xmlChar *
  3130. xmlParseName(xmlParserCtxtPtr ctxt) {
  3131. const xmlChar *in;
  3132. const xmlChar *ret;
  3133. int count = 0;
  3134. GROW;
  3135. #ifdef DEBUG
  3136. nbParseName++;
  3137. #endif
  3138. /*
  3139. * Accelerator for simple ASCII names
  3140. */
  3141. in = ctxt->input->cur;
  3142. if (((*in >= 0x61) && (*in <= 0x7A)) ||
  3143. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3144. (*in == '_') || (*in == ':')) {
  3145. in++;
  3146. while (((*in >= 0x61) && (*in <= 0x7A)) ||
  3147. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3148. ((*in >= 0x30) && (*in <= 0x39)) ||
  3149. (*in == '_') || (*in == '-') ||
  3150. (*in == ':') || (*in == '.'))
  3151. in++;
  3152. if ((*in > 0) && (*in < 0x80)) {
  3153. count = in - ctxt->input->cur;
  3154. if ((count > XML_MAX_NAME_LENGTH) &&
  3155. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3156. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
  3157. return(NULL);
  3158. }
  3159. ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
  3160. ctxt->input->cur = in;
  3161. ctxt->input->col += count;
  3162. if (ret == NULL)
  3163. xmlErrMemory(ctxt, NULL);
  3164. return(ret);
  3165. }
  3166. }
  3167. /* accelerator for special cases */
  3168. return(xmlParseNameComplex(ctxt));
  3169. }
  3170. static const xmlChar *
  3171. xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
  3172. int len = 0, l;
  3173. int c;
  3174. int count = 0;
  3175. size_t startPosition = 0;
  3176. #ifdef DEBUG
  3177. nbParseNCNameComplex++;
  3178. #endif
  3179. /*
  3180. * Handler for more complex cases
  3181. */
  3182. GROW;
  3183. startPosition = CUR_PTR - BASE_PTR;
  3184. c = CUR_CHAR(l);
  3185. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3186. (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
  3187. return(NULL);
  3188. }
  3189. while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
  3190. (xmlIsNameChar(ctxt, c) && (c != ':'))) {
  3191. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3192. if ((len > XML_MAX_NAME_LENGTH) &&
  3193. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3194. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3195. return(NULL);
  3196. }
  3197. count = 0;
  3198. GROW;
  3199. if (ctxt->instate == XML_PARSER_EOF)
  3200. return(NULL);
  3201. }
  3202. len += l;
  3203. NEXTL(l);
  3204. c = CUR_CHAR(l);
  3205. if (c == 0) {
  3206. count = 0;
  3207. /*
  3208. * when shrinking to extend the buffer we really need to preserve
  3209. * the part of the name we already parsed. Hence rolling back
  3210. * by current length.
  3211. */
  3212. ctxt->input->cur -= l;
  3213. GROW;
  3214. if (ctxt->instate == XML_PARSER_EOF)
  3215. return(NULL);
  3216. ctxt->input->cur += l;
  3217. c = CUR_CHAR(l);
  3218. }
  3219. }
  3220. if ((len > XML_MAX_NAME_LENGTH) &&
  3221. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3222. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3223. return(NULL);
  3224. }
  3225. return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
  3226. }
  3227. /**
  3228. * xmlParseNCName:
  3229. * @ctxt: an XML parser context
  3230. * @len: length of the string parsed
  3231. *
  3232. * parse an XML name.
  3233. *
  3234. * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
  3235. * CombiningChar | Extender
  3236. *
  3237. * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
  3238. *
  3239. * Returns the Name parsed or NULL
  3240. */
  3241. static const xmlChar *
  3242. xmlParseNCName(xmlParserCtxtPtr ctxt) {
  3243. const xmlChar *in, *e;
  3244. const xmlChar *ret;
  3245. int count = 0;
  3246. #ifdef DEBUG
  3247. nbParseNCName++;
  3248. #endif
  3249. /*
  3250. * Accelerator for simple ASCII names
  3251. */
  3252. in = ctxt->input->cur;
  3253. e = ctxt->input->end;
  3254. if ((((*in >= 0x61) && (*in <= 0x7A)) ||
  3255. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3256. (*in == '_')) && (in < e)) {
  3257. in++;
  3258. while ((((*in >= 0x61) && (*in <= 0x7A)) ||
  3259. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3260. ((*in >= 0x30) && (*in <= 0x39)) ||
  3261. (*in == '_') || (*in == '-') ||
  3262. (*in == '.')) && (in < e))
  3263. in++;
  3264. if (in >= e)
  3265. goto complex;
  3266. if ((*in > 0) && (*in < 0x80)) {
  3267. count = in - ctxt->input->cur;
  3268. if ((count > XML_MAX_NAME_LENGTH) &&
  3269. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3270. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3271. return(NULL);
  3272. }
  3273. ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
  3274. ctxt->input->cur = in;
  3275. ctxt->input->col += count;
  3276. if (ret == NULL) {
  3277. xmlErrMemory(ctxt, NULL);
  3278. }
  3279. return(ret);
  3280. }
  3281. }
  3282. complex:
  3283. return(xmlParseNCNameComplex(ctxt));
  3284. }
  3285. /**
  3286. * xmlParseNameAndCompare:
  3287. * @ctxt: an XML parser context
  3288. *
  3289. * parse an XML name and compares for match
  3290. * (specialized for endtag parsing)
  3291. *
  3292. * Returns NULL for an illegal name, (xmlChar*) 1 for success
  3293. * and the name for mismatch
  3294. */
  3295. static const xmlChar *
  3296. xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
  3297. register const xmlChar *cmp = other;
  3298. register const xmlChar *in;
  3299. const xmlChar *ret;
  3300. GROW;
  3301. if (ctxt->instate == XML_PARSER_EOF)
  3302. return(NULL);
  3303. in = ctxt->input->cur;
  3304. while (*in != 0 && *in == *cmp) {
  3305. ++in;
  3306. ++cmp;
  3307. }
  3308. if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
  3309. /* success */
  3310. ctxt->input->col += in - ctxt->input->cur;
  3311. ctxt->input->cur = in;
  3312. return (const xmlChar*) 1;
  3313. }
  3314. /* failure (or end of input buffer), check with full function */
  3315. ret = xmlParseName (ctxt);
  3316. /* strings coming from the dictionary direct compare possible */
  3317. if (ret == other) {
  3318. return (const xmlChar*) 1;
  3319. }
  3320. return ret;
  3321. }
  3322. /**
  3323. * xmlParseStringName:
  3324. * @ctxt: an XML parser context
  3325. * @str: a pointer to the string pointer (IN/OUT)
  3326. *
  3327. * parse an XML name.
  3328. *
  3329. * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
  3330. * CombiningChar | Extender
  3331. *
  3332. * [5] Name ::= (Letter | '_' | ':') (NameChar)*
  3333. *
  3334. * [6] Names ::= Name (#x20 Name)*
  3335. *
  3336. * Returns the Name parsed or NULL. The @str pointer
  3337. * is updated to the current location in the string.
  3338. */
  3339. static xmlChar *
  3340. xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
  3341. xmlChar buf[XML_MAX_NAMELEN + 5];
  3342. const xmlChar *cur = *str;
  3343. int len = 0, l;
  3344. int c;
  3345. #ifdef DEBUG
  3346. nbParseStringName++;
  3347. #endif
  3348. c = CUR_SCHAR(cur, l);
  3349. if (!xmlIsNameStartChar(ctxt, c)) {
  3350. return(NULL);
  3351. }
  3352. COPY_BUF(l,buf,len,c);
  3353. cur += l;
  3354. c = CUR_SCHAR(cur, l);
  3355. while (xmlIsNameChar(ctxt, c)) {
  3356. COPY_BUF(l,buf,len,c);
  3357. cur += l;
  3358. c = CUR_SCHAR(cur, l);
  3359. if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
  3360. /*
  3361. * Okay someone managed to make a huge name, so he's ready to pay
  3362. * for the processing speed.
  3363. */
  3364. xmlChar *buffer;
  3365. int max = len * 2;
  3366. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  3367. if (buffer == NULL) {
  3368. xmlErrMemory(ctxt, NULL);
  3369. return(NULL);
  3370. }
  3371. memcpy(buffer, buf, len);
  3372. while (xmlIsNameChar(ctxt, c)) {
  3373. if (len + 10 > max) {
  3374. xmlChar *tmp;
  3375. if ((len > XML_MAX_NAME_LENGTH) &&
  3376. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3377. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3378. xmlFree(buffer);
  3379. return(NULL);
  3380. }
  3381. max *= 2;
  3382. tmp = (xmlChar *) xmlRealloc(buffer,
  3383. max * sizeof(xmlChar));
  3384. if (tmp == NULL) {
  3385. xmlErrMemory(ctxt, NULL);
  3386. xmlFree(buffer);
  3387. return(NULL);
  3388. }
  3389. buffer = tmp;
  3390. }
  3391. COPY_BUF(l,buffer,len,c);
  3392. cur += l;
  3393. c = CUR_SCHAR(cur, l);
  3394. }
  3395. buffer[len] = 0;
  3396. *str = cur;
  3397. return(buffer);
  3398. }
  3399. }
  3400. if ((len > XML_MAX_NAME_LENGTH) &&
  3401. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3402. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3403. return(NULL);
  3404. }
  3405. *str = cur;
  3406. return(xmlStrndup(buf, len));
  3407. }
  3408. /**
  3409. * xmlParseNmtoken:
  3410. * @ctxt: an XML parser context
  3411. *
  3412. * parse an XML Nmtoken.
  3413. *
  3414. * [7] Nmtoken ::= (NameChar)+
  3415. *
  3416. * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
  3417. *
  3418. * Returns the Nmtoken parsed or NULL
  3419. */
  3420. xmlChar *
  3421. xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
  3422. xmlChar buf[XML_MAX_NAMELEN + 5];
  3423. int len = 0, l;
  3424. int c;
  3425. int count = 0;
  3426. #ifdef DEBUG
  3427. nbParseNmToken++;
  3428. #endif
  3429. GROW;
  3430. if (ctxt->instate == XML_PARSER_EOF)
  3431. return(NULL);
  3432. c = CUR_CHAR(l);
  3433. while (xmlIsNameChar(ctxt, c)) {
  3434. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3435. count = 0;
  3436. GROW;
  3437. }
  3438. COPY_BUF(l,buf,len,c);
  3439. NEXTL(l);
  3440. c = CUR_CHAR(l);
  3441. if (c == 0) {
  3442. count = 0;
  3443. GROW;
  3444. if (ctxt->instate == XML_PARSER_EOF)
  3445. return(NULL);
  3446. c = CUR_CHAR(l);
  3447. }
  3448. if (len >= XML_MAX_NAMELEN) {
  3449. /*
  3450. * Okay someone managed to make a huge token, so he's ready to pay
  3451. * for the processing speed.
  3452. */
  3453. xmlChar *buffer;
  3454. int max = len * 2;
  3455. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  3456. if (buffer == NULL) {
  3457. xmlErrMemory(ctxt, NULL);
  3458. return(NULL);
  3459. }
  3460. memcpy(buffer, buf, len);
  3461. while (xmlIsNameChar(ctxt, c)) {
  3462. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3463. count = 0;
  3464. GROW;
  3465. if (ctxt->instate == XML_PARSER_EOF) {
  3466. xmlFree(buffer);
  3467. return(NULL);
  3468. }
  3469. }
  3470. if (len + 10 > max) {
  3471. xmlChar *tmp;
  3472. if ((max > XML_MAX_NAME_LENGTH) &&
  3473. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3474. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
  3475. xmlFree(buffer);
  3476. return(NULL);
  3477. }
  3478. max *= 2;
  3479. tmp = (xmlChar *) xmlRealloc(buffer,
  3480. max * sizeof(xmlChar));
  3481. if (tmp == NULL) {
  3482. xmlErrMemory(ctxt, NULL);
  3483. xmlFree(buffer);
  3484. return(NULL);
  3485. }
  3486. buffer = tmp;
  3487. }
  3488. COPY_BUF(l,buffer,len,c);
  3489. NEXTL(l);
  3490. c = CUR_CHAR(l);
  3491. }
  3492. buffer[len] = 0;
  3493. return(buffer);
  3494. }
  3495. }
  3496. if (len == 0)
  3497. return(NULL);
  3498. if ((len > XML_MAX_NAME_LENGTH) &&
  3499. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3500. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
  3501. return(NULL);
  3502. }
  3503. return(xmlStrndup(buf, len));
  3504. }
  3505. /**
  3506. * xmlParseEntityValue:
  3507. * @ctxt: an XML parser context
  3508. * @orig: if non-NULL store a copy of the original entity value
  3509. *
  3510. * parse a value for ENTITY declarations
  3511. *
  3512. * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
  3513. * "'" ([^%&'] | PEReference | Reference)* "'"
  3514. *
  3515. * Returns the EntityValue parsed with reference substituted or NULL
  3516. */
  3517. xmlChar *
  3518. xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
  3519. xmlChar *buf = NULL;
  3520. int len = 0;
  3521. int size = XML_PARSER_BUFFER_SIZE;
  3522. int c, l;
  3523. xmlChar stop;
  3524. xmlChar *ret = NULL;
  3525. const xmlChar *cur = NULL;
  3526. xmlParserInputPtr input;
  3527. if (RAW == '"') stop = '"';
  3528. else if (RAW == '\'') stop = '\'';
  3529. else {
  3530. xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
  3531. return(NULL);
  3532. }
  3533. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  3534. if (buf == NULL) {
  3535. xmlErrMemory(ctxt, NULL);
  3536. return(NULL);
  3537. }
  3538. /*
  3539. * The content of the entity definition is copied in a buffer.
  3540. */
  3541. ctxt->instate = XML_PARSER_ENTITY_VALUE;
  3542. input = ctxt->input;
  3543. GROW;
  3544. if (ctxt->instate == XML_PARSER_EOF)
  3545. goto error;
  3546. NEXT;
  3547. c = CUR_CHAR(l);
  3548. /*
  3549. * NOTE: 4.4.5 Included in Literal
  3550. * When a parameter entity reference appears in a literal entity
  3551. * value, ... a single or double quote character in the replacement
  3552. * text is always treated as a normal data character and will not
  3553. * terminate the literal.
  3554. * In practice it means we stop the loop only when back at parsing
  3555. * the initial entity and the quote is found
  3556. */
  3557. while (((IS_CHAR(c)) && ((c != stop) || /* checked */
  3558. (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
  3559. if (len + 5 >= size) {
  3560. xmlChar *tmp;
  3561. size *= 2;
  3562. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  3563. if (tmp == NULL) {
  3564. xmlErrMemory(ctxt, NULL);
  3565. goto error;
  3566. }
  3567. buf = tmp;
  3568. }
  3569. COPY_BUF(l,buf,len,c);
  3570. NEXTL(l);
  3571. GROW;
  3572. c = CUR_CHAR(l);
  3573. if (c == 0) {
  3574. GROW;
  3575. c = CUR_CHAR(l);
  3576. }
  3577. }
  3578. buf[len] = 0;
  3579. if (ctxt->instate == XML_PARSER_EOF)
  3580. goto error;
  3581. if (c != stop) {
  3582. xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
  3583. goto error;
  3584. }
  3585. NEXT;
  3586. /*
  3587. * Raise problem w.r.t. '&' and '%' being used in non-entities
  3588. * reference constructs. Note Charref will be handled in
  3589. * xmlStringDecodeEntities()
  3590. */
  3591. cur = buf;
  3592. while (*cur != 0) { /* non input consuming */
  3593. if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
  3594. xmlChar *name;
  3595. xmlChar tmp = *cur;
  3596. int nameOk = 0;
  3597. cur++;
  3598. name = xmlParseStringName(ctxt, &cur);
  3599. if (name != NULL) {
  3600. nameOk = 1;
  3601. xmlFree(name);
  3602. }
  3603. if ((nameOk == 0) || (*cur != ';')) {
  3604. xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
  3605. "EntityValue: '%c' forbidden except for entities references\n",
  3606. tmp);
  3607. goto error;
  3608. }
  3609. if ((tmp == '%') && (ctxt->inSubset == 1) &&
  3610. (ctxt->inputNr == 1)) {
  3611. xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
  3612. goto error;
  3613. }
  3614. if (*cur == 0)
  3615. break;
  3616. }
  3617. cur++;
  3618. }
  3619. /*
  3620. * Then PEReference entities are substituted.
  3621. *
  3622. * NOTE: 4.4.7 Bypassed
  3623. * When a general entity reference appears in the EntityValue in
  3624. * an entity declaration, it is bypassed and left as is.
  3625. * so XML_SUBSTITUTE_REF is not set here.
  3626. */
  3627. ++ctxt->depth;
  3628. ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
  3629. 0, 0, 0);
  3630. --ctxt->depth;
  3631. if (orig != NULL) {
  3632. *orig = buf;
  3633. buf = NULL;
  3634. }
  3635. error:
  3636. if (buf != NULL)
  3637. xmlFree(buf);
  3638. return(ret);
  3639. }
  3640. /**
  3641. * xmlParseAttValueComplex:
  3642. * @ctxt: an XML parser context
  3643. * @len: the resulting attribute len
  3644. * @normalize: whether to apply the inner normalization
  3645. *
  3646. * parse a value for an attribute, this is the fallback function
  3647. * of xmlParseAttValue() when the attribute parsing requires handling
  3648. * of non-ASCII characters, or normalization compaction.
  3649. *
  3650. * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
  3651. */
  3652. static xmlChar *
  3653. xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
  3654. xmlChar limit = 0;
  3655. xmlChar *buf = NULL;
  3656. xmlChar *rep = NULL;
  3657. size_t len = 0;
  3658. size_t buf_size = 0;
  3659. int c, l, in_space = 0;
  3660. xmlChar *current = NULL;
  3661. xmlEntityPtr ent;
  3662. if (NXT(0) == '"') {
  3663. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  3664. limit = '"';
  3665. NEXT;
  3666. } else if (NXT(0) == '\'') {
  3667. limit = '\'';
  3668. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  3669. NEXT;
  3670. } else {
  3671. xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
  3672. return(NULL);
  3673. }
  3674. /*
  3675. * allocate a translation buffer.
  3676. */
  3677. buf_size = XML_PARSER_BUFFER_SIZE;
  3678. buf = (xmlChar *) xmlMallocAtomic(buf_size);
  3679. if (buf == NULL) goto mem_error;
  3680. /*
  3681. * OK loop until we reach one of the ending char or a size limit.
  3682. */
  3683. c = CUR_CHAR(l);
  3684. while (((NXT(0) != limit) && /* checked */
  3685. (IS_CHAR(c)) && (c != '<')) &&
  3686. (ctxt->instate != XML_PARSER_EOF)) {
  3687. /*
  3688. * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
  3689. * special option is given
  3690. */
  3691. if ((len > XML_MAX_TEXT_LENGTH) &&
  3692. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3693. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3694. "AttValue length too long\n");
  3695. goto mem_error;
  3696. }
  3697. if (c == '&') {
  3698. in_space = 0;
  3699. if (NXT(1) == '#') {
  3700. int val = xmlParseCharRef(ctxt);
  3701. if (val == '&') {
  3702. if (ctxt->replaceEntities) {
  3703. if (len + 10 > buf_size) {
  3704. growBuffer(buf, 10);
  3705. }
  3706. buf[len++] = '&';
  3707. } else {
  3708. /*
  3709. * The reparsing will be done in xmlStringGetNodeList()
  3710. * called by the attribute() function in SAX.c
  3711. */
  3712. if (len + 10 > buf_size) {
  3713. growBuffer(buf, 10);
  3714. }
  3715. buf[len++] = '&';
  3716. buf[len++] = '#';
  3717. buf[len++] = '3';
  3718. buf[len++] = '8';
  3719. buf[len++] = ';';
  3720. }
  3721. } else if (val != 0) {
  3722. if (len + 10 > buf_size) {
  3723. growBuffer(buf, 10);
  3724. }
  3725. len += xmlCopyChar(0, &buf[len], val);
  3726. }
  3727. } else {
  3728. ent = xmlParseEntityRef(ctxt);
  3729. ctxt->nbentities++;
  3730. if (ent != NULL)
  3731. ctxt->nbentities += ent->owner;
  3732. if ((ent != NULL) &&
  3733. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  3734. if (len + 10 > buf_size) {
  3735. growBuffer(buf, 10);
  3736. }
  3737. if ((ctxt->replaceEntities == 0) &&
  3738. (ent->content[0] == '&')) {
  3739. buf[len++] = '&';
  3740. buf[len++] = '#';
  3741. buf[len++] = '3';
  3742. buf[len++] = '8';
  3743. buf[len++] = ';';
  3744. } else {
  3745. buf[len++] = ent->content[0];
  3746. }
  3747. } else if ((ent != NULL) &&
  3748. (ctxt->replaceEntities != 0)) {
  3749. if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
  3750. ++ctxt->depth;
  3751. rep = xmlStringDecodeEntities(ctxt, ent->content,
  3752. XML_SUBSTITUTE_REF,
  3753. 0, 0, 0);
  3754. --ctxt->depth;
  3755. if (rep != NULL) {
  3756. current = rep;
  3757. while (*current != 0) { /* non input consuming */
  3758. if ((*current == 0xD) || (*current == 0xA) ||
  3759. (*current == 0x9)) {
  3760. buf[len++] = 0x20;
  3761. current++;
  3762. } else
  3763. buf[len++] = *current++;
  3764. if (len + 10 > buf_size) {
  3765. growBuffer(buf, 10);
  3766. }
  3767. }
  3768. xmlFree(rep);
  3769. rep = NULL;
  3770. }
  3771. } else {
  3772. if (len + 10 > buf_size) {
  3773. growBuffer(buf, 10);
  3774. }
  3775. if (ent->content != NULL)
  3776. buf[len++] = ent->content[0];
  3777. }
  3778. } else if (ent != NULL) {
  3779. int i = xmlStrlen(ent->name);
  3780. const xmlChar *cur = ent->name;
  3781. /*
  3782. * This may look absurd but is needed to detect
  3783. * entities problems
  3784. */
  3785. if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  3786. (ent->content != NULL) && (ent->checked == 0)) {
  3787. unsigned long oldnbent = ctxt->nbentities, diff;
  3788. ++ctxt->depth;
  3789. rep = xmlStringDecodeEntities(ctxt, ent->content,
  3790. XML_SUBSTITUTE_REF, 0, 0, 0);
  3791. --ctxt->depth;
  3792. diff = ctxt->nbentities - oldnbent + 1;
  3793. if (diff > INT_MAX / 2)
  3794. diff = INT_MAX / 2;
  3795. ent->checked = diff * 2;
  3796. if (rep != NULL) {
  3797. if (xmlStrchr(rep, '<'))
  3798. ent->checked |= 1;
  3799. xmlFree(rep);
  3800. rep = NULL;
  3801. } else {
  3802. ent->content[0] = 0;
  3803. }
  3804. }
  3805. /*
  3806. * Just output the reference
  3807. */
  3808. buf[len++] = '&';
  3809. while (len + i + 10 > buf_size) {
  3810. growBuffer(buf, i + 10);
  3811. }
  3812. for (;i > 0;i--)
  3813. buf[len++] = *cur++;
  3814. buf[len++] = ';';
  3815. }
  3816. }
  3817. } else {
  3818. if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
  3819. if ((len != 0) || (!normalize)) {
  3820. if ((!normalize) || (!in_space)) {
  3821. COPY_BUF(l,buf,len,0x20);
  3822. while (len + 10 > buf_size) {
  3823. growBuffer(buf, 10);
  3824. }
  3825. }
  3826. in_space = 1;
  3827. }
  3828. } else {
  3829. in_space = 0;
  3830. COPY_BUF(l,buf,len,c);
  3831. if (len + 10 > buf_size) {
  3832. growBuffer(buf, 10);
  3833. }
  3834. }
  3835. NEXTL(l);
  3836. }
  3837. GROW;
  3838. c = CUR_CHAR(l);
  3839. }
  3840. if (ctxt->instate == XML_PARSER_EOF)
  3841. goto error;
  3842. if ((in_space) && (normalize)) {
  3843. while ((len > 0) && (buf[len - 1] == 0x20)) len--;
  3844. }
  3845. buf[len] = 0;
  3846. if (RAW == '<') {
  3847. xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
  3848. } else if (RAW != limit) {
  3849. if ((c != 0) && (!IS_CHAR(c))) {
  3850. xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
  3851. "invalid character in attribute value\n");
  3852. } else {
  3853. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3854. "AttValue: ' expected\n");
  3855. }
  3856. } else
  3857. NEXT;
  3858. /*
  3859. * There we potentially risk an overflow, don't allow attribute value of
  3860. * length more than INT_MAX it is a very reasonable assumption !
  3861. */
  3862. if (len >= INT_MAX) {
  3863. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3864. "AttValue length too long\n");
  3865. goto mem_error;
  3866. }
  3867. if (attlen != NULL) *attlen = (int) len;
  3868. return(buf);
  3869. mem_error:
  3870. xmlErrMemory(ctxt, NULL);
  3871. error:
  3872. if (buf != NULL)
  3873. xmlFree(buf);
  3874. if (rep != NULL)
  3875. xmlFree(rep);
  3876. return(NULL);
  3877. }
  3878. /**
  3879. * xmlParseAttValue:
  3880. * @ctxt: an XML parser context
  3881. *
  3882. * parse a value for an attribute
  3883. * Note: the parser won't do substitution of entities here, this
  3884. * will be handled later in xmlStringGetNodeList
  3885. *
  3886. * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
  3887. * "'" ([^<&'] | Reference)* "'"
  3888. *
  3889. * 3.3.3 Attribute-Value Normalization:
  3890. * Before the value of an attribute is passed to the application or
  3891. * checked for validity, the XML processor must normalize it as follows:
  3892. * - a character reference is processed by appending the referenced
  3893. * character to the attribute value
  3894. * - an entity reference is processed by recursively processing the
  3895. * replacement text of the entity
  3896. * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
  3897. * appending #x20 to the normalized value, except that only a single
  3898. * #x20 is appended for a "#xD#xA" sequence that is part of an external
  3899. * parsed entity or the literal entity value of an internal parsed entity
  3900. * - other characters are processed by appending them to the normalized value
  3901. * If the declared value is not CDATA, then the XML processor must further
  3902. * process the normalized attribute value by discarding any leading and
  3903. * trailing space (#x20) characters, and by replacing sequences of space
  3904. * (#x20) characters by a single space (#x20) character.
  3905. * All attributes for which no declaration has been read should be treated
  3906. * by a non-validating parser as if declared CDATA.
  3907. *
  3908. * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
  3909. */
  3910. xmlChar *
  3911. xmlParseAttValue(xmlParserCtxtPtr ctxt) {
  3912. if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
  3913. return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
  3914. }
  3915. /**
  3916. * xmlParseSystemLiteral:
  3917. * @ctxt: an XML parser context
  3918. *
  3919. * parse an XML Literal
  3920. *
  3921. * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
  3922. *
  3923. * Returns the SystemLiteral parsed or NULL
  3924. */
  3925. xmlChar *
  3926. xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
  3927. xmlChar *buf = NULL;
  3928. int len = 0;
  3929. int size = XML_PARSER_BUFFER_SIZE;
  3930. int cur, l;
  3931. xmlChar stop;
  3932. int state = ctxt->instate;
  3933. int count = 0;
  3934. SHRINK;
  3935. if (RAW == '"') {
  3936. NEXT;
  3937. stop = '"';
  3938. } else if (RAW == '\'') {
  3939. NEXT;
  3940. stop = '\'';
  3941. } else {
  3942. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
  3943. return(NULL);
  3944. }
  3945. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  3946. if (buf == NULL) {
  3947. xmlErrMemory(ctxt, NULL);
  3948. return(NULL);
  3949. }
  3950. ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
  3951. cur = CUR_CHAR(l);
  3952. while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
  3953. if (len + 5 >= size) {
  3954. xmlChar *tmp;
  3955. if ((size > XML_MAX_NAME_LENGTH) &&
  3956. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3957. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
  3958. xmlFree(buf);
  3959. ctxt->instate = (xmlParserInputState) state;
  3960. return(NULL);
  3961. }
  3962. size *= 2;
  3963. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  3964. if (tmp == NULL) {
  3965. xmlFree(buf);
  3966. xmlErrMemory(ctxt, NULL);
  3967. ctxt->instate = (xmlParserInputState) state;
  3968. return(NULL);
  3969. }
  3970. buf = tmp;
  3971. }
  3972. count++;
  3973. if (count > 50) {
  3974. SHRINK;
  3975. GROW;
  3976. count = 0;
  3977. if (ctxt->instate == XML_PARSER_EOF) {
  3978. xmlFree(buf);
  3979. return(NULL);
  3980. }
  3981. }
  3982. COPY_BUF(l,buf,len,cur);
  3983. NEXTL(l);
  3984. cur = CUR_CHAR(l);
  3985. if (cur == 0) {
  3986. GROW;
  3987. SHRINK;
  3988. cur = CUR_CHAR(l);
  3989. }
  3990. }
  3991. buf[len] = 0;
  3992. ctxt->instate = (xmlParserInputState) state;
  3993. if (!IS_CHAR(cur)) {
  3994. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
  3995. } else {
  3996. NEXT;
  3997. }
  3998. return(buf);
  3999. }
  4000. /**
  4001. * xmlParsePubidLiteral:
  4002. * @ctxt: an XML parser context
  4003. *
  4004. * parse an XML public literal
  4005. *
  4006. * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  4007. *
  4008. * Returns the PubidLiteral parsed or NULL.
  4009. */
  4010. xmlChar *
  4011. xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
  4012. xmlChar *buf = NULL;
  4013. int len = 0;
  4014. int size = XML_PARSER_BUFFER_SIZE;
  4015. xmlChar cur;
  4016. xmlChar stop;
  4017. int count = 0;
  4018. xmlParserInputState oldstate = ctxt->instate;
  4019. SHRINK;
  4020. if (RAW == '"') {
  4021. NEXT;
  4022. stop = '"';
  4023. } else if (RAW == '\'') {
  4024. NEXT;
  4025. stop = '\'';
  4026. } else {
  4027. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
  4028. return(NULL);
  4029. }
  4030. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4031. if (buf == NULL) {
  4032. xmlErrMemory(ctxt, NULL);
  4033. return(NULL);
  4034. }
  4035. ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
  4036. cur = CUR;
  4037. while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
  4038. if (len + 1 >= size) {
  4039. xmlChar *tmp;
  4040. if ((size > XML_MAX_NAME_LENGTH) &&
  4041. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4042. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
  4043. xmlFree(buf);
  4044. return(NULL);
  4045. }
  4046. size *= 2;
  4047. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  4048. if (tmp == NULL) {
  4049. xmlErrMemory(ctxt, NULL);
  4050. xmlFree(buf);
  4051. return(NULL);
  4052. }
  4053. buf = tmp;
  4054. }
  4055. buf[len++] = cur;
  4056. count++;
  4057. if (count > 50) {
  4058. SHRINK;
  4059. GROW;
  4060. count = 0;
  4061. if (ctxt->instate == XML_PARSER_EOF) {
  4062. xmlFree(buf);
  4063. return(NULL);
  4064. }
  4065. }
  4066. NEXT;
  4067. cur = CUR;
  4068. if (cur == 0) {
  4069. GROW;
  4070. SHRINK;
  4071. cur = CUR;
  4072. }
  4073. }
  4074. buf[len] = 0;
  4075. if (cur != stop) {
  4076. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
  4077. } else {
  4078. NEXT;
  4079. }
  4080. ctxt->instate = oldstate;
  4081. return(buf);
  4082. }
  4083. static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
  /*
   * Lookup table used by the inner loop of the character data scanner.
   * Entry i holds i itself when byte i may be consumed by that loop and
   * 0 when the loop has to stop: all control characters except 0x09
   * (so CR and LF stop the loop), '&', '<', ']' and every byte >= 0x80.
   * Entries 0x80..0xFF are not listed and are therefore zero-initialized.
   */
  static const unsigned char test_char_data[256] = {
      /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
                 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
      /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
                 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
      /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
                 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
      /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
                 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
      /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
                 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
      /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
                 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
      /* 0x80 .. 0xFF: non-ascii, implicitly 0x00 */
  };
  4121. /**
  4122. * xmlParseCharData:
  4123. * @ctxt: an XML parser context
  4124. * @cdata: int indicating whether we are within a CDATA section
  4125. *
  4126. * Parse a CharData section.
  4127. * If we are within a CDATA section ']]>' marks the end of the section.
  4128. *
  4129. * The right angle bracket (>) may be represented using the string "&gt;",
  4130. * and must, for compatibility, be escaped using "&gt;" or a character
  4131. * reference when it appears in the string "]]>" in content, when that
  4132. * string is not marking the end of a CDATA section.
  4133. *
  4134. * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
        *
        * When @cdata is 0 a fast path scans the input buffer in place and
        * passes each run it finds directly to the SAX ignorableWhitespace or
        * characters callback, without copying. The fast path returns when it
        * reaches '<' or '&', when "]]>" is found (XML_ERR_MISPLACED_CDATA_END),
        * when a callback moved the parser out of XML_PARSER_CONTENT, or when
        * the parser reached XML_PARSER_EOF. If the do/while condition fails
        * instead, or if @cdata is non-zero, the saved line/column are written
        * back and parsing continues in xmlParseCharDataComplex().
  4135. */
  4136. void
  4137. xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
  4138. const xmlChar *in; /* scan pointer of the fast path */
  4139. int nbchar = 0; /* length of the run between input->cur and in */
  4140. int line = ctxt->input->line; /* position written back before the slow path */
  4141. int col = ctxt->input->col;
  4142. int ccol; /* local column counter used while scanning a run */
  4143. SHRINK;
  4144. GROW;
  4145. /*
  4146. * Accelerated common case where the input doesn't need to be
  4147. * modified before passing it to the handler.
  4148. */
  4149. if (!cdata) {
  4150. in = ctxt->input->cur;
  4151. do {
  4152. get_more_space:
  4153. while (*in == 0x20) { in++; ctxt->input->col++; } /* skip spaces, counting columns */
  4154. if (*in == 0xA) {
  4155. do { /* each LF starts a new line at column 1 */
  4156. ctxt->input->line++; ctxt->input->col = 1;
  4157. in++;
  4158. } while (*in == 0xA);
  4159. goto get_more_space;
  4160. }
  4161. if (*in == '<') { /* only spaces and LFs were seen before this '<' */
  4162. nbchar = in - ctxt->input->cur;
  4163. if (nbchar > 0) {
  4164. const xmlChar *tmp = ctxt->input->cur; /* start of the run */
  4165. ctxt->input->cur = in; /* consume the run before calling back */
  4166. if ((ctxt->sax != NULL) &&
  4167. (ctxt->sax->ignorableWhitespace !=
  4168. ctxt->sax->characters)) {
  4169. if (areBlanks(ctxt, tmp, nbchar, 1)) { /* last argument is 1 on this path only */
  4170. if (ctxt->sax->ignorableWhitespace != NULL)
  4171. ctxt->sax->ignorableWhitespace(ctxt->userData,
  4172. tmp, nbchar);
  4173. } else {
  4174. if (ctxt->sax->characters != NULL)
  4175. ctxt->sax->characters(ctxt->userData,
  4176. tmp, nbchar);
  4177. if (*ctxt->space == -1) /* NOTE(review): -1/-2 meaning is defined outside this chunk */
  4178. *ctxt->space = -2;
  4179. }
  4180. } else if ((ctxt->sax != NULL) &&
  4181. (ctxt->sax->characters != NULL)) {
  4182. ctxt->sax->characters(ctxt->userData,
  4183. tmp, nbchar);
  4184. }
  4185. }
  4186. return;
  4187. }
  4188. get_more:
  4189. ccol = ctxt->input->col;
  4190. while (test_char_data[*in]) { /* a zero table entry ends the run */
  4191. in++;
  4192. ccol++;
  4193. }
  4194. ctxt->input->col = ccol;
  4195. if (*in == 0xA) {
  4196. do { /* LF runs stay inside the current run of text */
  4197. ctxt->input->line++; ctxt->input->col = 1;
  4198. in++;
  4199. } while (*in == 0xA);
  4200. goto get_more;
  4201. }
  4202. if (*in == ']') {
  4203. if ((in[1] == ']') && (in[2] == '>')) { /* "]]>" is not allowed here */
  4204. xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
  4205. ctxt->input->cur = in + 1; /* resume after the first ']' */
  4206. return;
  4207. }
  4208. in++; /* a ']' not starting "]]>" is ordinary text */
  4209. ctxt->input->col++;
  4210. goto get_more;
  4211. }
  4212. nbchar = in - ctxt->input->cur;
  4213. if (nbchar > 0) {
  4214. if ((ctxt->sax != NULL) &&
  4215. (ctxt->sax->ignorableWhitespace !=
  4216. ctxt->sax->characters) &&
  4217. (IS_BLANK_CH(*ctxt->input->cur))) { /* run starts with a blank: may be ignorable */
  4218. const xmlChar *tmp = ctxt->input->cur;
  4219. ctxt->input->cur = in;
  4220. if (areBlanks(ctxt, tmp, nbchar, 0)) {
  4221. if (ctxt->sax->ignorableWhitespace != NULL)
  4222. ctxt->sax->ignorableWhitespace(ctxt->userData,
  4223. tmp, nbchar);
  4224. } else {
  4225. if (ctxt->sax->characters != NULL)
  4226. ctxt->sax->characters(ctxt->userData,
  4227. tmp, nbchar);
  4228. if (*ctxt->space == -1)
  4229. *ctxt->space = -2;
  4230. }
  4231. line = ctxt->input->line; /* remember the position reached after this delivery */
  4232. col = ctxt->input->col;
  4233. } else if (ctxt->sax != NULL) {
  4234. if (ctxt->sax->characters != NULL)
  4235. ctxt->sax->characters(ctxt->userData,
  4236. ctxt->input->cur, nbchar);
  4237. line = ctxt->input->line;
  4238. col = ctxt->input->col;
  4239. }
  4240. /* something really bad happened in the SAX callback */
  4241. if (ctxt->instate != XML_PARSER_CONTENT)
  4242. return;
  4243. }
  4244. ctxt->input->cur = in;
  4245. if (*in == 0xD) {
  4246. in++;
  4247. if (*in == 0xA) { /* CR LF: the CR is dropped, input->cur is left on the LF */
  4248. ctxt->input->cur = in;
  4249. in++;
  4250. ctxt->input->line++; ctxt->input->col = 1;
  4251. continue; /* while: re-tests the loop condition on the byte after the LF */
  4252. }
  4253. in--; /* lone CR: step back and let the checks below see it */
  4254. }
  4255. if (*in == '<') {
  4256. return;
  4257. }
  4258. if (*in == '&') {
  4259. return;
  4260. }
  4261. SHRINK;
  4262. GROW;
  4263. if (ctxt->instate == XML_PARSER_EOF)
  4264. return;
  4265. in = ctxt->input->cur; /* reload the scan pointer after SHRINK/GROW */
  4266. } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); /* stay on the fast path for printable ASCII, tab and LF */
  4267. nbchar = 0;
  4268. }
  4269. ctxt->input->line = line; /* write back the saved position before the slow path */
  4270. ctxt->input->col = col;
  4271. xmlParseCharDataComplex(ctxt, cdata);
  4272. }
  4273. /**
  4274. * xmlParseCharDataComplex:
  4275. * @ctxt: an XML parser context
  4276. * @cdata: int indicating whether we are within a CDATA section
  4277. *
  4278. * parse a CharData section.this is the fallback function
  4279. * of xmlParseCharData() when the parsing requires handling
  4280. * of non-ASCII characters.
  4281. */
  4282. static void
  4283. xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
  4284. xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
  4285. int nbchar = 0;
  4286. int cur, l;
  4287. int count = 0;
  4288. SHRINK;
  4289. GROW;
  4290. cur = CUR_CHAR(l);
  4291. while ((cur != '<') && /* checked */
  4292. (cur != '&') &&
  4293. (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
  4294. if ((cur == ']') && (NXT(1) == ']') &&
  4295. (NXT(2) == '>')) {
  4296. if (cdata) break;
  4297. else {
  4298. xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
  4299. }
  4300. }
  4301. COPY_BUF(l,buf,nbchar,cur);
  4302. /* move current position before possible calling of ctxt->sax->characters */
  4303. NEXTL(l);
  4304. cur = CUR_CHAR(l);
  4305. if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
  4306. buf[nbchar] = 0;
  4307. /*
  4308. * OK the segment is to be consumed as chars.
  4309. */
  4310. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  4311. if (areBlanks(ctxt, buf, nbchar, 0)) {
  4312. if (ctxt->sax->ignorableWhitespace != NULL)
  4313. ctxt->sax->ignorableWhitespace(ctxt->userData,
  4314. buf, nbchar);
  4315. } else {
  4316. if (ctxt->sax->characters != NULL)
  4317. ctxt->sax->characters(ctxt->userData, buf, nbchar);
  4318. if ((ctxt->sax->characters !=
  4319. ctxt->sax->ignorableWhitespace) &&
  4320. (*ctxt->space == -1))
  4321. *ctxt->space = -2;
  4322. }
  4323. }
  4324. nbchar = 0;
  4325. /* something really bad happened in the SAX callback */
  4326. if (ctxt->instate != XML_PARSER_CONTENT)
  4327. return;
  4328. }
  4329. count++;
  4330. if (count > 50) {
  4331. SHRINK;
  4332. GROW;
  4333. count = 0;
  4334. if (ctxt->instate == XML_PARSER_EOF)
  4335. return;
  4336. }
  4337. }
  4338. if (nbchar != 0) {
  4339. buf[nbchar] = 0;
  4340. /*
  4341. * OK the segment is to be consumed as chars.
  4342. */
  4343. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  4344. if (areBlanks(ctxt, buf, nbchar, 0)) {
  4345. if (ctxt->sax->ignorableWhitespace != NULL)
  4346. ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
  4347. } else {
  4348. if (ctxt->sax->characters != NULL)
  4349. ctxt->sax->characters(ctxt->userData, buf, nbchar);
  4350. if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
  4351. (*ctxt->space == -1))
  4352. *ctxt->space = -2;
  4353. }
  4354. }
  4355. }
  4356. if ((cur != 0) && (!IS_CHAR(cur))) {
  4357. /* Generate the error and skip the offending character */
  4358. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4359. "PCDATA invalid Char value %d\n",
  4360. cur);
  4361. NEXTL(l);
  4362. }
  4363. }
  4364. /**
  4365. * xmlParseExternalID:
  4366. * @ctxt: an XML parser context
  4367. * @publicID: a xmlChar** receiving PubidLiteral
  4368. * @strict: indicate whether we should restrict parsing to only
  4369. * production [75], see NOTE below
  4370. *
  4371. * Parse an External ID or a Public ID
  4372. *
  4373. * NOTE: Productions [75] and [83] interact badly since [75] can generate
  4374. * 'PUBLIC' S PubidLiteral S SystemLiteral
  4375. *
  4376. * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
  4377. * | 'PUBLIC' S PubidLiteral S SystemLiteral
  4378. *
  4379. * [83] PublicID ::= 'PUBLIC' S PubidLiteral
  4380. *
  4381. * Returns the function returns SystemLiteral and in the second
  4382. * case publicID receives PubidLiteral, is strict is off
  4383. * it is possible to return NULL and have publicID set.
  4384. */
  4385. xmlChar *
  4386. xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
  4387. xmlChar *URI = NULL;
  4388. SHRINK;
  4389. *publicID = NULL;
  4390. if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
  4391. SKIP(6);
  4392. if (SKIP_BLANKS == 0) {
  4393. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4394. "Space required after 'SYSTEM'\n");
  4395. }
  4396. URI = xmlParseSystemLiteral(ctxt);
  4397. if (URI == NULL) {
  4398. xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
  4399. }
  4400. } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
  4401. SKIP(6);
  4402. if (SKIP_BLANKS == 0) {
  4403. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4404. "Space required after 'PUBLIC'\n");
  4405. }
  4406. *publicID = xmlParsePubidLiteral(ctxt);
  4407. if (*publicID == NULL) {
  4408. xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
  4409. }
  4410. if (strict) {
  4411. /*
  4412. * We don't handle [83] so "S SystemLiteral" is required.
  4413. */
  4414. if (SKIP_BLANKS == 0) {
  4415. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4416. "Space required after the Public Identifier\n");
  4417. }
  4418. } else {
  4419. /*
  4420. * We handle [83] so we return immediately, if
  4421. * "S SystemLiteral" is not detected. We skip blanks if no
  4422. * system literal was found, but this is harmless since we must
  4423. * be at the end of a NotationDecl.
  4424. */
  4425. if (SKIP_BLANKS == 0) return(NULL);
  4426. if ((CUR != '\'') && (CUR != '"')) return(NULL);
  4427. }
  4428. URI = xmlParseSystemLiteral(ctxt);
  4429. if (URI == NULL) {
  4430. xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
  4431. }
  4432. }
  4433. return(URI);
  4434. }
  4435. /**
  4436. * xmlParseCommentComplex:
  4437. * @ctxt: an XML parser context
  4438. * @buf: the already parsed part of the buffer
  4439. * @len: number of bytes in the buffer
  4440. * @size: allocated size of the buffer
  4441. *
  4442. * Skip an XML (SGML) comment <!-- .... -->
  4443. * The spec says that "For compatibility, the string "--" (double-hyphen)
  4444. * must not occur within comments. "
  4445. * This is the slow routine in case the accelerator for ascii didn't work
  4446. *
  4447. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  4448. */
  4449. static void
  4450. xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
  4451. size_t len, size_t size) {
  4452. int q, ql;
  4453. int r, rl;
  4454. int cur, l;
  4455. size_t count = 0;
  4456. int inputid;
  4457. inputid = ctxt->input->id;
  4458. if (buf == NULL) {
  4459. len = 0;
  4460. size = XML_PARSER_BUFFER_SIZE;
  4461. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4462. if (buf == NULL) {
  4463. xmlErrMemory(ctxt, NULL);
  4464. return;
  4465. }
  4466. }
  4467. GROW; /* Assure there's enough input data */
  4468. q = CUR_CHAR(ql);
  4469. if (q == 0)
  4470. goto not_terminated;
  4471. if (!IS_CHAR(q)) {
  4472. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4473. "xmlParseComment: invalid xmlChar value %d\n",
  4474. q);
  4475. xmlFree (buf);
  4476. return;
  4477. }
  4478. NEXTL(ql);
  4479. r = CUR_CHAR(rl);
  4480. if (r == 0)
  4481. goto not_terminated;
  4482. if (!IS_CHAR(r)) {
  4483. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4484. "xmlParseComment: invalid xmlChar value %d\n",
  4485. q);
  4486. xmlFree (buf);
  4487. return;
  4488. }
  4489. NEXTL(rl);
  4490. cur = CUR_CHAR(l);
  4491. if (cur == 0)
  4492. goto not_terminated;
  4493. while (IS_CHAR(cur) && /* checked */
  4494. ((cur != '>') ||
  4495. (r != '-') || (q != '-'))) {
  4496. if ((r == '-') && (q == '-')) {
  4497. xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
  4498. }
  4499. if ((len > XML_MAX_TEXT_LENGTH) &&
  4500. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4501. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4502. "Comment too big found", NULL);
  4503. xmlFree (buf);
  4504. return;
  4505. }
  4506. if (len + 5 >= size) {
  4507. xmlChar *new_buf;
  4508. size_t new_size;
  4509. new_size = size * 2;
  4510. new_buf = (xmlChar *) xmlRealloc(buf, new_size);
  4511. if (new_buf == NULL) {
  4512. xmlFree (buf);
  4513. xmlErrMemory(ctxt, NULL);
  4514. return;
  4515. }
  4516. buf = new_buf;
  4517. size = new_size;
  4518. }
  4519. COPY_BUF(ql,buf,len,q);
  4520. q = r;
  4521. ql = rl;
  4522. r = cur;
  4523. rl = l;
  4524. count++;
  4525. if (count > 50) {
  4526. SHRINK;
  4527. GROW;
  4528. count = 0;
  4529. if (ctxt->instate == XML_PARSER_EOF) {
  4530. xmlFree(buf);
  4531. return;
  4532. }
  4533. }
  4534. NEXTL(l);
  4535. cur = CUR_CHAR(l);
  4536. if (cur == 0) {
  4537. SHRINK;
  4538. GROW;
  4539. cur = CUR_CHAR(l);
  4540. }
  4541. }
  4542. buf[len] = 0;
  4543. if (cur == 0) {
  4544. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4545. "Comment not terminated \n<!--%.50s\n", buf);
  4546. } else if (!IS_CHAR(cur)) {
  4547. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4548. "xmlParseComment: invalid xmlChar value %d\n",
  4549. cur);
  4550. } else {
  4551. if (inputid != ctxt->input->id) {
  4552. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4553. "Comment doesn't start and stop in the same"
  4554. " entity\n");
  4555. }
  4556. NEXT;
  4557. if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
  4558. (!ctxt->disableSAX))
  4559. ctxt->sax->comment(ctxt->userData, buf);
  4560. }
  4561. xmlFree(buf);
  4562. return;
  4563. not_terminated:
  4564. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4565. "Comment not terminated\n", NULL);
  4566. xmlFree(buf);
  4567. return;
  4568. }
  4569. /**
  4570. * xmlParseComment:
  4571. * @ctxt: an XML parser context
  4572. *
  4573. * Skip an XML (SGML) comment <!-- .... -->
  4574. * The spec says that "For compatibility, the string "--" (double-hyphen)
  4575. * must not occur within comments. "
  4576. *
  4577. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
        *
        * Nothing happens unless the input starts with "<!--". While the comment
        * is parsed ctxt->instate is XML_PARSER_COMMENT. A fast path scans ASCII
        * text in place and, if a SAX comment callback exists, accumulates it in
        * a heap buffer; on "-->" the text (or "" when nothing was buffered) is
        * passed to that callback. When the fast path meets a byte it does not
        * handle, the buffer is handed over to xmlParseCommentComplex().
  4578. */
  4579. void
  4580. xmlParseComment(xmlParserCtxtPtr ctxt) {
  4581. xmlChar *buf = NULL; /* accumulated comment text, allocated lazily */
  4582. size_t size = XML_PARSER_BUFFER_SIZE; /* allocated size of buf */
  4583. size_t len = 0; /* bytes used in buf */
  4584. xmlParserInputState state; /* parser state to put back on exit */
  4585. const xmlChar *in; /* scan pointer of the fast path */
  4586. size_t nbchar = 0; /* length of the run between input->cur and in */
  4587. int ccol; /* local column counter used while scanning a run */
  4588. int inputid; /* id of the input in which the comment started */
  4589. /*
  4590. * Check that there is a comment right here.
  4591. */
  4592. if ((RAW != '<') || (NXT(1) != '!') ||
  4593. (NXT(2) != '-') || (NXT(3) != '-')) return;
  4594. state = ctxt->instate;
  4595. ctxt->instate = XML_PARSER_COMMENT;
  4596. inputid = ctxt->input->id;
  4597. SKIP(4); /* skip the opening "<!--" */
  4598. SHRINK;
  4599. GROW;
  4600. /*
  4601. * Accelerated common case where the input doesn't need to be
  4602. * modified before passing it to the handler.
  4603. */
  4604. in = ctxt->input->cur;
  4605. do {
  4606. if (*in == 0xA) {
  4607. do { /* each LF starts a new line at column 1 */
  4608. ctxt->input->line++; ctxt->input->col = 1;
  4609. in++;
  4610. } while (*in == 0xA);
  4611. }
  4612. get_more:
  4613. ccol = ctxt->input->col;
  4614. while (((*in > '-') && (*in <= 0x7F)) || /* 0x20..0x7F except '-', plus tab */
  4615. ((*in >= 0x20) && (*in < '-')) ||
  4616. (*in == 0x09)) {
  4617. in++;
  4618. ccol++;
  4619. }
  4620. ctxt->input->col = ccol;
  4621. if (*in == 0xA) {
  4622. do {
  4623. ctxt->input->line++; ctxt->input->col = 1;
  4624. in++;
  4625. } while (*in == 0xA);
  4626. goto get_more;
  4627. }
  4628. nbchar = in - ctxt->input->cur;
  4629. /*
  4630. * save current set of data
  4631. */
  4632. if (nbchar > 0) {
  4633. if ((ctxt->sax != NULL) &&
  4634. (ctxt->sax->comment != NULL)) { /* text is buffered only if someone will receive it */
  4635. if (buf == NULL) {
  4636. if ((*in == '-') && (in[1] == '-')) /* run is followed by "--": allocate just the run plus a NUL */
  4637. size = nbchar + 1;
  4638. else
  4639. size = XML_PARSER_BUFFER_SIZE + nbchar;
  4640. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4641. if (buf == NULL) {
  4642. xmlErrMemory(ctxt, NULL);
  4643. ctxt->instate = state;
  4644. return;
  4645. }
  4646. len = 0;
  4647. } else if (len + nbchar + 1 >= size) { /* not enough room for the run and its NUL */
  4648. xmlChar *new_buf;
  4649. size += len + nbchar + XML_PARSER_BUFFER_SIZE;
  4650. new_buf = (xmlChar *) xmlRealloc(buf,
  4651. size * sizeof(xmlChar));
  4652. if (new_buf == NULL) {
  4653. xmlFree (buf);
  4654. xmlErrMemory(ctxt, NULL);
  4655. ctxt->instate = state;
  4656. return;
  4657. }
  4658. buf = new_buf;
  4659. }
  4660. memcpy(&buf[len], ctxt->input->cur, nbchar); /* append the run */
  4661. len += nbchar;
  4662. buf[len] = 0;
  4663. }
  4664. }
  4665. if ((len > XML_MAX_TEXT_LENGTH) &&
  4666. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4667. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4668. "Comment too big found", NULL);
  4669. xmlFree (buf);
  4670. return; /* NOTE(review): unlike the allocation failures above, ctxt->instate is not restored here - confirm intended */
  4671. }
  4672. ctxt->input->cur = in; /* the run is consumed */
  4673. if (*in == 0xA) {
  4674. in++;
  4675. ctxt->input->line++; ctxt->input->col = 1;
  4676. }
  4677. if (*in == 0xD) {
  4678. in++;
  4679. if (*in == 0xA) { /* CR LF: input->cur is left on the LF, so the CR is not buffered */
  4680. ctxt->input->cur = in;
  4681. in++;
  4682. ctxt->input->line++; ctxt->input->col = 1;
  4683. goto get_more;
  4684. }
  4685. in--; /* lone CR: step back */
  4686. }
  4687. SHRINK;
  4688. GROW;
  4689. if (ctxt->instate == XML_PARSER_EOF) {
  4690. xmlFree(buf);
  4691. return;
  4692. }
  4693. in = ctxt->input->cur; /* reload the scan pointer after SHRINK/GROW */
  4694. if (*in == '-') {
  4695. if (in[1] == '-') {
  4696. if (in[2] == '>') { /* "-->": end of the comment */
  4697. if (ctxt->input->id != inputid) {
  4698. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4699. "comment doesn't start and stop in the"
  4700. " same entity\n");
  4701. }
  4702. SKIP(3);
  4703. if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
  4704. (!ctxt->disableSAX)) {
  4705. if (buf != NULL)
  4706. ctxt->sax->comment(ctxt->userData, buf);
  4707. else
  4708. ctxt->sax->comment(ctxt->userData, BAD_CAST ""); /* empty comment: nothing was buffered */
  4709. }
  4710. if (buf != NULL)
  4711. xmlFree(buf);
  4712. if (ctxt->instate != XML_PARSER_EOF) /* never overwrite an EOF state */
  4713. ctxt->instate = state;
  4714. return;
  4715. }
  4716. if (buf != NULL) { /* "--" not followed by '>' is an error, but parsing goes on */
  4717. xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
  4718. "Double hyphen within comment: "
  4719. "<!--%.50s\n",
  4720. buf);
  4721. } else
  4722. xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
  4723. "Double hyphen within comment\n", NULL);
  4724. if (ctxt->instate == XML_PARSER_EOF) {
  4725. xmlFree(buf);
  4726. return;
  4727. }
  4728. in++; /* step over the first hyphen of the pair */
  4729. ctxt->input->col++;
  4730. }
  4731. in++; /* step over the (remaining) hyphen; input->cur stays behind, so it joins the next run */
  4732. ctxt->input->col++;
  4733. goto get_more;
  4734. }
  4735. } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); /* stay on the fast path for printable ASCII, tab and LF */
  4736. xmlParseCommentComplex(ctxt, buf, len, size); /* slow path; every return path visible in it releases buf */
  4737. ctxt->instate = state;
  4738. return;
  4739. }
  4740. /**
  4741. * xmlParsePITarget:
  4742. * @ctxt: an XML parser context
  4743. *
  4744. * parse the name of a PI
  4745. *
  4746. * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  4747. *
  4748. * Returns the PITarget name or NULL
  4749. */
  4750. const xmlChar *
  4751. xmlParsePITarget(xmlParserCtxtPtr ctxt) {
  4752. const xmlChar *name;
  4753. name = xmlParseName(ctxt);
  4754. if ((name != NULL) &&
  4755. ((name[0] == 'x') || (name[0] == 'X')) &&
  4756. ((name[1] == 'm') || (name[1] == 'M')) &&
  4757. ((name[2] == 'l') || (name[2] == 'L'))) {
  4758. int i;
  4759. if ((name[0] == 'x') && (name[1] == 'm') &&
  4760. (name[2] == 'l') && (name[3] == 0)) {
  4761. xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
  4762. "XML declaration allowed only at the start of the document\n");
  4763. return(name);
  4764. } else if (name[3] == 0) {
  4765. xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
  4766. return(name);
  4767. }
  4768. for (i = 0;;i++) {
  4769. if (xmlW3CPIs[i] == NULL) break;
  4770. if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
  4771. return(name);
  4772. }
  4773. xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
  4774. "xmlParsePITarget: invalid name prefix 'xml'\n",
  4775. NULL, NULL);
  4776. }
  4777. if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
  4778. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  4779. "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
  4780. }
  4781. return(name);
  4782. }
  #ifdef LIBXML_CATALOG_ENABLED
  /**
   * xmlParseCatalogPI:
   * @ctxt:  an XML parser context
   * @catalog:  the PI value string
   *
   * parse an XML Catalog Processing Instruction.
   *
   * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
   *
   * Occurs only if allowed by the user and if happening in the Misc
   * part of the document before any doctype information
   * This will add the given catalog to the parsing context in order
   * to be used if there is a resolution need further down in the document
   *
   * The value must have the form  catalog = "URL"  (single or double
   * quotes, optional blanks around each token, nothing after the closing
   * quote). Any other shape produces an XML_WAR_CATALOG_PI warning, with
   * one exception: when the '=' sign is missing the function returns
   * without any message.
   */
  static void
  xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
      xmlChar *URL = NULL;
      const xmlChar *p = catalog;
      const xmlChar *start;
      xmlChar quote;

      /* pseudo-attribute name */
      for (; IS_BLANK_CH(*p); p++)
          ;
      if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
          goto error;
      p += 7;

      /* '=' sign; its absence is silently ignored */
      for (; IS_BLANK_CH(*p); p++)
          ;
      if (*p != '=')
          return;
      p++;

      /* opening quote */
      for (; IS_BLANK_CH(*p); p++)
          ;
      quote = *p;
      if ((quote != '\'') && (quote != '"'))
          goto error;
      p++;

      /* quoted value, up to the matching quote */
      start = p;
      for (; (*p != 0) && (*p != quote); p++)
          ;
      if (*p == 0)
          goto error;
      URL = xmlStrndup(start, p - start);
      p++;

      /* only blanks may follow the closing quote */
      for (; IS_BLANK_CH(*p); p++)
          ;
      if (*p != 0)
          goto error;

      if (URL != NULL) {
          ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
          xmlFree(URL);
      }
      return;

  error:
      xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                    "Catalog PI syntax error: %s\n",
                    catalog, NULL);
      if (URL != NULL)
          xmlFree(URL);
  }
  #endif
  4840. /**
  4841. * xmlParsePI:
  4842. * @ctxt: an XML parser context
  4843. *
  4844. * parse an XML Processing Instruction.
  4845. *
  4846. * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  4847. *
  4848. * The processing is transferred to SAX once parsed.
  4849. */
  4850. void
  4851. xmlParsePI(xmlParserCtxtPtr ctxt) {
  4852. xmlChar *buf = NULL;
  4853. size_t len = 0;
  4854. size_t size = XML_PARSER_BUFFER_SIZE;
  4855. int cur, l;
  4856. const xmlChar *target;
  4857. xmlParserInputState state;
  4858. int count = 0;
  4859. if ((RAW == '<') && (NXT(1) == '?')) {
  4860. int inputid = ctxt->input->id;
  4861. state = ctxt->instate;
  4862. ctxt->instate = XML_PARSER_PI;
  4863. /*
  4864. * this is a Processing Instruction.
  4865. */
  4866. SKIP(2);
  4867. SHRINK;
  4868. /*
  4869. * Parse the target name and check for special support like
  4870. * namespace.
  4871. */
  4872. target = xmlParsePITarget(ctxt);
  4873. if (target != NULL) {
  4874. if ((RAW == '?') && (NXT(1) == '>')) {
  4875. if (inputid != ctxt->input->id) {
  4876. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4877. "PI declaration doesn't start and stop in"
  4878. " the same entity\n");
  4879. }
  4880. SKIP(2);
  4881. /*
  4882. * SAX: PI detected.
  4883. */
  4884. if ((ctxt->sax) && (!ctxt->disableSAX) &&
  4885. (ctxt->sax->processingInstruction != NULL))
  4886. ctxt->sax->processingInstruction(ctxt->userData,
  4887. target, NULL);
  4888. if (ctxt->instate != XML_PARSER_EOF)
  4889. ctxt->instate = state;
  4890. return;
  4891. }
  4892. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4893. if (buf == NULL) {
  4894. xmlErrMemory(ctxt, NULL);
  4895. ctxt->instate = state;
  4896. return;
  4897. }
  4898. if (SKIP_BLANKS == 0) {
  4899. xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
  4900. "ParsePI: PI %s space expected\n", target);
  4901. }
  4902. cur = CUR_CHAR(l);
  4903. while (IS_CHAR(cur) && /* checked */
  4904. ((cur != '?') || (NXT(1) != '>'))) {
  4905. if (len + 5 >= size) {
  4906. xmlChar *tmp;
  4907. size_t new_size = size * 2;
  4908. tmp = (xmlChar *) xmlRealloc(buf, new_size);
  4909. if (tmp == NULL) {
  4910. xmlErrMemory(ctxt, NULL);
  4911. xmlFree(buf);
  4912. ctxt->instate = state;
  4913. return;
  4914. }
  4915. buf = tmp;
  4916. size = new_size;
  4917. }
  4918. count++;
  4919. if (count > 50) {
  4920. SHRINK;
  4921. GROW;
  4922. if (ctxt->instate == XML_PARSER_EOF) {
  4923. xmlFree(buf);
  4924. return;
  4925. }
  4926. count = 0;
  4927. if ((len > XML_MAX_TEXT_LENGTH) &&
  4928. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4929. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4930. "PI %s too big found", target);
  4931. xmlFree(buf);
  4932. ctxt->instate = state;
  4933. return;
  4934. }
  4935. }
  4936. COPY_BUF(l,buf,len,cur);
  4937. NEXTL(l);
  4938. cur = CUR_CHAR(l);
  4939. if (cur == 0) {
  4940. SHRINK;
  4941. GROW;
  4942. cur = CUR_CHAR(l);
  4943. }
  4944. }
  4945. if ((len > XML_MAX_TEXT_LENGTH) &&
  4946. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4947. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4948. "PI %s too big found", target);
  4949. xmlFree(buf);
  4950. ctxt->instate = state;
  4951. return;
  4952. }
  4953. buf[len] = 0;
  4954. if (cur != '?') {
  4955. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4956. "ParsePI: PI %s never end ...\n", target);
  4957. } else {
  4958. if (inputid != ctxt->input->id) {
  4959. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4960. "PI declaration doesn't start and stop in"
  4961. " the same entity\n");
  4962. }
  4963. SKIP(2);
  4964. #ifdef LIBXML_CATALOG_ENABLED
  4965. if (((state == XML_PARSER_MISC) ||
  4966. (state == XML_PARSER_START)) &&
  4967. (xmlStrEqual(target, XML_CATALOG_PI))) {
  4968. xmlCatalogAllow allow = xmlCatalogGetDefaults();
  4969. if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
  4970. (allow == XML_CATA_ALLOW_ALL))
  4971. xmlParseCatalogPI(ctxt, buf);
  4972. }
  4973. #endif
  4974. /*
  4975. * SAX: PI detected.
  4976. */
  4977. if ((ctxt->sax) && (!ctxt->disableSAX) &&
  4978. (ctxt->sax->processingInstruction != NULL))
  4979. ctxt->sax->processingInstruction(ctxt->userData,
  4980. target, buf);
  4981. }
  4982. xmlFree(buf);
  4983. } else {
  4984. xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
  4985. }
  4986. if (ctxt->instate != XML_PARSER_EOF)
  4987. ctxt->instate = state;
  4988. }
  4989. }
  4990. /**
  4991. * xmlParseNotationDecl:
  4992. * @ctxt: an XML parser context
  4993. *
  4994. * parse a notation declaration
  4995. *
  4996. * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  4997. *
  4998. * Hence there is actually 3 choices:
  4999. * 'PUBLIC' S PubidLiteral
  5000. * 'PUBLIC' S PubidLiteral S SystemLiteral
  5001. * and 'SYSTEM' S SystemLiteral
  5002. *
  5003. * See the NOTE on xmlParseExternalID().
  5004. */
  5005. void
  5006. xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
  5007. const xmlChar *name;
  5008. xmlChar *Pubid;
  5009. xmlChar *Systemid;
  5010. if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
  5011. int inputid = ctxt->input->id;
  5012. SHRINK;
  5013. SKIP(10);
  5014. if (SKIP_BLANKS == 0) {
  5015. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5016. "Space required after '<!NOTATION'\n");
  5017. return;
  5018. }
  5019. name = xmlParseName(ctxt);
  5020. if (name == NULL) {
  5021. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
  5022. return;
  5023. }
  5024. if (xmlStrchr(name, ':') != NULL) {
  5025. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  5026. "colons are forbidden from notation names '%s'\n",
  5027. name, NULL, NULL);
  5028. }
  5029. if (SKIP_BLANKS == 0) {
  5030. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5031. "Space required after the NOTATION name'\n");
  5032. return;
  5033. }
  5034. /*
  5035. * Parse the IDs.
  5036. */
  5037. Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
  5038. SKIP_BLANKS;
  5039. if (RAW == '>') {
  5040. if (inputid != ctxt->input->id) {
  5041. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5042. "Notation declaration doesn't start and stop"
  5043. " in the same entity\n");
  5044. }
  5045. NEXT;
  5046. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5047. (ctxt->sax->notationDecl != NULL))
  5048. ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
  5049. } else {
  5050. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
  5051. }
  5052. if (Systemid != NULL) xmlFree(Systemid);
  5053. if (Pubid != NULL) xmlFree(Pubid);
  5054. }
  5055. }
  5056. /**
  5057. * xmlParseEntityDecl:
  5058. * @ctxt: an XML parser context
  5059. *
  5060. * parse <!ENTITY declarations
  5061. *
  5062. * [70] EntityDecl ::= GEDecl | PEDecl
  5063. *
  5064. * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
  5065. *
  5066. * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  5067. *
  5068. * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  5069. *
  5070. * [74] PEDef ::= EntityValue | ExternalID
  5071. *
  5072. * [76] NDataDecl ::= S 'NDATA' S Name
  5073. *
  5074. * [ VC: Notation Declared ]
  5075. * The Name must match the declared name of a notation.
  5076. */
  5077. void
  5078. xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
  5079. const xmlChar *name = NULL;
  5080. xmlChar *value = NULL;
  5081. xmlChar *URI = NULL, *literal = NULL;
  5082. const xmlChar *ndata = NULL;
  5083. int isParameter = 0;
  5084. xmlChar *orig = NULL;
  5085. /* GROW; done in the caller */
  5086. if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
  5087. int inputid = ctxt->input->id;
  5088. SHRINK;
  5089. SKIP(8);
  5090. if (SKIP_BLANKS == 0) {
  5091. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5092. "Space required after '<!ENTITY'\n");
  5093. }
  5094. if (RAW == '%') {
  5095. NEXT;
  5096. if (SKIP_BLANKS == 0) {
  5097. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5098. "Space required after '%%'\n");
  5099. }
  5100. isParameter = 1;
  5101. }
  5102. name = xmlParseName(ctxt);
  5103. if (name == NULL) {
  5104. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5105. "xmlParseEntityDecl: no name\n");
  5106. return;
  5107. }
  5108. if (xmlStrchr(name, ':') != NULL) {
  5109. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  5110. "colons are forbidden from entities names '%s'\n",
  5111. name, NULL, NULL);
  5112. }
  5113. if (SKIP_BLANKS == 0) {
  5114. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5115. "Space required after the entity name\n");
  5116. }
  5117. ctxt->instate = XML_PARSER_ENTITY_DECL;
  5118. /*
  5119. * handle the various case of definitions...
  5120. */
  5121. if (isParameter) {
  5122. if ((RAW == '"') || (RAW == '\'')) {
  5123. value = xmlParseEntityValue(ctxt, &orig);
  5124. if (value) {
  5125. if ((ctxt->sax != NULL) &&
  5126. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  5127. ctxt->sax->entityDecl(ctxt->userData, name,
  5128. XML_INTERNAL_PARAMETER_ENTITY,
  5129. NULL, NULL, value);
  5130. }
  5131. } else {
  5132. URI = xmlParseExternalID(ctxt, &literal, 1);
  5133. if ((URI == NULL) && (literal == NULL)) {
  5134. xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
  5135. }
  5136. if (URI) {
  5137. xmlURIPtr uri;
  5138. uri = xmlParseURI((const char *) URI);
  5139. if (uri == NULL) {
  5140. xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
  5141. "Invalid URI: %s\n", URI);
  5142. /*
  5143. * This really ought to be a well formedness error
  5144. * but the XML Core WG decided otherwise c.f. issue
  5145. * E26 of the XML erratas.
  5146. */
  5147. } else {
  5148. if (uri->fragment != NULL) {
  5149. /*
  5150. * Okay this is foolish to block those but not
  5151. * invalid URIs.
  5152. */
  5153. xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
  5154. } else {
  5155. if ((ctxt->sax != NULL) &&
  5156. (!ctxt->disableSAX) &&
  5157. (ctxt->sax->entityDecl != NULL))
  5158. ctxt->sax->entityDecl(ctxt->userData, name,
  5159. XML_EXTERNAL_PARAMETER_ENTITY,
  5160. literal, URI, NULL);
  5161. }
  5162. xmlFreeURI(uri);
  5163. }
  5164. }
  5165. }
  5166. } else {
  5167. if ((RAW == '"') || (RAW == '\'')) {
  5168. value = xmlParseEntityValue(ctxt, &orig);
  5169. if ((ctxt->sax != NULL) &&
  5170. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  5171. ctxt->sax->entityDecl(ctxt->userData, name,
  5172. XML_INTERNAL_GENERAL_ENTITY,
  5173. NULL, NULL, value);
  5174. /*
  5175. * For expat compatibility in SAX mode.
  5176. */
  5177. if ((ctxt->myDoc == NULL) ||
  5178. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
  5179. if (ctxt->myDoc == NULL) {
  5180. ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
  5181. if (ctxt->myDoc == NULL) {
  5182. xmlErrMemory(ctxt, "New Doc failed");
  5183. return;
  5184. }
  5185. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  5186. }
  5187. if (ctxt->myDoc->intSubset == NULL)
  5188. ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
  5189. BAD_CAST "fake", NULL, NULL);
  5190. xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
  5191. NULL, NULL, value);
  5192. }
  5193. } else {
  5194. URI = xmlParseExternalID(ctxt, &literal, 1);
  5195. if ((URI == NULL) && (literal == NULL)) {
  5196. xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
  5197. }
  5198. if (URI) {
  5199. xmlURIPtr uri;
  5200. uri = xmlParseURI((const char *)URI);
  5201. if (uri == NULL) {
  5202. xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
  5203. "Invalid URI: %s\n", URI);
  5204. /*
  5205. * This really ought to be a well formedness error
  5206. * but the XML Core WG decided otherwise c.f. issue
  5207. * E26 of the XML erratas.
  5208. */
  5209. } else {
  5210. if (uri->fragment != NULL) {
  5211. /*
  5212. * Okay this is foolish to block those but not
  5213. * invalid URIs.
  5214. */
  5215. xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
  5216. }
  5217. xmlFreeURI(uri);
  5218. }
  5219. }
  5220. if ((RAW != '>') && (SKIP_BLANKS == 0)) {
  5221. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5222. "Space required before 'NDATA'\n");
  5223. }
  5224. if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
  5225. SKIP(5);
  5226. if (SKIP_BLANKS == 0) {
  5227. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5228. "Space required after 'NDATA'\n");
  5229. }
  5230. ndata = xmlParseName(ctxt);
  5231. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5232. (ctxt->sax->unparsedEntityDecl != NULL))
  5233. ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
  5234. literal, URI, ndata);
  5235. } else {
  5236. if ((ctxt->sax != NULL) &&
  5237. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  5238. ctxt->sax->entityDecl(ctxt->userData, name,
  5239. XML_EXTERNAL_GENERAL_PARSED_ENTITY,
  5240. literal, URI, NULL);
  5241. /*
  5242. * For expat compatibility in SAX mode.
  5243. * assuming the entity replacement was asked for
  5244. */
  5245. if ((ctxt->replaceEntities != 0) &&
  5246. ((ctxt->myDoc == NULL) ||
  5247. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
  5248. if (ctxt->myDoc == NULL) {
  5249. ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
  5250. if (ctxt->myDoc == NULL) {
  5251. xmlErrMemory(ctxt, "New Doc failed");
  5252. return;
  5253. }
  5254. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  5255. }
  5256. if (ctxt->myDoc->intSubset == NULL)
  5257. ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
  5258. BAD_CAST "fake", NULL, NULL);
  5259. xmlSAX2EntityDecl(ctxt, name,
  5260. XML_EXTERNAL_GENERAL_PARSED_ENTITY,
  5261. literal, URI, NULL);
  5262. }
  5263. }
  5264. }
  5265. }
  5266. if (ctxt->instate == XML_PARSER_EOF)
  5267. goto done;
  5268. SKIP_BLANKS;
  5269. if (RAW != '>') {
  5270. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
  5271. "xmlParseEntityDecl: entity %s not terminated\n", name);
  5272. xmlHaltParser(ctxt);
  5273. } else {
  5274. if (inputid != ctxt->input->id) {
  5275. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5276. "Entity declaration doesn't start and stop in"
  5277. " the same entity\n");
  5278. }
  5279. NEXT;
  5280. }
  5281. if (orig != NULL) {
  5282. /*
  5283. * Ugly mechanism to save the raw entity value.
  5284. */
  5285. xmlEntityPtr cur = NULL;
  5286. if (isParameter) {
  5287. if ((ctxt->sax != NULL) &&
  5288. (ctxt->sax->getParameterEntity != NULL))
  5289. cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
  5290. } else {
  5291. if ((ctxt->sax != NULL) &&
  5292. (ctxt->sax->getEntity != NULL))
  5293. cur = ctxt->sax->getEntity(ctxt->userData, name);
  5294. if ((cur == NULL) && (ctxt->userData==ctxt)) {
  5295. cur = xmlSAX2GetEntity(ctxt, name);
  5296. }
  5297. }
  5298. if ((cur != NULL) && (cur->orig == NULL)) {
  5299. cur->orig = orig;
  5300. orig = NULL;
  5301. }
  5302. }
  5303. done:
  5304. if (value != NULL) xmlFree(value);
  5305. if (URI != NULL) xmlFree(URI);
  5306. if (literal != NULL) xmlFree(literal);
  5307. if (orig != NULL) xmlFree(orig);
  5308. }
  5309. }
  5310. /**
  5311. * xmlParseDefaultDecl:
  5312. * @ctxt: an XML parser context
  5313. * @value: Receive a possible fixed default value for the attribute
  5314. *
  5315. * Parse an attribute default declaration
  5316. *
  5317. * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  5318. *
  5319. * [ VC: Required Attribute ]
  5320. * if the default declaration is the keyword #REQUIRED, then the
  5321. * attribute must be specified for all elements of the type in the
  5322. * attribute-list declaration.
  5323. *
  5324. * [ VC: Attribute Default Legal ]
  5325. * The declared default value must meet the lexical constraints of
  5326. * the declared attribute type c.f. xmlValidateAttributeDecl()
  5327. *
  5328. * [ VC: Fixed Attribute Default ]
  5329. * if an attribute has a default value declared with the #FIXED
  5330. * keyword, instances of that attribute must match the default value.
  5331. *
  5332. * [ WFC: No < in Attribute Values ]
  5333. * handled in xmlParseAttValue()
  5334. *
  5335. * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
  5336. * or XML_ATTRIBUTE_FIXED.
  5337. */
  5338. int
  5339. xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
  5340. int val;
  5341. xmlChar *ret;
  5342. *value = NULL;
  5343. if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
  5344. SKIP(9);
  5345. return(XML_ATTRIBUTE_REQUIRED);
  5346. }
  5347. if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
  5348. SKIP(8);
  5349. return(XML_ATTRIBUTE_IMPLIED);
  5350. }
  5351. val = XML_ATTRIBUTE_NONE;
  5352. if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
  5353. SKIP(6);
  5354. val = XML_ATTRIBUTE_FIXED;
  5355. if (SKIP_BLANKS == 0) {
  5356. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5357. "Space required after '#FIXED'\n");
  5358. }
  5359. }
  5360. ret = xmlParseAttValue(ctxt);
  5361. ctxt->instate = XML_PARSER_DTD;
  5362. if (ret == NULL) {
  5363. xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
  5364. "Attribute default value declaration error\n");
  5365. } else
  5366. *value = ret;
  5367. return(val);
  5368. }
  5369. /**
  5370. * xmlParseNotationType:
  5371. * @ctxt: an XML parser context
  5372. *
  5373. * parse an Notation attribute type.
  5374. *
  5375. * Note: the leading 'NOTATION' S part has already being parsed...
  5376. *
  5377. * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  5378. *
  5379. * [ VC: Notation Attributes ]
  5380. * Values of this type must match one of the notation names included
  5381. * in the declaration; all notation names in the declaration must be declared.
  5382. *
  5383. * Returns: the notation attribute tree built while parsing
  5384. */
  5385. xmlEnumerationPtr
  5386. xmlParseNotationType(xmlParserCtxtPtr ctxt) {
  5387. const xmlChar *name;
  5388. xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
  5389. if (RAW != '(') {
  5390. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
  5391. return(NULL);
  5392. }
  5393. SHRINK;
  5394. do {
  5395. NEXT;
  5396. SKIP_BLANKS;
  5397. name = xmlParseName(ctxt);
  5398. if (name == NULL) {
  5399. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5400. "Name expected in NOTATION declaration\n");
  5401. xmlFreeEnumeration(ret);
  5402. return(NULL);
  5403. }
  5404. tmp = ret;
  5405. while (tmp != NULL) {
  5406. if (xmlStrEqual(name, tmp->name)) {
  5407. xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
  5408. "standalone: attribute notation value token %s duplicated\n",
  5409. name, NULL);
  5410. if (!xmlDictOwns(ctxt->dict, name))
  5411. xmlFree((xmlChar *) name);
  5412. break;
  5413. }
  5414. tmp = tmp->next;
  5415. }
  5416. if (tmp == NULL) {
  5417. cur = xmlCreateEnumeration(name);
  5418. if (cur == NULL) {
  5419. xmlFreeEnumeration(ret);
  5420. return(NULL);
  5421. }
  5422. if (last == NULL) ret = last = cur;
  5423. else {
  5424. last->next = cur;
  5425. last = cur;
  5426. }
  5427. }
  5428. SKIP_BLANKS;
  5429. } while (RAW == '|');
  5430. if (RAW != ')') {
  5431. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
  5432. xmlFreeEnumeration(ret);
  5433. return(NULL);
  5434. }
  5435. NEXT;
  5436. return(ret);
  5437. }
  5438. /**
  5439. * xmlParseEnumerationType:
  5440. * @ctxt: an XML parser context
  5441. *
  5442. * parse an Enumeration attribute type.
  5443. *
  5444. * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  5445. *
  5446. * [ VC: Enumeration ]
  5447. * Values of this type must match one of the Nmtoken tokens in
  5448. * the declaration
  5449. *
  5450. * Returns: the enumeration attribute tree built while parsing
  5451. */
  5452. xmlEnumerationPtr
  5453. xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
  5454. xmlChar *name;
  5455. xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
  5456. if (RAW != '(') {
  5457. xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
  5458. return(NULL);
  5459. }
  5460. SHRINK;
  5461. do {
  5462. NEXT;
  5463. SKIP_BLANKS;
  5464. name = xmlParseNmtoken(ctxt);
  5465. if (name == NULL) {
  5466. xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
  5467. return(ret);
  5468. }
  5469. tmp = ret;
  5470. while (tmp != NULL) {
  5471. if (xmlStrEqual(name, tmp->name)) {
  5472. xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
  5473. "standalone: attribute enumeration value token %s duplicated\n",
  5474. name, NULL);
  5475. if (!xmlDictOwns(ctxt->dict, name))
  5476. xmlFree(name);
  5477. break;
  5478. }
  5479. tmp = tmp->next;
  5480. }
  5481. if (tmp == NULL) {
  5482. cur = xmlCreateEnumeration(name);
  5483. if (!xmlDictOwns(ctxt->dict, name))
  5484. xmlFree(name);
  5485. if (cur == NULL) {
  5486. xmlFreeEnumeration(ret);
  5487. return(NULL);
  5488. }
  5489. if (last == NULL) ret = last = cur;
  5490. else {
  5491. last->next = cur;
  5492. last = cur;
  5493. }
  5494. }
  5495. SKIP_BLANKS;
  5496. } while (RAW == '|');
  5497. if (RAW != ')') {
  5498. xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
  5499. return(ret);
  5500. }
  5501. NEXT;
  5502. return(ret);
  5503. }
  5504. /**
  5505. * xmlParseEnumeratedType:
  5506. * @ctxt: an XML parser context
  5507. * @tree: the enumeration tree built while parsing
  5508. *
  5509. * parse an Enumerated attribute type.
  5510. *
  5511. * [57] EnumeratedType ::= NotationType | Enumeration
  5512. *
  5513. * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  5514. *
  5515. *
  5516. * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
  5517. */
  5518. int
  5519. xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
  5520. if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
  5521. SKIP(8);
  5522. if (SKIP_BLANKS == 0) {
  5523. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5524. "Space required after 'NOTATION'\n");
  5525. return(0);
  5526. }
  5527. *tree = xmlParseNotationType(ctxt);
  5528. if (*tree == NULL) return(0);
  5529. return(XML_ATTRIBUTE_NOTATION);
  5530. }
  5531. *tree = xmlParseEnumerationType(ctxt);
  5532. if (*tree == NULL) return(0);
  5533. return(XML_ATTRIBUTE_ENUMERATION);
  5534. }
  5535. /**
  5536. * xmlParseAttributeType:
  5537. * @ctxt: an XML parser context
  5538. * @tree: the enumeration tree built while parsing
  5539. *
  5540. * parse the Attribute list def for an element
  5541. *
  5542. * [54] AttType ::= StringType | TokenizedType | EnumeratedType
  5543. *
  5544. * [55] StringType ::= 'CDATA'
  5545. *
  5546. * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
  5547. * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
  5548. *
  5549. * Validity constraints for attribute values syntax are checked in
  5550. * xmlValidateAttributeValue()
  5551. *
  5552. * [ VC: ID ]
  5553. * Values of type ID must match the Name production. A name must not
  5554. * appear more than once in an XML document as a value of this type;
  5555. * i.e., ID values must uniquely identify the elements which bear them.
  5556. *
  5557. * [ VC: One ID per Element Type ]
  5558. * No element type may have more than one ID attribute specified.
  5559. *
  5560. * [ VC: ID Attribute Default ]
  5561. * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
  5562. *
  5563. * [ VC: IDREF ]
  5564. * Values of type IDREF must match the Name production, and values
  5565. * of type IDREFS must match Names; each IDREF Name must match the value
  5566. * of an ID attribute on some element in the XML document; i.e. IDREF
  5567. * values must match the value of some ID attribute.
  5568. *
  5569. * [ VC: Entity Name ]
  5570. * Values of type ENTITY must match the Name production, values
  5571. * of type ENTITIES must match Names; each Entity Name must match the
  5572. * name of an unparsed entity declared in the DTD.
  5573. *
  5574. * [ VC: Name Token ]
  5575. * Values of type NMTOKEN must match the Nmtoken production; values
  5576. * of type NMTOKENS must match Nmtokens.
  5577. *
  5578. * Returns the attribute type
  5579. */
  5580. int
  5581. xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
  5582. SHRINK;
  5583. if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
  5584. SKIP(5);
  5585. return(XML_ATTRIBUTE_CDATA);
  5586. } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
  5587. SKIP(6);
  5588. return(XML_ATTRIBUTE_IDREFS);
  5589. } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
  5590. SKIP(5);
  5591. return(XML_ATTRIBUTE_IDREF);
  5592. } else if ((RAW == 'I') && (NXT(1) == 'D')) {
  5593. SKIP(2);
  5594. return(XML_ATTRIBUTE_ID);
  5595. } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
  5596. SKIP(6);
  5597. return(XML_ATTRIBUTE_ENTITY);
  5598. } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
  5599. SKIP(8);
  5600. return(XML_ATTRIBUTE_ENTITIES);
  5601. } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
  5602. SKIP(8);
  5603. return(XML_ATTRIBUTE_NMTOKENS);
  5604. } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
  5605. SKIP(7);
  5606. return(XML_ATTRIBUTE_NMTOKEN);
  5607. }
  5608. return(xmlParseEnumeratedType(ctxt, tree));
  5609. }
  5610. /**
  5611. * xmlParseAttributeListDecl:
  5612. * @ctxt: an XML parser context
  5613. *
  5614. * : parse the Attribute list def for an element
  5615. *
  5616. * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  5617. *
  5618. * [53] AttDef ::= S Name S AttType S DefaultDecl
  5619. *
  5620. */
  5621. void
  5622. xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
  5623. const xmlChar *elemName;
  5624. const xmlChar *attrName;
  5625. xmlEnumerationPtr tree;
  5626. if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
  5627. int inputid = ctxt->input->id;
  5628. SKIP(9);
  5629. if (SKIP_BLANKS == 0) {
  5630. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5631. "Space required after '<!ATTLIST'\n");
  5632. }
  5633. elemName = xmlParseName(ctxt);
  5634. if (elemName == NULL) {
  5635. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5636. "ATTLIST: no name for Element\n");
  5637. return;
  5638. }
  5639. SKIP_BLANKS;
  5640. GROW;
  5641. while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
  5642. int type;
  5643. int def;
  5644. xmlChar *defaultValue = NULL;
  5645. GROW;
  5646. tree = NULL;
  5647. attrName = xmlParseName(ctxt);
  5648. if (attrName == NULL) {
  5649. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5650. "ATTLIST: no name for Attribute\n");
  5651. break;
  5652. }
  5653. GROW;
  5654. if (SKIP_BLANKS == 0) {
  5655. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5656. "Space required after the attribute name\n");
  5657. break;
  5658. }
  5659. type = xmlParseAttributeType(ctxt, &tree);
  5660. if (type <= 0) {
  5661. break;
  5662. }
  5663. GROW;
  5664. if (SKIP_BLANKS == 0) {
  5665. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5666. "Space required after the attribute type\n");
  5667. if (tree != NULL)
  5668. xmlFreeEnumeration(tree);
  5669. break;
  5670. }
  5671. def = xmlParseDefaultDecl(ctxt, &defaultValue);
  5672. if (def <= 0) {
  5673. if (defaultValue != NULL)
  5674. xmlFree(defaultValue);
  5675. if (tree != NULL)
  5676. xmlFreeEnumeration(tree);
  5677. break;
  5678. }
  5679. if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
  5680. xmlAttrNormalizeSpace(defaultValue, defaultValue);
  5681. GROW;
  5682. if (RAW != '>') {
  5683. if (SKIP_BLANKS == 0) {
  5684. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5685. "Space required after the attribute default value\n");
  5686. if (defaultValue != NULL)
  5687. xmlFree(defaultValue);
  5688. if (tree != NULL)
  5689. xmlFreeEnumeration(tree);
  5690. break;
  5691. }
  5692. }
  5693. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5694. (ctxt->sax->attributeDecl != NULL))
  5695. ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
  5696. type, def, defaultValue, tree);
  5697. else if (tree != NULL)
  5698. xmlFreeEnumeration(tree);
  5699. if ((ctxt->sax2) && (defaultValue != NULL) &&
  5700. (def != XML_ATTRIBUTE_IMPLIED) &&
  5701. (def != XML_ATTRIBUTE_REQUIRED)) {
  5702. xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
  5703. }
  5704. if (ctxt->sax2) {
  5705. xmlAddSpecialAttr(ctxt, elemName, attrName, type);
  5706. }
  5707. if (defaultValue != NULL)
  5708. xmlFree(defaultValue);
  5709. GROW;
  5710. }
  5711. if (RAW == '>') {
  5712. if (inputid != ctxt->input->id) {
  5713. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5714. "Attribute list declaration doesn't start and"
  5715. " stop in the same entity\n");
  5716. }
  5717. NEXT;
  5718. }
  5719. }
  5720. }
  5721. /**
  5722. * xmlParseElementMixedContentDecl:
  5723. * @ctxt: an XML parser context
  5724. * @inputchk: the input used for the current entity, needed for boundary checks
  5725. *
  5726. * parse the declaration for a Mixed Element content
  5727. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5728. *
  5729. * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
  5730. * '(' S? '#PCDATA' S? ')'
  5731. *
  5732. * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
  5733. *
  5734. * [ VC: No Duplicate Types ]
  5735. * The same name must not appear more than once in a single
  5736. * mixed-content declaration.
  5737. *
  5738. * returns: the list of the xmlElementContentPtr describing the element choices
  5739. */
  5740. xmlElementContentPtr
  5741. xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
  5742. xmlElementContentPtr ret = NULL, cur = NULL, n;
  5743. const xmlChar *elem = NULL;
  5744. GROW;
  5745. if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
  5746. SKIP(7);
  5747. SKIP_BLANKS;
  5748. SHRINK;
  5749. if (RAW == ')') {
  5750. if (ctxt->input->id != inputchk) {
  5751. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5752. "Element content declaration doesn't start and"
  5753. " stop in the same entity\n");
  5754. }
  5755. NEXT;
  5756. ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
  5757. if (ret == NULL)
  5758. return(NULL);
  5759. if (RAW == '*') {
  5760. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5761. NEXT;
  5762. }
  5763. return(ret);
  5764. }
  5765. if ((RAW == '(') || (RAW == '|')) {
  5766. ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
  5767. if (ret == NULL) return(NULL);
  5768. }
  5769. while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
  5770. NEXT;
  5771. if (elem == NULL) {
  5772. ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5773. if (ret == NULL) {
  5774. xmlFreeDocElementContent(ctxt->myDoc, cur);
  5775. return(NULL);
  5776. }
  5777. ret->c1 = cur;
  5778. if (cur != NULL)
  5779. cur->parent = ret;
  5780. cur = ret;
  5781. } else {
  5782. n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5783. if (n == NULL) {
  5784. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5785. return(NULL);
  5786. }
  5787. n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5788. if (n->c1 != NULL)
  5789. n->c1->parent = n;
  5790. cur->c2 = n;
  5791. if (n != NULL)
  5792. n->parent = cur;
  5793. cur = n;
  5794. }
  5795. SKIP_BLANKS;
  5796. elem = xmlParseName(ctxt);
  5797. if (elem == NULL) {
  5798. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5799. "xmlParseElementMixedContentDecl : Name expected\n");
  5800. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5801. return(NULL);
  5802. }
  5803. SKIP_BLANKS;
  5804. GROW;
  5805. }
  5806. if ((RAW == ')') && (NXT(1) == '*')) {
  5807. if (elem != NULL) {
  5808. cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
  5809. XML_ELEMENT_CONTENT_ELEMENT);
  5810. if (cur->c2 != NULL)
  5811. cur->c2->parent = cur;
  5812. }
  5813. if (ret != NULL)
  5814. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5815. if (ctxt->input->id != inputchk) {
  5816. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5817. "Element content declaration doesn't start and"
  5818. " stop in the same entity\n");
  5819. }
  5820. SKIP(2);
  5821. } else {
  5822. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5823. xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
  5824. return(NULL);
  5825. }
  5826. } else {
  5827. xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
  5828. }
  5829. return(ret);
  5830. }
  5831. /**
  5832. * xmlParseElementChildrenContentDeclPriv:
  5833. * @ctxt: an XML parser context
  5834. * @inputchk: the input used for the current entity, needed for boundary checks
  5835. * @depth: the level of recursion
  5836. *
  5837. * parse the declaration for a Mixed Element content
  5838. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5839. *
  5840. *
  5841. * [47] children ::= (choice | seq) ('?' | '*' | '+')?
  5842. *
  5843. * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  5844. *
  5845. * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  5846. *
  5847. * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  5848. *
  5849. * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
  5850. * TODO Parameter-entity replacement text must be properly nested
  5851. * with parenthesized groups. That is to say, if either of the
  5852. * opening or closing parentheses in a choice, seq, or Mixed
  5853. * construct is contained in the replacement text for a parameter
  5854. * entity, both must be contained in the same replacement text. For
  5855. * interoperability, if a parameter-entity reference appears in a
  5856. * choice, seq, or Mixed construct, its replacement text should not
  5857. * be empty, and neither the first nor last non-blank character of
  5858. * the replacement text should be a connector (| or ,).
  5859. *
  5860. * Returns the tree of xmlElementContentPtr describing the element
  5861. * hierarchy.
  5862. */
  5863. static xmlElementContentPtr
  5864. xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
  5865. int depth) {
  5866. xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
  5867. const xmlChar *elem;
  5868. xmlChar type = 0;
  5869. if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  5870. (depth > 2048)) {
  5871. xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
  5872. "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
  5873. depth);
  5874. return(NULL);
  5875. }
  5876. SKIP_BLANKS;
  5877. GROW;
  5878. if (RAW == '(') {
  5879. int inputid = ctxt->input->id;
  5880. /* Recurse on first child */
  5881. NEXT;
  5882. SKIP_BLANKS;
  5883. cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
  5884. depth + 1);
  5885. if (cur == NULL)
  5886. return(NULL);
  5887. SKIP_BLANKS;
  5888. GROW;
  5889. } else {
  5890. elem = xmlParseName(ctxt);
  5891. if (elem == NULL) {
  5892. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
  5893. return(NULL);
  5894. }
  5895. cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5896. if (cur == NULL) {
  5897. xmlErrMemory(ctxt, NULL);
  5898. return(NULL);
  5899. }
  5900. GROW;
  5901. if (RAW == '?') {
  5902. cur->ocur = XML_ELEMENT_CONTENT_OPT;
  5903. NEXT;
  5904. } else if (RAW == '*') {
  5905. cur->ocur = XML_ELEMENT_CONTENT_MULT;
  5906. NEXT;
  5907. } else if (RAW == '+') {
  5908. cur->ocur = XML_ELEMENT_CONTENT_PLUS;
  5909. NEXT;
  5910. } else {
  5911. cur->ocur = XML_ELEMENT_CONTENT_ONCE;
  5912. }
  5913. GROW;
  5914. }
  5915. SKIP_BLANKS;
  5916. SHRINK;
  5917. while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
  5918. /*
  5919. * Each loop we parse one separator and one element.
  5920. */
  5921. if (RAW == ',') {
  5922. if (type == 0) type = CUR;
  5923. /*
  5924. * Detect "Name | Name , Name" error
  5925. */
  5926. else if (type != CUR) {
  5927. xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
  5928. "xmlParseElementChildrenContentDecl : '%c' expected\n",
  5929. type);
  5930. if ((last != NULL) && (last != ret))
  5931. xmlFreeDocElementContent(ctxt->myDoc, last);
  5932. if (ret != NULL)
  5933. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5934. return(NULL);
  5935. }
  5936. NEXT;
  5937. op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
  5938. if (op == NULL) {
  5939. if ((last != NULL) && (last != ret))
  5940. xmlFreeDocElementContent(ctxt->myDoc, last);
  5941. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5942. return(NULL);
  5943. }
  5944. if (last == NULL) {
  5945. op->c1 = ret;
  5946. if (ret != NULL)
  5947. ret->parent = op;
  5948. ret = cur = op;
  5949. } else {
  5950. cur->c2 = op;
  5951. if (op != NULL)
  5952. op->parent = cur;
  5953. op->c1 = last;
  5954. if (last != NULL)
  5955. last->parent = op;
  5956. cur =op;
  5957. last = NULL;
  5958. }
  5959. } else if (RAW == '|') {
  5960. if (type == 0) type = CUR;
  5961. /*
  5962. * Detect "Name , Name | Name" error
  5963. */
  5964. else if (type != CUR) {
  5965. xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
  5966. "xmlParseElementChildrenContentDecl : '%c' expected\n",
  5967. type);
  5968. if ((last != NULL) && (last != ret))
  5969. xmlFreeDocElementContent(ctxt->myDoc, last);
  5970. if (ret != NULL)
  5971. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5972. return(NULL);
  5973. }
  5974. NEXT;
  5975. op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5976. if (op == NULL) {
  5977. if ((last != NULL) && (last != ret))
  5978. xmlFreeDocElementContent(ctxt->myDoc, last);
  5979. if (ret != NULL)
  5980. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5981. return(NULL);
  5982. }
  5983. if (last == NULL) {
  5984. op->c1 = ret;
  5985. if (ret != NULL)
  5986. ret->parent = op;
  5987. ret = cur = op;
  5988. } else {
  5989. cur->c2 = op;
  5990. if (op != NULL)
  5991. op->parent = cur;
  5992. op->c1 = last;
  5993. if (last != NULL)
  5994. last->parent = op;
  5995. cur =op;
  5996. last = NULL;
  5997. }
  5998. } else {
  5999. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
  6000. if ((last != NULL) && (last != ret))
  6001. xmlFreeDocElementContent(ctxt->myDoc, last);
  6002. if (ret != NULL)
  6003. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6004. return(NULL);
  6005. }
  6006. GROW;
  6007. SKIP_BLANKS;
  6008. GROW;
  6009. if (RAW == '(') {
  6010. int inputid = ctxt->input->id;
  6011. /* Recurse on second child */
  6012. NEXT;
  6013. SKIP_BLANKS;
  6014. last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
  6015. depth + 1);
  6016. if (last == NULL) {
  6017. if (ret != NULL)
  6018. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6019. return(NULL);
  6020. }
  6021. SKIP_BLANKS;
  6022. } else {
  6023. elem = xmlParseName(ctxt);
  6024. if (elem == NULL) {
  6025. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
  6026. if (ret != NULL)
  6027. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6028. return(NULL);
  6029. }
  6030. last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  6031. if (last == NULL) {
  6032. if (ret != NULL)
  6033. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6034. return(NULL);
  6035. }
  6036. if (RAW == '?') {
  6037. last->ocur = XML_ELEMENT_CONTENT_OPT;
  6038. NEXT;
  6039. } else if (RAW == '*') {
  6040. last->ocur = XML_ELEMENT_CONTENT_MULT;
  6041. NEXT;
  6042. } else if (RAW == '+') {
  6043. last->ocur = XML_ELEMENT_CONTENT_PLUS;
  6044. NEXT;
  6045. } else {
  6046. last->ocur = XML_ELEMENT_CONTENT_ONCE;
  6047. }
  6048. }
  6049. SKIP_BLANKS;
  6050. GROW;
  6051. }
  6052. if ((cur != NULL) && (last != NULL)) {
  6053. cur->c2 = last;
  6054. if (last != NULL)
  6055. last->parent = cur;
  6056. }
  6057. if (ctxt->input->id != inputchk) {
  6058. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6059. "Element content declaration doesn't start and stop in"
  6060. " the same entity\n");
  6061. }
  6062. NEXT;
  6063. if (RAW == '?') {
  6064. if (ret != NULL) {
  6065. if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
  6066. (ret->ocur == XML_ELEMENT_CONTENT_MULT))
  6067. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6068. else
  6069. ret->ocur = XML_ELEMENT_CONTENT_OPT;
  6070. }
  6071. NEXT;
  6072. } else if (RAW == '*') {
  6073. if (ret != NULL) {
  6074. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6075. cur = ret;
  6076. /*
  6077. * Some normalization:
  6078. * (a | b* | c?)* == (a | b | c)*
  6079. */
  6080. while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
  6081. if ((cur->c1 != NULL) &&
  6082. ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6083. (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
  6084. cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
  6085. if ((cur->c2 != NULL) &&
  6086. ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6087. (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
  6088. cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
  6089. cur = cur->c2;
  6090. }
  6091. }
  6092. NEXT;
  6093. } else if (RAW == '+') {
  6094. if (ret != NULL) {
  6095. int found = 0;
  6096. if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6097. (ret->ocur == XML_ELEMENT_CONTENT_MULT))
  6098. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6099. else
  6100. ret->ocur = XML_ELEMENT_CONTENT_PLUS;
  6101. /*
  6102. * Some normalization:
  6103. * (a | b*)+ == (a | b)*
  6104. * (a | b?)+ == (a | b)*
  6105. */
  6106. while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
  6107. if ((cur->c1 != NULL) &&
  6108. ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6109. (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
  6110. cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
  6111. found = 1;
  6112. }
  6113. if ((cur->c2 != NULL) &&
  6114. ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6115. (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
  6116. cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
  6117. found = 1;
  6118. }
  6119. cur = cur->c2;
  6120. }
  6121. if (found)
  6122. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6123. }
  6124. NEXT;
  6125. }
  6126. return(ret);
  6127. }
  6128. /**
  6129. * xmlParseElementChildrenContentDecl:
  6130. * @ctxt: an XML parser context
  6131. * @inputchk: the input used for the current entity, needed for boundary checks
  6132. *
  6133. * parse the declaration for a Mixed Element content
  6134. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  6135. *
  6136. * [47] children ::= (choice | seq) ('?' | '*' | '+')?
  6137. *
  6138. * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  6139. *
  6140. * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  6141. *
  6142. * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  6143. *
  6144. * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
  6145. * TODO Parameter-entity replacement text must be properly nested
  6146. * with parenthesized groups. That is to say, if either of the
  6147. * opening or closing parentheses in a choice, seq, or Mixed
  6148. * construct is contained in the replacement text for a parameter
  6149. * entity, both must be contained in the same replacement text. For
  6150. * interoperability, if a parameter-entity reference appears in a
  6151. * choice, seq, or Mixed construct, its replacement text should not
  6152. * be empty, and neither the first nor last non-blank character of
  6153. * the replacement text should be a connector (| or ,).
  6154. *
  6155. * Returns the tree of xmlElementContentPtr describing the element
  6156. * hierarchy.
  6157. */
  6158. xmlElementContentPtr
  6159. xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
  6160. /* stub left for API/ABI compat */
  6161. return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
  6162. }
  6163. /**
  6164. * xmlParseElementContentDecl:
  6165. * @ctxt: an XML parser context
  6166. * @name: the name of the element being defined.
  6167. * @result: the Element Content pointer will be stored here if any
  6168. *
  6169. * parse the declaration for an Element content either Mixed or Children,
  6170. * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
  6171. *
  6172. * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  6173. *
  6174. * returns: the type of element content XML_ELEMENT_TYPE_xxx
  6175. */
  6176. int
  6177. xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
  6178. xmlElementContentPtr *result) {
  6179. xmlElementContentPtr tree = NULL;
  6180. int inputid = ctxt->input->id;
  6181. int res;
  6182. *result = NULL;
  6183. if (RAW != '(') {
  6184. xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
  6185. "xmlParseElementContentDecl : %s '(' expected\n", name);
  6186. return(-1);
  6187. }
  6188. NEXT;
  6189. GROW;
  6190. if (ctxt->instate == XML_PARSER_EOF)
  6191. return(-1);
  6192. SKIP_BLANKS;
  6193. if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
  6194. tree = xmlParseElementMixedContentDecl(ctxt, inputid);
  6195. res = XML_ELEMENT_TYPE_MIXED;
  6196. } else {
  6197. tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
  6198. res = XML_ELEMENT_TYPE_ELEMENT;
  6199. }
  6200. SKIP_BLANKS;
  6201. *result = tree;
  6202. return(res);
  6203. }
  6204. /**
  6205. * xmlParseElementDecl:
  6206. * @ctxt: an XML parser context
  6207. *
  6208. * parse an Element declaration.
  6209. *
  6210. * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  6211. *
  6212. * [ VC: Unique Element Type Declaration ]
  6213. * No element type may be declared more than once
  6214. *
  6215. * Returns the type of the element, or -1 in case of error
  6216. */
  6217. int
  6218. xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
  6219. const xmlChar *name;
  6220. int ret = -1;
  6221. xmlElementContentPtr content = NULL;
  6222. /* GROW; done in the caller */
  6223. if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
  6224. int inputid = ctxt->input->id;
  6225. SKIP(9);
  6226. if (SKIP_BLANKS == 0) {
  6227. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6228. "Space required after 'ELEMENT'\n");
  6229. return(-1);
  6230. }
  6231. name = xmlParseName(ctxt);
  6232. if (name == NULL) {
  6233. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  6234. "xmlParseElementDecl: no name for Element\n");
  6235. return(-1);
  6236. }
  6237. if (SKIP_BLANKS == 0) {
  6238. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6239. "Space required after the element name\n");
  6240. }
  6241. if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
  6242. SKIP(5);
  6243. /*
  6244. * Element must always be empty.
  6245. */
  6246. ret = XML_ELEMENT_TYPE_EMPTY;
  6247. } else if ((RAW == 'A') && (NXT(1) == 'N') &&
  6248. (NXT(2) == 'Y')) {
  6249. SKIP(3);
  6250. /*
  6251. * Element is a generic container.
  6252. */
  6253. ret = XML_ELEMENT_TYPE_ANY;
  6254. } else if (RAW == '(') {
  6255. ret = xmlParseElementContentDecl(ctxt, name, &content);
  6256. } else {
  6257. /*
  6258. * [ WFC: PEs in Internal Subset ] error handling.
  6259. */
  6260. if ((RAW == '%') && (ctxt->external == 0) &&
  6261. (ctxt->inputNr == 1)) {
  6262. xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
  6263. "PEReference: forbidden within markup decl in internal subset\n");
  6264. } else {
  6265. xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
  6266. "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
  6267. }
  6268. return(-1);
  6269. }
  6270. SKIP_BLANKS;
  6271. if (RAW != '>') {
  6272. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  6273. if (content != NULL) {
  6274. xmlFreeDocElementContent(ctxt->myDoc, content);
  6275. }
  6276. } else {
  6277. if (inputid != ctxt->input->id) {
  6278. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6279. "Element declaration doesn't start and stop in"
  6280. " the same entity\n");
  6281. }
  6282. NEXT;
  6283. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  6284. (ctxt->sax->elementDecl != NULL)) {
  6285. if (content != NULL)
  6286. content->parent = NULL;
  6287. ctxt->sax->elementDecl(ctxt->userData, name, ret,
  6288. content);
  6289. if ((content != NULL) && (content->parent == NULL)) {
  6290. /*
  6291. * this is a trick: if xmlAddElementDecl is called,
  6292. * instead of copying the full tree it is plugged directly
  6293. * if called from the parser. Avoid duplicating the
  6294. * interfaces or change the API/ABI
  6295. */
  6296. xmlFreeDocElementContent(ctxt->myDoc, content);
  6297. }
  6298. } else if (content != NULL) {
  6299. xmlFreeDocElementContent(ctxt->myDoc, content);
  6300. }
  6301. }
  6302. }
  6303. return(ret);
  6304. }
  6305. /**
  6306. * xmlParseConditionalSections
  6307. * @ctxt: an XML parser context
  6308. *
  6309. * [61] conditionalSect ::= includeSect | ignoreSect
  6310. * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
  6311. * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
  6312. * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
  6313. * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
  6314. */
  6315. static void
  6316. xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
  6317. int *inputIds = NULL;
  6318. size_t inputIdsSize = 0;
  6319. size_t depth = 0;
  6320. while (ctxt->instate != XML_PARSER_EOF) {
  6321. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6322. int id = ctxt->input->id;
  6323. SKIP(3);
  6324. SKIP_BLANKS;
  6325. if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
  6326. SKIP(7);
  6327. SKIP_BLANKS;
  6328. if (RAW != '[') {
  6329. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
  6330. xmlHaltParser(ctxt);
  6331. goto error;
  6332. }
  6333. if (ctxt->input->id != id) {
  6334. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6335. "All markup of the conditional section is"
  6336. " not in the same entity\n");
  6337. }
  6338. NEXT;
  6339. if (inputIdsSize <= depth) {
  6340. int *tmp;
  6341. inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
  6342. tmp = (int *) xmlRealloc(inputIds,
  6343. inputIdsSize * sizeof(int));
  6344. if (tmp == NULL) {
  6345. xmlErrMemory(ctxt, NULL);
  6346. goto error;
  6347. }
  6348. inputIds = tmp;
  6349. }
  6350. inputIds[depth] = id;
  6351. depth++;
  6352. } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
  6353. int state;
  6354. xmlParserInputState instate;
  6355. size_t ignoreDepth = 0;
  6356. SKIP(6);
  6357. SKIP_BLANKS;
  6358. if (RAW != '[') {
  6359. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
  6360. xmlHaltParser(ctxt);
  6361. goto error;
  6362. }
  6363. if (ctxt->input->id != id) {
  6364. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6365. "All markup of the conditional section is"
  6366. " not in the same entity\n");
  6367. }
  6368. NEXT;
  6369. /*
  6370. * Parse up to the end of the conditional section but disable
  6371. * SAX event generating DTD building in the meantime
  6372. */
  6373. state = ctxt->disableSAX;
  6374. instate = ctxt->instate;
  6375. if (ctxt->recovery == 0) ctxt->disableSAX = 1;
  6376. ctxt->instate = XML_PARSER_IGNORE;
  6377. while (RAW != 0) {
  6378. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6379. SKIP(3);
  6380. ignoreDepth++;
  6381. /* Check for integer overflow */
  6382. if (ignoreDepth == 0) {
  6383. xmlErrMemory(ctxt, NULL);
  6384. goto error;
  6385. }
  6386. } else if ((RAW == ']') && (NXT(1) == ']') &&
  6387. (NXT(2) == '>')) {
  6388. if (ignoreDepth == 0)
  6389. break;
  6390. SKIP(3);
  6391. ignoreDepth--;
  6392. } else {
  6393. NEXT;
  6394. }
  6395. }
  6396. ctxt->disableSAX = state;
  6397. ctxt->instate = instate;
  6398. if (RAW == 0) {
  6399. xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
  6400. goto error;
  6401. }
  6402. if (ctxt->input->id != id) {
  6403. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6404. "All markup of the conditional section is"
  6405. " not in the same entity\n");
  6406. }
  6407. SKIP(3);
  6408. } else {
  6409. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
  6410. xmlHaltParser(ctxt);
  6411. goto error;
  6412. }
  6413. } else if ((depth > 0) &&
  6414. (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
  6415. depth--;
  6416. if (ctxt->input->id != inputIds[depth]) {
  6417. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6418. "All markup of the conditional section is not"
  6419. " in the same entity\n");
  6420. }
  6421. SKIP(3);
  6422. } else {
  6423. const xmlChar *check = CUR_PTR;
  6424. unsigned int cons = ctxt->input->consumed;
  6425. xmlParseMarkupDecl(ctxt);
  6426. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  6427. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6428. xmlHaltParser(ctxt);
  6429. goto error;
  6430. }
  6431. }
  6432. if (depth == 0)
  6433. break;
  6434. SKIP_BLANKS;
  6435. GROW;
  6436. }
  6437. error:
  6438. xmlFree(inputIds);
  6439. }
  6440. /**
  6441. * xmlParseMarkupDecl:
  6442. * @ctxt: an XML parser context
  6443. *
  6444. * parse Markup declarations
  6445. *
  6446. * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
  6447. * NotationDecl | PI | Comment
  6448. *
  6449. * [ VC: Proper Declaration/PE Nesting ]
  6450. * Parameter-entity replacement text must be properly nested with
  6451. * markup declarations. That is to say, if either the first character
  6452. * or the last character of a markup declaration (markupdecl above) is
  6453. * contained in the replacement text for a parameter-entity reference,
  6454. * both must be contained in the same replacement text.
  6455. *
  6456. * [ WFC: PEs in Internal Subset ]
  6457. * In the internal DTD subset, parameter-entity references can occur
  6458. * only where markup declarations can occur, not within markup declarations.
  6459. * (This does not apply to references that occur in external parameter
  6460. * entities or to the external subset.)
  6461. */
  6462. void
  6463. xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
  6464. GROW;
  6465. if (CUR == '<') {
  6466. if (NXT(1) == '!') {
  6467. switch (NXT(2)) {
  6468. case 'E':
  6469. if (NXT(3) == 'L')
  6470. xmlParseElementDecl(ctxt);
  6471. else if (NXT(3) == 'N')
  6472. xmlParseEntityDecl(ctxt);
  6473. break;
  6474. case 'A':
  6475. xmlParseAttributeListDecl(ctxt);
  6476. break;
  6477. case 'N':
  6478. xmlParseNotationDecl(ctxt);
  6479. break;
  6480. case '-':
  6481. xmlParseComment(ctxt);
  6482. break;
  6483. default:
  6484. /* there is an error but it will be detected later */
  6485. break;
  6486. }
  6487. } else if (NXT(1) == '?') {
  6488. xmlParsePI(ctxt);
  6489. }
  6490. }
  6491. /*
  6492. * detect requirement to exit there and act accordingly
  6493. * and avoid having instate overridden later on
  6494. */
  6495. if (ctxt->instate == XML_PARSER_EOF)
  6496. return;
  6497. ctxt->instate = XML_PARSER_DTD;
  6498. }
  6499. /**
  6500. * xmlParseTextDecl:
  6501. * @ctxt: an XML parser context
  6502. *
  6503. * parse an XML declaration header for external entities
  6504. *
  6505. * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  6506. */
  6507. void
  6508. xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
  6509. xmlChar *version;
  6510. const xmlChar *encoding;
  6511. int oldstate;
  6512. /*
  6513. * We know that '<?xml' is here.
  6514. */
  6515. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  6516. SKIP(5);
  6517. } else {
  6518. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
  6519. return;
  6520. }
  6521. /* Avoid expansion of parameter entities when skipping blanks. */
  6522. oldstate = ctxt->instate;
  6523. ctxt->instate = XML_PARSER_START;
  6524. if (SKIP_BLANKS == 0) {
  6525. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6526. "Space needed after '<?xml'\n");
  6527. }
  6528. /*
  6529. * We may have the VersionInfo here.
  6530. */
  6531. version = xmlParseVersionInfo(ctxt);
  6532. if (version == NULL)
  6533. version = xmlCharStrdup(XML_DEFAULT_VERSION);
  6534. else {
  6535. if (SKIP_BLANKS == 0) {
  6536. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6537. "Space needed here\n");
  6538. }
  6539. }
  6540. ctxt->input->version = version;
  6541. /*
  6542. * We must have the encoding declaration
  6543. */
  6544. encoding = xmlParseEncodingDecl(ctxt);
  6545. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  6546. /*
  6547. * The XML REC instructs us to stop parsing right here
  6548. */
  6549. ctxt->instate = oldstate;
  6550. return;
  6551. }
  6552. if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
  6553. xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
  6554. "Missing encoding in text declaration\n");
  6555. }
  6556. SKIP_BLANKS;
  6557. if ((RAW == '?') && (NXT(1) == '>')) {
  6558. SKIP(2);
  6559. } else if (RAW == '>') {
  6560. /* Deprecated old WD ... */
  6561. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  6562. NEXT;
  6563. } else {
  6564. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  6565. MOVETO_ENDTAG(CUR_PTR);
  6566. NEXT;
  6567. }
  6568. ctxt->instate = oldstate;
  6569. }
  6570. /**
  6571. * xmlParseExternalSubset:
  6572. * @ctxt: an XML parser context
  6573. * @ExternalID: the external identifier
  6574. * @SystemID: the system identifier (or URL)
  6575. *
  6576. * parse Markup declarations from an external subset
  6577. *
  6578. * [30] extSubset ::= textDecl? extSubsetDecl
  6579. *
  6580. * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
  6581. */
  6582. void
  6583. xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
  6584. const xmlChar *SystemID) {
  6585. xmlDetectSAX2(ctxt);
  6586. GROW;
  6587. if ((ctxt->encoding == NULL) &&
  6588. (ctxt->input->end - ctxt->input->cur >= 4)) {
  6589. xmlChar start[4];
  6590. xmlCharEncoding enc;
  6591. start[0] = RAW;
  6592. start[1] = NXT(1);
  6593. start[2] = NXT(2);
  6594. start[3] = NXT(3);
  6595. enc = xmlDetectCharEncoding(start, 4);
  6596. if (enc != XML_CHAR_ENCODING_NONE)
  6597. xmlSwitchEncoding(ctxt, enc);
  6598. }
  6599. if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
  6600. xmlParseTextDecl(ctxt);
  6601. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  6602. /*
  6603. * The XML REC instructs us to stop parsing right here
  6604. */
  6605. xmlHaltParser(ctxt);
  6606. return;
  6607. }
  6608. }
  6609. if (ctxt->myDoc == NULL) {
  6610. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  6611. if (ctxt->myDoc == NULL) {
  6612. xmlErrMemory(ctxt, "New Doc failed");
  6613. return;
  6614. }
  6615. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  6616. }
  6617. if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
  6618. xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
  6619. ctxt->instate = XML_PARSER_DTD;
  6620. ctxt->external = 1;
  6621. SKIP_BLANKS;
  6622. while (((RAW == '<') && (NXT(1) == '?')) ||
  6623. ((RAW == '<') && (NXT(1) == '!')) ||
  6624. (RAW == '%')) {
  6625. const xmlChar *check = CUR_PTR;
  6626. unsigned int cons = ctxt->input->consumed;
  6627. GROW;
  6628. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6629. xmlParseConditionalSections(ctxt);
  6630. } else
  6631. xmlParseMarkupDecl(ctxt);
  6632. SKIP_BLANKS;
  6633. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  6634. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6635. break;
  6636. }
  6637. }
  6638. if (RAW != 0) {
  6639. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6640. }
  6641. }
  6642. /**
  6643. * xmlParseReference:
  6644. * @ctxt: an XML parser context
  6645. *
  6646. * parse and handle entity references in content, depending on the SAX
  6647. * interface, this may end-up in a call to character() if this is a
  6648. * CharRef, a predefined entity, if there is no reference() callback.
  6649. * or if the parser was asked to switch to that mode.
  6650. *
  6651. * [67] Reference ::= EntityRef | CharRef
  6652. */
  6653. void
  6654. xmlParseReference(xmlParserCtxtPtr ctxt) {
  6655. xmlEntityPtr ent;
  6656. xmlChar *val;
  6657. int was_checked;
  6658. xmlNodePtr list = NULL;
  6659. xmlParserErrors ret = XML_ERR_OK;
  6660. if (RAW != '&')
  6661. return;
  6662. /*
  6663. * Simple case of a CharRef
  6664. */
  6665. if (NXT(1) == '#') {
  6666. int i = 0;
  6667. xmlChar out[16];
  6668. int hex = NXT(2);
  6669. int value = xmlParseCharRef(ctxt);
  6670. if (value == 0)
  6671. return;
  6672. if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
  6673. /*
  6674. * So we are using non-UTF-8 buffers
  6675. * Check that the char fit on 8bits, if not
  6676. * generate a CharRef.
  6677. */
  6678. if (value <= 0xFF) {
  6679. out[0] = value;
  6680. out[1] = 0;
  6681. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6682. (!ctxt->disableSAX))
  6683. ctxt->sax->characters(ctxt->userData, out, 1);
  6684. } else {
  6685. if ((hex == 'x') || (hex == 'X'))
  6686. snprintf((char *)out, sizeof(out), "#x%X", value);
  6687. else
  6688. snprintf((char *)out, sizeof(out), "#%d", value);
  6689. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6690. (!ctxt->disableSAX))
  6691. ctxt->sax->reference(ctxt->userData, out);
  6692. }
  6693. } else {
  6694. /*
  6695. * Just encode the value in UTF-8
  6696. */
  6697. COPY_BUF(0 ,out, i, value);
  6698. out[i] = 0;
  6699. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6700. (!ctxt->disableSAX))
  6701. ctxt->sax->characters(ctxt->userData, out, i);
  6702. }
  6703. return;
  6704. }
  6705. /*
  6706. * We are seeing an entity reference
  6707. */
  6708. ent = xmlParseEntityRef(ctxt);
  6709. if (ent == NULL) return;
  6710. if (!ctxt->wellFormed)
  6711. return;
  6712. was_checked = ent->checked;
  6713. /* special case of predefined entities */
  6714. if ((ent->name == NULL) ||
  6715. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  6716. val = ent->content;
  6717. if (val == NULL) return;
  6718. /*
  6719. * inline the entity.
  6720. */
  6721. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6722. (!ctxt->disableSAX))
  6723. ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
  6724. return;
  6725. }
  6726. /*
  6727. * The first reference to the entity trigger a parsing phase
  6728. * where the ent->children is filled with the result from
  6729. * the parsing.
  6730. * Note: external parsed entities will not be loaded, it is not
  6731. * required for a non-validating parser, unless the parsing option
  6732. * of validating, or substituting entities were given. Doing so is
  6733. * far more secure as the parser will only process data coming from
  6734. * the document entity by default.
  6735. */
  6736. if (((ent->checked == 0) ||
  6737. ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
  6738. ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
  6739. (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
  6740. unsigned long oldnbent = ctxt->nbentities, diff;
  6741. /*
  6742. * This is a bit hackish but this seems the best
  6743. * way to make sure both SAX and DOM entity support
  6744. * behaves okay.
  6745. */
  6746. void *user_data;
  6747. if (ctxt->userData == ctxt)
  6748. user_data = NULL;
  6749. else
  6750. user_data = ctxt->userData;
  6751. /*
  6752. * Check that this entity is well formed
  6753. * 4.3.2: An internal general parsed entity is well-formed
  6754. * if its replacement text matches the production labeled
  6755. * content.
  6756. */
  6757. if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
  6758. ctxt->depth++;
  6759. ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
  6760. user_data, &list);
  6761. ctxt->depth--;
  6762. } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
  6763. ctxt->depth++;
  6764. ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
  6765. user_data, ctxt->depth, ent->URI,
  6766. ent->ExternalID, &list);
  6767. ctxt->depth--;
  6768. } else {
  6769. ret = XML_ERR_ENTITY_PE_INTERNAL;
  6770. xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
  6771. "invalid entity type found\n", NULL);
  6772. }
  6773. /*
  6774. * Store the number of entities needing parsing for this entity
  6775. * content and do checkings
  6776. */
  6777. diff = ctxt->nbentities - oldnbent + 1;
  6778. if (diff > INT_MAX / 2)
  6779. diff = INT_MAX / 2;
  6780. ent->checked = diff * 2;
  6781. if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
  6782. ent->checked |= 1;
  6783. if (ret == XML_ERR_ENTITY_LOOP) {
  6784. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  6785. xmlHaltParser(ctxt);
  6786. xmlFreeNodeList(list);
  6787. return;
  6788. }
  6789. if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
  6790. xmlFreeNodeList(list);
  6791. return;
  6792. }
  6793. if ((ret == XML_ERR_OK) && (list != NULL)) {
  6794. if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
  6795. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
  6796. (ent->children == NULL)) {
  6797. ent->children = list;
  6798. /*
  6799. * Prune it directly in the generated document
  6800. * except for single text nodes.
  6801. */
  6802. if ((ctxt->replaceEntities == 0) ||
  6803. (ctxt->parseMode == XML_PARSE_READER) ||
  6804. ((list->type == XML_TEXT_NODE) &&
  6805. (list->next == NULL))) {
  6806. ent->owner = 1;
  6807. while (list != NULL) {
  6808. list->parent = (xmlNodePtr) ent;
  6809. xmlSetTreeDoc(list, ent->doc);
  6810. if (list->next == NULL)
  6811. ent->last = list;
  6812. list = list->next;
  6813. }
  6814. list = NULL;
  6815. } else {
  6816. ent->owner = 0;
  6817. while (list != NULL) {
  6818. list->parent = (xmlNodePtr) ctxt->node;
  6819. list->doc = ctxt->myDoc;
  6820. if (list->next == NULL)
  6821. ent->last = list;
  6822. list = list->next;
  6823. }
  6824. list = ent->children;
  6825. #ifdef LIBXML_LEGACY_ENABLED
  6826. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6827. xmlAddEntityReference(ent, list, NULL);
  6828. #endif /* LIBXML_LEGACY_ENABLED */
  6829. }
  6830. } else {
  6831. xmlFreeNodeList(list);
  6832. list = NULL;
  6833. }
  6834. } else if ((ret != XML_ERR_OK) &&
  6835. (ret != XML_WAR_UNDECLARED_ENTITY)) {
  6836. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  6837. "Entity '%s' failed to parse\n", ent->name);
  6838. if (ent->content != NULL)
  6839. ent->content[0] = 0;
  6840. xmlParserEntityCheck(ctxt, 0, ent, 0);
  6841. } else if (list != NULL) {
  6842. xmlFreeNodeList(list);
  6843. list = NULL;
  6844. }
  6845. if (ent->checked == 0)
  6846. ent->checked = 2;
  6847. /* Prevent entity from being parsed and expanded twice (Bug 760367). */
  6848. was_checked = 0;
  6849. } else if (ent->checked != 1) {
  6850. ctxt->nbentities += ent->checked / 2;
  6851. }
  6852. /*
  6853. * Now that the entity content has been gathered
  6854. * provide it to the application, this can take different forms based
  6855. * on the parsing modes.
  6856. */
  6857. if (ent->children == NULL) {
  6858. /*
  6859. * Probably running in SAX mode and the callbacks don't
  6860. * build the entity content. So unless we already went
  6861. * though parsing for first checking go though the entity
  6862. * content to generate callbacks associated to the entity
  6863. */
  6864. if (was_checked != 0) {
  6865. void *user_data;
  6866. /*
  6867. * This is a bit hackish but this seems the best
  6868. * way to make sure both SAX and DOM entity support
  6869. * behaves okay.
  6870. */
  6871. if (ctxt->userData == ctxt)
  6872. user_data = NULL;
  6873. else
  6874. user_data = ctxt->userData;
  6875. if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
  6876. ctxt->depth++;
  6877. ret = xmlParseBalancedChunkMemoryInternal(ctxt,
  6878. ent->content, user_data, NULL);
  6879. ctxt->depth--;
  6880. } else if (ent->etype ==
  6881. XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
  6882. ctxt->depth++;
  6883. ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
  6884. ctxt->sax, user_data, ctxt->depth,
  6885. ent->URI, ent->ExternalID, NULL);
  6886. ctxt->depth--;
  6887. } else {
  6888. ret = XML_ERR_ENTITY_PE_INTERNAL;
  6889. xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
  6890. "invalid entity type found\n", NULL);
  6891. }
  6892. if (ret == XML_ERR_ENTITY_LOOP) {
  6893. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  6894. return;
  6895. }
  6896. }
  6897. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6898. (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
  6899. /*
  6900. * Entity reference callback comes second, it's somewhat
  6901. * superfluous but a compatibility to historical behaviour
  6902. */
  6903. ctxt->sax->reference(ctxt->userData, ent->name);
  6904. }
  6905. return;
  6906. }
  6907. /*
  6908. * If we didn't get any children for the entity being built
  6909. */
  6910. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6911. (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
  6912. /*
  6913. * Create a node.
  6914. */
  6915. ctxt->sax->reference(ctxt->userData, ent->name);
  6916. return;
  6917. }
  6918. if ((ctxt->replaceEntities) || (ent->children == NULL)) {
  6919. /*
  6920. * There is a problem on the handling of _private for entities
  6921. * (bug 155816): Should we copy the content of the field from
  6922. * the entity (possibly overwriting some value set by the user
  6923. * when a copy is created), should we leave it alone, or should
  6924. * we try to take care of different situations? The problem
  6925. * is exacerbated by the usage of this field by the xmlReader.
  6926. * To fix this bug, we look at _private on the created node
  6927. * and, if it's NULL, we copy in whatever was in the entity.
  6928. * If it's not NULL we leave it alone. This is somewhat of a
  6929. * hack - maybe we should have further tests to determine
  6930. * what to do.
  6931. */
  6932. if ((ctxt->node != NULL) && (ent->children != NULL)) {
  6933. /*
  6934. * Seems we are generating the DOM content, do
  6935. * a simple tree copy for all references except the first
  6936. * In the first occurrence list contains the replacement.
  6937. */
  6938. if (((list == NULL) && (ent->owner == 0)) ||
  6939. (ctxt->parseMode == XML_PARSE_READER)) {
  6940. xmlNodePtr nw = NULL, cur, firstChild = NULL;
  6941. /*
  6942. * We are copying here, make sure there is no abuse
  6943. */
  6944. ctxt->sizeentcopy += ent->length + 5;
  6945. if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
  6946. return;
  6947. /*
  6948. * when operating on a reader, the entities definitions
  6949. * are always owning the entities subtree.
  6950. if (ctxt->parseMode == XML_PARSE_READER)
  6951. ent->owner = 1;
  6952. */
  6953. cur = ent->children;
  6954. while (cur != NULL) {
  6955. nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
  6956. if (nw != NULL) {
  6957. if (nw->_private == NULL)
  6958. nw->_private = cur->_private;
  6959. if (firstChild == NULL){
  6960. firstChild = nw;
  6961. }
  6962. nw = xmlAddChild(ctxt->node, nw);
  6963. }
  6964. if (cur == ent->last) {
  6965. /*
  6966. * needed to detect some strange empty
  6967. * node cases in the reader tests
  6968. */
  6969. if ((ctxt->parseMode == XML_PARSE_READER) &&
  6970. (nw != NULL) &&
  6971. (nw->type == XML_ELEMENT_NODE) &&
  6972. (nw->children == NULL))
  6973. nw->extra = 1;
  6974. break;
  6975. }
  6976. cur = cur->next;
  6977. }
  6978. #ifdef LIBXML_LEGACY_ENABLED
  6979. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6980. xmlAddEntityReference(ent, firstChild, nw);
  6981. #endif /* LIBXML_LEGACY_ENABLED */
  6982. } else if ((list == NULL) || (ctxt->inputNr > 0)) {
  6983. xmlNodePtr nw = NULL, cur, next, last,
  6984. firstChild = NULL;
  6985. /*
  6986. * We are copying here, make sure there is no abuse
  6987. */
  6988. ctxt->sizeentcopy += ent->length + 5;
  6989. if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
  6990. return;
  6991. /*
  6992. * Copy the entity child list and make it the new
  6993. * entity child list. The goal is to make sure any
  6994. * ID or REF referenced will be the one from the
  6995. * document content and not the entity copy.
  6996. */
  6997. cur = ent->children;
  6998. ent->children = NULL;
  6999. last = ent->last;
  7000. ent->last = NULL;
  7001. while (cur != NULL) {
  7002. next = cur->next;
  7003. cur->next = NULL;
  7004. cur->parent = NULL;
  7005. nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
  7006. if (nw != NULL) {
  7007. if (nw->_private == NULL)
  7008. nw->_private = cur->_private;
  7009. if (firstChild == NULL){
  7010. firstChild = cur;
  7011. }
  7012. xmlAddChild((xmlNodePtr) ent, nw);
  7013. xmlAddChild(ctxt->node, cur);
  7014. }
  7015. if (cur == last)
  7016. break;
  7017. cur = next;
  7018. }
  7019. if (ent->owner == 0)
  7020. ent->owner = 1;
  7021. #ifdef LIBXML_LEGACY_ENABLED
  7022. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  7023. xmlAddEntityReference(ent, firstChild, nw);
  7024. #endif /* LIBXML_LEGACY_ENABLED */
  7025. } else {
  7026. const xmlChar *nbktext;
  7027. /*
  7028. * the name change is to avoid coalescing of the
  7029. * node with a possible previous text one which
  7030. * would make ent->children a dangling pointer
  7031. */
  7032. nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
  7033. -1);
  7034. if (ent->children->type == XML_TEXT_NODE)
  7035. ent->children->name = nbktext;
  7036. if ((ent->last != ent->children) &&
  7037. (ent->last->type == XML_TEXT_NODE))
  7038. ent->last->name = nbktext;
  7039. xmlAddChildList(ctxt->node, ent->children);
  7040. }
  7041. /*
  7042. * This is to avoid a nasty side effect, see
  7043. * characters() in SAX.c
  7044. */
  7045. ctxt->nodemem = 0;
  7046. ctxt->nodelen = 0;
  7047. return;
  7048. }
  7049. }
  7050. }
  7051. /**
  7052. * xmlParseEntityRef:
  7053. * @ctxt: an XML parser context
  7054. *
  7055. * parse ENTITY references declarations
  7056. *
  7057. * [68] EntityRef ::= '&' Name ';'
  7058. *
  7059. * [ WFC: Entity Declared ]
  7060. * In a document without any DTD, a document with only an internal DTD
  7061. * subset which contains no parameter entity references, or a document
  7062. * with "standalone='yes'", the Name given in the entity reference
  7063. * must match that in an entity declaration, except that well-formed
  7064. * documents need not declare any of the following entities: amp, lt,
  7065. * gt, apos, quot. The declaration of a parameter entity must precede
  7066. * any reference to it. Similarly, the declaration of a general entity
  7067. * must precede any reference to it which appears in a default value in an
  7068. * attribute-list declaration. Note that if entities are declared in the
  7069. * external subset or in external parameter entities, a non-validating
  7070. * processor is not obligated to read and process their declarations;
  7071. * for such documents, the rule that an entity must be declared is a
  7072. * well-formedness constraint only if standalone='yes'.
  7073. *
  7074. * [ WFC: Parsed Entity ]
  7075. * An entity reference must not contain the name of an unparsed entity
  7076. *
  7077. * Returns the xmlEntityPtr if found, or NULL otherwise.
  7078. */
  7079. xmlEntityPtr
  7080. xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
  7081. const xmlChar *name;
  7082. xmlEntityPtr ent = NULL;
  7083. GROW;
  7084. if (ctxt->instate == XML_PARSER_EOF)
  7085. return(NULL);
  7086. if (RAW != '&')
  7087. return(NULL);
  7088. NEXT;
  7089. name = xmlParseName(ctxt);
  7090. if (name == NULL) {
  7091. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7092. "xmlParseEntityRef: no name\n");
  7093. return(NULL);
  7094. }
  7095. if (RAW != ';') {
  7096. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7097. return(NULL);
  7098. }
  7099. NEXT;
  7100. /*
  7101. * Predefined entities override any extra definition
  7102. */
  7103. if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
  7104. ent = xmlGetPredefinedEntity(name);
  7105. if (ent != NULL)
  7106. return(ent);
  7107. }
  7108. /*
  7109. * Increase the number of entity references parsed
  7110. */
  7111. ctxt->nbentities++;
  7112. /*
  7113. * Ask first SAX for entity resolution, otherwise try the
  7114. * entities which may have stored in the parser context.
  7115. */
  7116. if (ctxt->sax != NULL) {
  7117. if (ctxt->sax->getEntity != NULL)
  7118. ent = ctxt->sax->getEntity(ctxt->userData, name);
  7119. if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
  7120. (ctxt->options & XML_PARSE_OLDSAX))
  7121. ent = xmlGetPredefinedEntity(name);
  7122. if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
  7123. (ctxt->userData==ctxt)) {
  7124. ent = xmlSAX2GetEntity(ctxt, name);
  7125. }
  7126. }
  7127. if (ctxt->instate == XML_PARSER_EOF)
  7128. return(NULL);
  7129. /*
  7130. * [ WFC: Entity Declared ]
  7131. * In a document without any DTD, a document with only an
  7132. * internal DTD subset which contains no parameter entity
  7133. * references, or a document with "standalone='yes'", the
  7134. * Name given in the entity reference must match that in an
  7135. * entity declaration, except that well-formed documents
  7136. * need not declare any of the following entities: amp, lt,
  7137. * gt, apos, quot.
  7138. * The declaration of a parameter entity must precede any
  7139. * reference to it.
  7140. * Similarly, the declaration of a general entity must
  7141. * precede any reference to it which appears in a default
  7142. * value in an attribute-list declaration. Note that if
  7143. * entities are declared in the external subset or in
  7144. * external parameter entities, a non-validating processor
  7145. * is not obligated to read and process their declarations;
  7146. * for such documents, the rule that an entity must be
  7147. * declared is a well-formedness constraint only if
  7148. * standalone='yes'.
  7149. */
  7150. if (ent == NULL) {
  7151. if ((ctxt->standalone == 1) ||
  7152. ((ctxt->hasExternalSubset == 0) &&
  7153. (ctxt->hasPErefs == 0))) {
  7154. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7155. "Entity '%s' not defined\n", name);
  7156. } else {
  7157. xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7158. "Entity '%s' not defined\n", name);
  7159. if ((ctxt->inSubset == 0) &&
  7160. (ctxt->sax != NULL) &&
  7161. (ctxt->sax->reference != NULL)) {
  7162. ctxt->sax->reference(ctxt->userData, name);
  7163. }
  7164. }
  7165. xmlParserEntityCheck(ctxt, 0, ent, 0);
  7166. ctxt->valid = 0;
  7167. }
  7168. /*
  7169. * [ WFC: Parsed Entity ]
  7170. * An entity reference must not contain the name of an
  7171. * unparsed entity
  7172. */
  7173. else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
  7174. xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
  7175. "Entity reference to unparsed entity %s\n", name);
  7176. }
  7177. /*
  7178. * [ WFC: No External Entity References ]
  7179. * Attribute values cannot contain direct or indirect
  7180. * entity references to external entities.
  7181. */
  7182. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7183. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
  7184. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
  7185. "Attribute references external entity '%s'\n", name);
  7186. }
  7187. /*
  7188. * [ WFC: No < in Attribute Values ]
  7189. * The replacement text of any entity referred to directly or
  7190. * indirectly in an attribute value (other than "&lt;") must
  7191. * not contain a <.
  7192. */
  7193. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7194. (ent != NULL) &&
  7195. (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
  7196. if (((ent->checked & 1) || (ent->checked == 0)) &&
  7197. (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
  7198. xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
  7199. "'<' in entity '%s' is not allowed in attributes values\n", name);
  7200. }
  7201. }
  7202. /*
  7203. * Internal check, no parameter entities here ...
  7204. */
  7205. else {
  7206. switch (ent->etype) {
  7207. case XML_INTERNAL_PARAMETER_ENTITY:
  7208. case XML_EXTERNAL_PARAMETER_ENTITY:
  7209. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  7210. "Attempt to reference the parameter entity '%s'\n",
  7211. name);
  7212. break;
  7213. default:
  7214. break;
  7215. }
  7216. }
  7217. /*
  7218. * [ WFC: No Recursion ]
  7219. * A parsed entity must not contain a recursive reference
  7220. * to itself, either directly or indirectly.
  7221. * Done somewhere else
  7222. */
  7223. return(ent);
  7224. }
  7225. /**
  7226. * xmlParseStringEntityRef:
  7227. * @ctxt: an XML parser context
  7228. * @str: a pointer to an index in the string
  7229. *
  7230. * parse ENTITY references declarations, but this version parses it from
  7231. * a string value.
  7232. *
  7233. * [68] EntityRef ::= '&' Name ';'
  7234. *
  7235. * [ WFC: Entity Declared ]
  7236. * In a document without any DTD, a document with only an internal DTD
  7237. * subset which contains no parameter entity references, or a document
  7238. * with "standalone='yes'", the Name given in the entity reference
  7239. * must match that in an entity declaration, except that well-formed
  7240. * documents need not declare any of the following entities: amp, lt,
  7241. * gt, apos, quot. The declaration of a parameter entity must precede
  7242. * any reference to it. Similarly, the declaration of a general entity
  7243. * must precede any reference to it which appears in a default value in an
  7244. * attribute-list declaration. Note that if entities are declared in the
  7245. * external subset or in external parameter entities, a non-validating
  7246. * processor is not obligated to read and process their declarations;
  7247. * for such documents, the rule that an entity must be declared is a
  7248. * well-formedness constraint only if standalone='yes'.
  7249. *
  7250. * [ WFC: Parsed Entity ]
  7251. * An entity reference must not contain the name of an unparsed entity
  7252. *
  7253. * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
  7254. * is updated to the current location in the string.
  7255. */
  7256. static xmlEntityPtr
  7257. xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
  7258. xmlChar *name;
  7259. const xmlChar *ptr;
  7260. xmlChar cur;
  7261. xmlEntityPtr ent = NULL;
  7262. if ((str == NULL) || (*str == NULL))
  7263. return(NULL);
  7264. ptr = *str;
  7265. cur = *ptr;
  7266. if (cur != '&')
  7267. return(NULL);
  7268. ptr++;
  7269. name = xmlParseStringName(ctxt, &ptr);
  7270. if (name == NULL) {
  7271. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7272. "xmlParseStringEntityRef: no name\n");
  7273. *str = ptr;
  7274. return(NULL);
  7275. }
  7276. if (*ptr != ';') {
  7277. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7278. xmlFree(name);
  7279. *str = ptr;
  7280. return(NULL);
  7281. }
  7282. ptr++;
  7283. /*
  7284. * Predefined entities override any extra definition
  7285. */
  7286. if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
  7287. ent = xmlGetPredefinedEntity(name);
  7288. if (ent != NULL) {
  7289. xmlFree(name);
  7290. *str = ptr;
  7291. return(ent);
  7292. }
  7293. }
  7294. /*
  7295. * Increase the number of entity references parsed
  7296. */
  7297. ctxt->nbentities++;
  7298. /*
  7299. * Ask first SAX for entity resolution, otherwise try the
  7300. * entities which may have stored in the parser context.
  7301. */
  7302. if (ctxt->sax != NULL) {
  7303. if (ctxt->sax->getEntity != NULL)
  7304. ent = ctxt->sax->getEntity(ctxt->userData, name);
  7305. if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
  7306. ent = xmlGetPredefinedEntity(name);
  7307. if ((ent == NULL) && (ctxt->userData==ctxt)) {
  7308. ent = xmlSAX2GetEntity(ctxt, name);
  7309. }
  7310. }
  7311. if (ctxt->instate == XML_PARSER_EOF) {
  7312. xmlFree(name);
  7313. return(NULL);
  7314. }
  7315. /*
  7316. * [ WFC: Entity Declared ]
  7317. * In a document without any DTD, a document with only an
  7318. * internal DTD subset which contains no parameter entity
  7319. * references, or a document with "standalone='yes'", the
  7320. * Name given in the entity reference must match that in an
  7321. * entity declaration, except that well-formed documents
  7322. * need not declare any of the following entities: amp, lt,
  7323. * gt, apos, quot.
  7324. * The declaration of a parameter entity must precede any
  7325. * reference to it.
  7326. * Similarly, the declaration of a general entity must
  7327. * precede any reference to it which appears in a default
  7328. * value in an attribute-list declaration. Note that if
  7329. * entities are declared in the external subset or in
  7330. * external parameter entities, a non-validating processor
  7331. * is not obligated to read and process their declarations;
  7332. * for such documents, the rule that an entity must be
  7333. * declared is a well-formedness constraint only if
  7334. * standalone='yes'.
  7335. */
  7336. if (ent == NULL) {
  7337. if ((ctxt->standalone == 1) ||
  7338. ((ctxt->hasExternalSubset == 0) &&
  7339. (ctxt->hasPErefs == 0))) {
  7340. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7341. "Entity '%s' not defined\n", name);
  7342. } else {
  7343. xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7344. "Entity '%s' not defined\n",
  7345. name);
  7346. }
  7347. xmlParserEntityCheck(ctxt, 0, ent, 0);
  7348. /* TODO ? check regressions ctxt->valid = 0; */
  7349. }
  7350. /*
  7351. * [ WFC: Parsed Entity ]
  7352. * An entity reference must not contain the name of an
  7353. * unparsed entity
  7354. */
  7355. else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
  7356. xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
  7357. "Entity reference to unparsed entity %s\n", name);
  7358. }
  7359. /*
  7360. * [ WFC: No External Entity References ]
  7361. * Attribute values cannot contain direct or indirect
  7362. * entity references to external entities.
  7363. */
  7364. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7365. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
  7366. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
  7367. "Attribute references external entity '%s'\n", name);
  7368. }
  7369. /*
  7370. * [ WFC: No < in Attribute Values ]
  7371. * The replacement text of any entity referred to directly or
  7372. * indirectly in an attribute value (other than "&lt;") must
  7373. * not contain a <.
  7374. */
  7375. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7376. (ent != NULL) && (ent->content != NULL) &&
  7377. (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  7378. (xmlStrchr(ent->content, '<'))) {
  7379. xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
  7380. "'<' in entity '%s' is not allowed in attributes values\n",
  7381. name);
  7382. }
  7383. /*
  7384. * Internal check, no parameter entities here ...
  7385. */
  7386. else {
  7387. switch (ent->etype) {
  7388. case XML_INTERNAL_PARAMETER_ENTITY:
  7389. case XML_EXTERNAL_PARAMETER_ENTITY:
  7390. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  7391. "Attempt to reference the parameter entity '%s'\n",
  7392. name);
  7393. break;
  7394. default:
  7395. break;
  7396. }
  7397. }
  7398. /*
  7399. * [ WFC: No Recursion ]
  7400. * A parsed entity must not contain a recursive reference
  7401. * to itself, either directly or indirectly.
  7402. * Done somewhere else
  7403. */
  7404. xmlFree(name);
  7405. *str = ptr;
  7406. return(ent);
  7407. }
  7408. /**
  7409. * xmlParsePEReference:
  7410. * @ctxt: an XML parser context
  7411. *
  7412. * parse PEReference declarations
  7413. * The entity content is handled directly by pushing it's content as
  7414. * a new input stream.
  7415. *
  7416. * [69] PEReference ::= '%' Name ';'
  7417. *
  7418. * [ WFC: No Recursion ]
  7419. * A parsed entity must not contain a recursive
  7420. * reference to itself, either directly or indirectly.
  7421. *
  7422. * [ WFC: Entity Declared ]
  7423. * In a document without any DTD, a document with only an internal DTD
  7424. * subset which contains no parameter entity references, or a document
  7425. * with "standalone='yes'", ... ... The declaration of a parameter
  7426. * entity must precede any reference to it...
  7427. *
  7428. * [ VC: Entity Declared ]
  7429. * In a document with an external subset or external parameter entities
  7430. * with "standalone='no'", ... ... The declaration of a parameter entity
  7431. * must precede any reference to it...
  7432. *
  7433. * [ WFC: In DTD ]
  7434. * Parameter-entity references may only appear in the DTD.
  7435. * NOTE: misleading but this is handled.
  7436. */
  7437. void
  7438. xmlParsePEReference(xmlParserCtxtPtr ctxt)
  7439. {
  7440. const xmlChar *name;
  7441. xmlEntityPtr entity = NULL;
  7442. xmlParserInputPtr input;
  7443. if (RAW != '%')
  7444. return;
  7445. NEXT;
  7446. name = xmlParseName(ctxt);
  7447. if (name == NULL) {
  7448. xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
  7449. return;
  7450. }
  7451. if (xmlParserDebugEntities)
  7452. xmlGenericError(xmlGenericErrorContext,
  7453. "PEReference: %s\n", name);
  7454. if (RAW != ';') {
  7455. xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
  7456. return;
  7457. }
  7458. NEXT;
  7459. /*
  7460. * Increase the number of entity references parsed
  7461. */
  7462. ctxt->nbentities++;
  7463. /*
  7464. * Request the entity from SAX
  7465. */
  7466. if ((ctxt->sax != NULL) &&
  7467. (ctxt->sax->getParameterEntity != NULL))
  7468. entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
  7469. if (ctxt->instate == XML_PARSER_EOF)
  7470. return;
  7471. if (entity == NULL) {
  7472. /*
  7473. * [ WFC: Entity Declared ]
  7474. * In a document without any DTD, a document with only an
  7475. * internal DTD subset which contains no parameter entity
  7476. * references, or a document with "standalone='yes'", ...
  7477. * ... The declaration of a parameter entity must precede
  7478. * any reference to it...
  7479. */
  7480. if ((ctxt->standalone == 1) ||
  7481. ((ctxt->hasExternalSubset == 0) &&
  7482. (ctxt->hasPErefs == 0))) {
  7483. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7484. "PEReference: %%%s; not found\n",
  7485. name);
  7486. } else {
  7487. /*
  7488. * [ VC: Entity Declared ]
  7489. * In a document with an external subset or external
  7490. * parameter entities with "standalone='no'", ...
  7491. * ... The declaration of a parameter entity must
  7492. * precede any reference to it...
  7493. */
  7494. if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
  7495. xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7496. "PEReference: %%%s; not found\n",
  7497. name, NULL);
  7498. } else
  7499. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7500. "PEReference: %%%s; not found\n",
  7501. name, NULL);
  7502. ctxt->valid = 0;
  7503. }
  7504. xmlParserEntityCheck(ctxt, 0, NULL, 0);
  7505. } else {
  7506. /*
  7507. * Internal checking in case the entity quest barfed
  7508. */
  7509. if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
  7510. (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
  7511. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7512. "Internal: %%%s; is not a parameter entity\n",
  7513. name, NULL);
  7514. } else {
  7515. xmlChar start[4];
  7516. xmlCharEncoding enc;
  7517. if (xmlParserEntityCheck(ctxt, 0, entity, 0))
  7518. return;
  7519. if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
  7520. ((ctxt->options & XML_PARSE_NOENT) == 0) &&
  7521. ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
  7522. ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
  7523. ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
  7524. (ctxt->replaceEntities == 0) &&
  7525. (ctxt->validate == 0))
  7526. return;
  7527. input = xmlNewEntityInputStream(ctxt, entity);
  7528. if (xmlPushInput(ctxt, input) < 0) {
  7529. xmlFreeInputStream(input);
  7530. return;
  7531. }
  7532. if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
  7533. /*
  7534. * Get the 4 first bytes and decode the charset
  7535. * if enc != XML_CHAR_ENCODING_NONE
  7536. * plug some encoding conversion routines.
  7537. * Note that, since we may have some non-UTF8
  7538. * encoding (like UTF16, bug 135229), the 'length'
  7539. * is not known, but we can calculate based upon
  7540. * the amount of data in the buffer.
  7541. */
  7542. GROW
  7543. if (ctxt->instate == XML_PARSER_EOF)
  7544. return;
  7545. if ((ctxt->input->end - ctxt->input->cur)>=4) {
  7546. start[0] = RAW;
  7547. start[1] = NXT(1);
  7548. start[2] = NXT(2);
  7549. start[3] = NXT(3);
  7550. enc = xmlDetectCharEncoding(start, 4);
  7551. if (enc != XML_CHAR_ENCODING_NONE) {
  7552. xmlSwitchEncoding(ctxt, enc);
  7553. }
  7554. }
  7555. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
  7556. (IS_BLANK_CH(NXT(5)))) {
  7557. xmlParseTextDecl(ctxt);
  7558. }
  7559. }
  7560. }
  7561. }
  7562. ctxt->hasPErefs = 1;
  7563. }
  7564. /**
  7565. * xmlLoadEntityContent:
  7566. * @ctxt: an XML parser context
  7567. * @entity: an unloaded system entity
  7568. *
  7569. * Load the original content of the given system entity from the
  7570. * ExternalID/SystemID given. This is to be used for Included in Literal
  7571. * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
  7572. *
  7573. * Returns 0 in case of success and -1 in case of failure
  7574. */
  7575. static int
  7576. xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
  7577. xmlParserInputPtr input;
  7578. xmlBufferPtr buf;
  7579. int l, c;
  7580. int count = 0;
  7581. if ((ctxt == NULL) || (entity == NULL) ||
  7582. ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
  7583. (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
  7584. (entity->content != NULL)) {
  7585. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7586. "xmlLoadEntityContent parameter error");
  7587. return(-1);
  7588. }
  7589. if (xmlParserDebugEntities)
  7590. xmlGenericError(xmlGenericErrorContext,
  7591. "Reading %s entity content input\n", entity->name);
  7592. buf = xmlBufferCreate();
  7593. if (buf == NULL) {
  7594. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7595. "xmlLoadEntityContent parameter error");
  7596. return(-1);
  7597. }
  7598. input = xmlNewEntityInputStream(ctxt, entity);
  7599. if (input == NULL) {
  7600. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7601. "xmlLoadEntityContent input error");
  7602. xmlBufferFree(buf);
  7603. return(-1);
  7604. }
  7605. /*
  7606. * Push the entity as the current input, read char by char
  7607. * saving to the buffer until the end of the entity or an error
  7608. */
  7609. if (xmlPushInput(ctxt, input) < 0) {
  7610. xmlBufferFree(buf);
  7611. return(-1);
  7612. }
  7613. GROW;
  7614. c = CUR_CHAR(l);
  7615. while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
  7616. (IS_CHAR(c))) {
  7617. xmlBufferAdd(buf, ctxt->input->cur, l);
  7618. if (count++ > XML_PARSER_CHUNK_SIZE) {
  7619. count = 0;
  7620. GROW;
  7621. if (ctxt->instate == XML_PARSER_EOF) {
  7622. xmlBufferFree(buf);
  7623. return(-1);
  7624. }
  7625. }
  7626. NEXTL(l);
  7627. c = CUR_CHAR(l);
  7628. if (c == 0) {
  7629. count = 0;
  7630. GROW;
  7631. if (ctxt->instate == XML_PARSER_EOF) {
  7632. xmlBufferFree(buf);
  7633. return(-1);
  7634. }
  7635. c = CUR_CHAR(l);
  7636. }
  7637. }
  7638. if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
  7639. xmlPopInput(ctxt);
  7640. } else if (!IS_CHAR(c)) {
  7641. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  7642. "xmlLoadEntityContent: invalid char value %d\n",
  7643. c);
  7644. xmlBufferFree(buf);
  7645. return(-1);
  7646. }
  7647. entity->content = buf->content;
  7648. buf->content = NULL;
  7649. xmlBufferFree(buf);
  7650. return(0);
  7651. }
  7652. /**
  7653. * xmlParseStringPEReference:
  7654. * @ctxt: an XML parser context
  7655. * @str: a pointer to an index in the string
  7656. *
  7657. * parse PEReference declarations
  7658. *
  7659. * [69] PEReference ::= '%' Name ';'
  7660. *
  7661. * [ WFC: No Recursion ]
  7662. * A parsed entity must not contain a recursive
  7663. * reference to itself, either directly or indirectly.
  7664. *
  7665. * [ WFC: Entity Declared ]
  7666. * In a document without any DTD, a document with only an internal DTD
  7667. * subset which contains no parameter entity references, or a document
  7668. * with "standalone='yes'", ... ... The declaration of a parameter
  7669. * entity must precede any reference to it...
  7670. *
  7671. * [ VC: Entity Declared ]
  7672. * In a document with an external subset or external parameter entities
  7673. * with "standalone='no'", ... ... The declaration of a parameter entity
  7674. * must precede any reference to it...
  7675. *
  7676. * [ WFC: In DTD ]
  7677. * Parameter-entity references may only appear in the DTD.
  7678. * NOTE: misleading but this is handled.
  7679. *
  7680. * Returns the string of the entity content.
  7681. * str is updated to the current value of the index
  7682. */
  7683. static xmlEntityPtr
  7684. xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
  7685. const xmlChar *ptr;
  7686. xmlChar cur;
  7687. xmlChar *name;
  7688. xmlEntityPtr entity = NULL;
  7689. if ((str == NULL) || (*str == NULL)) return(NULL);
  7690. ptr = *str;
  7691. cur = *ptr;
  7692. if (cur != '%')
  7693. return(NULL);
  7694. ptr++;
  7695. name = xmlParseStringName(ctxt, &ptr);
  7696. if (name == NULL) {
  7697. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7698. "xmlParseStringPEReference: no name\n");
  7699. *str = ptr;
  7700. return(NULL);
  7701. }
  7702. cur = *ptr;
  7703. if (cur != ';') {
  7704. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7705. xmlFree(name);
  7706. *str = ptr;
  7707. return(NULL);
  7708. }
  7709. ptr++;
  7710. /*
  7711. * Increase the number of entity references parsed
  7712. */
  7713. ctxt->nbentities++;
  7714. /*
  7715. * Request the entity from SAX
  7716. */
  7717. if ((ctxt->sax != NULL) &&
  7718. (ctxt->sax->getParameterEntity != NULL))
  7719. entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
  7720. if (ctxt->instate == XML_PARSER_EOF) {
  7721. xmlFree(name);
  7722. *str = ptr;
  7723. return(NULL);
  7724. }
  7725. if (entity == NULL) {
  7726. /*
  7727. * [ WFC: Entity Declared ]
  7728. * In a document without any DTD, a document with only an
  7729. * internal DTD subset which contains no parameter entity
  7730. * references, or a document with "standalone='yes'", ...
  7731. * ... The declaration of a parameter entity must precede
  7732. * any reference to it...
  7733. */
  7734. if ((ctxt->standalone == 1) ||
  7735. ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
  7736. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7737. "PEReference: %%%s; not found\n", name);
  7738. } else {
  7739. /*
  7740. * [ VC: Entity Declared ]
  7741. * In a document with an external subset or external
  7742. * parameter entities with "standalone='no'", ...
  7743. * ... The declaration of a parameter entity must
  7744. * precede any reference to it...
  7745. */
  7746. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7747. "PEReference: %%%s; not found\n",
  7748. name, NULL);
  7749. ctxt->valid = 0;
  7750. }
  7751. xmlParserEntityCheck(ctxt, 0, NULL, 0);
  7752. } else {
  7753. /*
  7754. * Internal checking in case the entity quest barfed
  7755. */
  7756. if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
  7757. (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
  7758. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7759. "%%%s; is not a parameter entity\n",
  7760. name, NULL);
  7761. }
  7762. }
  7763. ctxt->hasPErefs = 1;
  7764. xmlFree(name);
  7765. *str = ptr;
  7766. return(entity);
  7767. }
  7768. /**
  7769. * xmlParseDocTypeDecl:
  7770. * @ctxt: an XML parser context
  7771. *
  7772. * parse a DOCTYPE declaration
  7773. *
  7774. * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
  7775. * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  7776. *
  7777. * [ VC: Root Element Type ]
  7778. * The Name in the document type declaration must match the element
  7779. * type of the root element.
  7780. */
  7781. void
  7782. xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
  7783. const xmlChar *name = NULL;
  7784. xmlChar *ExternalID = NULL;
  7785. xmlChar *URI = NULL;
  7786. /*
  7787. * We know that '<!DOCTYPE' has been detected.
  7788. */
  7789. SKIP(9);
  7790. SKIP_BLANKS;
  7791. /*
  7792. * Parse the DOCTYPE name.
  7793. */
  7794. name = xmlParseName(ctxt);
  7795. if (name == NULL) {
  7796. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7797. "xmlParseDocTypeDecl : no DOCTYPE name !\n");
  7798. }
  7799. ctxt->intSubName = name;
  7800. SKIP_BLANKS;
  7801. /*
  7802. * Check for SystemID and ExternalID
  7803. */
  7804. URI = xmlParseExternalID(ctxt, &ExternalID, 1);
  7805. if ((URI != NULL) || (ExternalID != NULL)) {
  7806. ctxt->hasExternalSubset = 1;
  7807. }
  7808. ctxt->extSubURI = URI;
  7809. ctxt->extSubSystem = ExternalID;
  7810. SKIP_BLANKS;
  7811. /*
  7812. * Create and update the internal subset.
  7813. */
  7814. if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
  7815. (!ctxt->disableSAX))
  7816. ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
  7817. if (ctxt->instate == XML_PARSER_EOF)
  7818. return;
  7819. /*
  7820. * Is there any internal subset declarations ?
  7821. * they are handled separately in xmlParseInternalSubset()
  7822. */
  7823. if (RAW == '[')
  7824. return;
  7825. /*
  7826. * We should be at the end of the DOCTYPE declaration.
  7827. */
  7828. if (RAW != '>') {
  7829. xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
  7830. }
  7831. NEXT;
  7832. }
  7833. /**
  7834. * xmlParseInternalSubset:
  7835. * @ctxt: an XML parser context
  7836. *
  7837. * parse the internal subset declaration
  7838. *
  7839. * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  7840. */
  7841. static void
  7842. xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
  7843. /*
  7844. * Is there any DTD definition ?
  7845. */
  7846. if (RAW == '[') {
  7847. int baseInputNr = ctxt->inputNr;
  7848. ctxt->instate = XML_PARSER_DTD;
  7849. NEXT;
  7850. /*
  7851. * Parse the succession of Markup declarations and
  7852. * PEReferences.
  7853. * Subsequence (markupdecl | PEReference | S)*
  7854. */
  7855. while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
  7856. (ctxt->instate != XML_PARSER_EOF)) {
  7857. const xmlChar *check = CUR_PTR;
  7858. unsigned int cons = ctxt->input->consumed;
  7859. SKIP_BLANKS;
  7860. xmlParseMarkupDecl(ctxt);
  7861. xmlParsePEReference(ctxt);
  7862. /*
  7863. * Conditional sections are allowed from external entities included
  7864. * by PE References in the internal subset.
  7865. */
  7866. if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
  7867. (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  7868. xmlParseConditionalSections(ctxt);
  7869. }
  7870. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  7871. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7872. "xmlParseInternalSubset: error detected in Markup declaration\n");
  7873. if (ctxt->inputNr > baseInputNr)
  7874. xmlPopInput(ctxt);
  7875. else
  7876. break;
  7877. }
  7878. }
  7879. if (RAW == ']') {
  7880. NEXT;
  7881. SKIP_BLANKS;
  7882. }
  7883. }
  7884. /*
  7885. * We should be at the end of the DOCTYPE declaration.
  7886. */
  7887. if (RAW != '>') {
  7888. xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
  7889. return;
  7890. }
  7891. NEXT;
  7892. }
  7893. #ifdef LIBXML_SAX1_ENABLED
  7894. /**
  7895. * xmlParseAttribute:
  7896. * @ctxt: an XML parser context
  7897. * @value: a xmlChar ** used to store the value of the attribute
  7898. *
  7899. * parse an attribute
  7900. *
  7901. * [41] Attribute ::= Name Eq AttValue
  7902. *
  7903. * [ WFC: No External Entity References ]
  7904. * Attribute values cannot contain direct or indirect entity references
  7905. * to external entities.
  7906. *
  7907. * [ WFC: No < in Attribute Values ]
  7908. * The replacement text of any entity referred to directly or indirectly in
  7909. * an attribute value (other than "&lt;") must not contain a <.
  7910. *
  7911. * [ VC: Attribute Value Type ]
  7912. * The attribute must have been declared; the value must be of the type
  7913. * declared for it.
  7914. *
  7915. * [25] Eq ::= S? '=' S?
  7916. *
  7917. * With namespace:
  7918. *
  7919. * [NS 11] Attribute ::= QName Eq AttValue
  7920. *
  7921. * Also the case QName == xmlns:??? is handled independently as a namespace
  7922. * definition.
  7923. *
  7924. * Returns the attribute name, and the value in *value.
  7925. */
  7926. const xmlChar *
  7927. xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
  7928. const xmlChar *name;
  7929. xmlChar *val;
  7930. *value = NULL;
  7931. GROW;
  7932. name = xmlParseName(ctxt);
  7933. if (name == NULL) {
  7934. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7935. "error parsing attribute name\n");
  7936. return(NULL);
  7937. }
  7938. /*
  7939. * read the value
  7940. */
  7941. SKIP_BLANKS;
  7942. if (RAW == '=') {
  7943. NEXT;
  7944. SKIP_BLANKS;
  7945. val = xmlParseAttValue(ctxt);
  7946. ctxt->instate = XML_PARSER_CONTENT;
  7947. } else {
  7948. xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
  7949. "Specification mandates value for attribute %s\n", name);
  7950. return(NULL);
  7951. }
  7952. /*
  7953. * Check that xml:lang conforms to the specification
  7954. * No more registered as an error, just generate a warning now
  7955. * since this was deprecated in XML second edition
  7956. */
  7957. if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
  7958. if (!xmlCheckLanguageID(val)) {
  7959. xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
  7960. "Malformed value for xml:lang : %s\n",
  7961. val, NULL);
  7962. }
  7963. }
  7964. /*
  7965. * Check that xml:space conforms to the specification
  7966. */
  7967. if (xmlStrEqual(name, BAD_CAST "xml:space")) {
  7968. if (xmlStrEqual(val, BAD_CAST "default"))
  7969. *(ctxt->space) = 0;
  7970. else if (xmlStrEqual(val, BAD_CAST "preserve"))
  7971. *(ctxt->space) = 1;
  7972. else {
  7973. xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
  7974. "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
  7975. val, NULL);
  7976. }
  7977. }
  7978. *value = val;
  7979. return(name);
  7980. }
  7981. /**
  7982. * xmlParseStartTag:
  7983. * @ctxt: an XML parser context
  7984. *
  7985. * parse a start of tag either for rule element or
  7986. * EmptyElement. In both case we don't parse the tag closing chars.
  7987. *
  7988. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  7989. *
  7990. * [ WFC: Unique Att Spec ]
  7991. * No attribute name may appear more than once in the same start-tag or
  7992. * empty-element tag.
  7993. *
  7994. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  7995. *
  7996. * [ WFC: Unique Att Spec ]
  7997. * No attribute name may appear more than once in the same start-tag or
  7998. * empty-element tag.
  7999. *
  8000. * With namespace:
  8001. *
  8002. * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
  8003. *
  8004. * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
  8005. *
  8006. * Returns the element name parsed
  8007. */
  8008. const xmlChar *
  8009. xmlParseStartTag(xmlParserCtxtPtr ctxt) {
  8010. const xmlChar *name;
  8011. const xmlChar *attname;
  8012. xmlChar *attvalue;
  8013. const xmlChar **atts = ctxt->atts;
  8014. int nbatts = 0;
  8015. int maxatts = ctxt->maxatts;
  8016. int i;
  8017. if (RAW != '<') return(NULL);
  8018. NEXT1;
  8019. name = xmlParseName(ctxt);
  8020. if (name == NULL) {
  8021. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8022. "xmlParseStartTag: invalid element name\n");
  8023. return(NULL);
  8024. }
  8025. /*
  8026. * Now parse the attributes, it ends up with the ending
  8027. *
  8028. * (S Attribute)* S?
  8029. */
  8030. SKIP_BLANKS;
  8031. GROW;
  8032. while (((RAW != '>') &&
  8033. ((RAW != '/') || (NXT(1) != '>')) &&
  8034. (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
  8035. const xmlChar *q = CUR_PTR;
  8036. unsigned int cons = ctxt->input->consumed;
  8037. attname = xmlParseAttribute(ctxt, &attvalue);
  8038. if ((attname != NULL) && (attvalue != NULL)) {
  8039. /*
  8040. * [ WFC: Unique Att Spec ]
  8041. * No attribute name may appear more than once in the same
  8042. * start-tag or empty-element tag.
  8043. */
  8044. for (i = 0; i < nbatts;i += 2) {
  8045. if (xmlStrEqual(atts[i], attname)) {
  8046. xmlErrAttributeDup(ctxt, NULL, attname);
  8047. xmlFree(attvalue);
  8048. goto failed;
  8049. }
  8050. }
  8051. /*
  8052. * Add the pair to atts
  8053. */
  8054. if (atts == NULL) {
  8055. maxatts = 22; /* allow for 10 attrs by default */
  8056. atts = (const xmlChar **)
  8057. xmlMalloc(maxatts * sizeof(xmlChar *));
  8058. if (atts == NULL) {
  8059. xmlErrMemory(ctxt, NULL);
  8060. if (attvalue != NULL)
  8061. xmlFree(attvalue);
  8062. goto failed;
  8063. }
  8064. ctxt->atts = atts;
  8065. ctxt->maxatts = maxatts;
  8066. } else if (nbatts + 4 > maxatts) {
  8067. const xmlChar **n;
  8068. maxatts *= 2;
  8069. n = (const xmlChar **) xmlRealloc((void *) atts,
  8070. maxatts * sizeof(const xmlChar *));
  8071. if (n == NULL) {
  8072. xmlErrMemory(ctxt, NULL);
  8073. if (attvalue != NULL)
  8074. xmlFree(attvalue);
  8075. goto failed;
  8076. }
  8077. atts = n;
  8078. ctxt->atts = atts;
  8079. ctxt->maxatts = maxatts;
  8080. }
  8081. atts[nbatts++] = attname;
  8082. atts[nbatts++] = attvalue;
  8083. atts[nbatts] = NULL;
  8084. atts[nbatts + 1] = NULL;
  8085. } else {
  8086. if (attvalue != NULL)
  8087. xmlFree(attvalue);
  8088. }
  8089. failed:
  8090. GROW
  8091. if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
  8092. break;
  8093. if (SKIP_BLANKS == 0) {
  8094. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  8095. "attributes construct error\n");
  8096. }
  8097. if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
  8098. (attname == NULL) && (attvalue == NULL)) {
  8099. xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
  8100. "xmlParseStartTag: problem parsing attributes\n");
  8101. break;
  8102. }
  8103. SHRINK;
  8104. GROW;
  8105. }
  8106. /*
  8107. * SAX: Start of Element !
  8108. */
  8109. if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
  8110. (!ctxt->disableSAX)) {
  8111. if (nbatts > 0)
  8112. ctxt->sax->startElement(ctxt->userData, name, atts);
  8113. else
  8114. ctxt->sax->startElement(ctxt->userData, name, NULL);
  8115. }
  8116. if (atts != NULL) {
  8117. /* Free only the content strings */
  8118. for (i = 1;i < nbatts;i+=2)
  8119. if (atts[i] != NULL)
  8120. xmlFree((xmlChar *) atts[i]);
  8121. }
  8122. return(name);
  8123. }
  8124. /**
  8125. * xmlParseEndTag1:
  8126. * @ctxt: an XML parser context
  8127. * @line: line of the start tag
  8128. * @nsNr: number of namespaces on the start tag
  8129. *
  8130. * parse an end of tag
  8131. *
  8132. * [42] ETag ::= '</' Name S? '>'
  8133. *
  8134. * With namespace
  8135. *
  8136. * [NS 9] ETag ::= '</' QName S? '>'
  8137. */
  8138. static void
  8139. xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
  8140. const xmlChar *name;
  8141. GROW;
  8142. if ((RAW != '<') || (NXT(1) != '/')) {
  8143. xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
  8144. "xmlParseEndTag: '</' not found\n");
  8145. return;
  8146. }
  8147. SKIP(2);
  8148. name = xmlParseNameAndCompare(ctxt,ctxt->name);
  8149. /*
  8150. * We should definitely be at the ending "S? '>'" part
  8151. */
  8152. GROW;
  8153. SKIP_BLANKS;
  8154. if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
  8155. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  8156. } else
  8157. NEXT1;
  8158. /*
  8159. * [ WFC: Element Type Match ]
  8160. * The Name in an element's end-tag must match the element type in the
  8161. * start-tag.
  8162. *
  8163. */
  8164. if (name != (xmlChar*)1) {
  8165. if (name == NULL) name = BAD_CAST "unparsable";
  8166. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
  8167. "Opening and ending tag mismatch: %s line %d and %s\n",
  8168. ctxt->name, line, name);
  8169. }
  8170. /*
  8171. * SAX: End of Tag
  8172. */
  8173. if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
  8174. (!ctxt->disableSAX))
  8175. ctxt->sax->endElement(ctxt->userData, ctxt->name);
  8176. namePop(ctxt);
  8177. spacePop(ctxt);
  8178. return;
  8179. }
  8180. /**
  8181. * xmlParseEndTag:
  8182. * @ctxt: an XML parser context
  8183. *
  8184. * parse an end of tag
  8185. *
  8186. * [42] ETag ::= '</' Name S? '>'
  8187. *
  8188. * With namespace
  8189. *
  8190. * [NS 9] ETag ::= '</' QName S? '>'
  8191. */
  8192. void
  8193. xmlParseEndTag(xmlParserCtxtPtr ctxt) {
  8194. xmlParseEndTag1(ctxt, 0);
  8195. }
  8196. #endif /* LIBXML_SAX1_ENABLED */
  8197. /************************************************************************
  8198. * *
  8199. * SAX 2 specific operations *
  8200. * *
  8201. ************************************************************************/
  8202. /*
  8203. * xmlGetNamespace:
  8204. * @ctxt: an XML parser context
  8205. * @prefix: the prefix to lookup
  8206. *
  8207. * Lookup the namespace name for the @prefix (which ca be NULL)
  8208. * The prefix must come from the @ctxt->dict dictionary
  8209. *
  8210. * Returns the namespace name or NULL if not bound
  8211. */
  8212. static const xmlChar *
  8213. xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
  8214. int i;
  8215. if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
  8216. for (i = ctxt->nsNr - 2;i >= 0;i-=2)
  8217. if (ctxt->nsTab[i] == prefix) {
  8218. if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
  8219. return(NULL);
  8220. return(ctxt->nsTab[i + 1]);
  8221. }
  8222. return(NULL);
  8223. }
  8224. /**
  8225. * xmlParseQName:
  8226. * @ctxt: an XML parser context
  8227. * @prefix: pointer to store the prefix part
  8228. *
  8229. * parse an XML Namespace QName
  8230. *
  8231. * [6] QName ::= (Prefix ':')? LocalPart
  8232. * [7] Prefix ::= NCName
  8233. * [8] LocalPart ::= NCName
  8234. *
  8235. * Returns the Name parsed or NULL
  8236. */
  8237. static const xmlChar *
  8238. xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
  8239. const xmlChar *l, *p;
  8240. GROW;
  8241. l = xmlParseNCName(ctxt);
  8242. if (l == NULL) {
  8243. if (CUR == ':') {
  8244. l = xmlParseName(ctxt);
  8245. if (l != NULL) {
  8246. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  8247. "Failed to parse QName '%s'\n", l, NULL, NULL);
  8248. *prefix = NULL;
  8249. return(l);
  8250. }
  8251. }
  8252. return(NULL);
  8253. }
  8254. if (CUR == ':') {
  8255. NEXT;
  8256. p = l;
  8257. l = xmlParseNCName(ctxt);
  8258. if (l == NULL) {
  8259. xmlChar *tmp;
  8260. if (ctxt->instate == XML_PARSER_EOF)
  8261. return(NULL);
  8262. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  8263. "Failed to parse QName '%s:'\n", p, NULL, NULL);
  8264. l = xmlParseNmtoken(ctxt);
  8265. if (l == NULL) {
  8266. if (ctxt->instate == XML_PARSER_EOF)
  8267. return(NULL);
  8268. tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
  8269. } else {
  8270. tmp = xmlBuildQName(l, p, NULL, 0);
  8271. xmlFree((char *)l);
  8272. }
  8273. p = xmlDictLookup(ctxt->dict, tmp, -1);
  8274. if (tmp != NULL) xmlFree(tmp);
  8275. *prefix = NULL;
  8276. return(p);
  8277. }
  8278. if (CUR == ':') {
  8279. xmlChar *tmp;
  8280. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  8281. "Failed to parse QName '%s:%s:'\n", p, l, NULL);
  8282. NEXT;
  8283. tmp = (xmlChar *) xmlParseName(ctxt);
  8284. if (tmp != NULL) {
  8285. tmp = xmlBuildQName(tmp, l, NULL, 0);
  8286. l = xmlDictLookup(ctxt->dict, tmp, -1);
  8287. if (tmp != NULL) xmlFree(tmp);
  8288. *prefix = p;
  8289. return(l);
  8290. }
  8291. if (ctxt->instate == XML_PARSER_EOF)
  8292. return(NULL);
  8293. tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
  8294. l = xmlDictLookup(ctxt->dict, tmp, -1);
  8295. if (tmp != NULL) xmlFree(tmp);
  8296. *prefix = p;
  8297. return(l);
  8298. }
  8299. *prefix = p;
  8300. } else
  8301. *prefix = NULL;
  8302. return(l);
  8303. }
  8304. /**
  8305. * xmlParseQNameAndCompare:
  8306. * @ctxt: an XML parser context
  8307. * @name: the localname
  8308. * @prefix: the prefix, if any.
  8309. *
  8310. * parse an XML name and compares for match
  8311. * (specialized for endtag parsing)
  8312. *
  8313. * Returns NULL for an illegal name, (xmlChar*) 1 for success
  8314. * and the name for mismatch
  8315. */
  8316. static const xmlChar *
  8317. xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
  8318. xmlChar const *prefix) {
  8319. const xmlChar *cmp;
  8320. const xmlChar *in;
  8321. const xmlChar *ret;
  8322. const xmlChar *prefix2;
  8323. if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
  8324. GROW;
  8325. in = ctxt->input->cur;
  8326. cmp = prefix;
  8327. while (*in != 0 && *in == *cmp) {
  8328. ++in;
  8329. ++cmp;
  8330. }
  8331. if ((*cmp == 0) && (*in == ':')) {
  8332. in++;
  8333. cmp = name;
  8334. while (*in != 0 && *in == *cmp) {
  8335. ++in;
  8336. ++cmp;
  8337. }
  8338. if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
  8339. /* success */
  8340. ctxt->input->col += in - ctxt->input->cur;
  8341. ctxt->input->cur = in;
  8342. return((const xmlChar*) 1);
  8343. }
  8344. }
  8345. /*
  8346. * all strings coms from the dictionary, equality can be done directly
  8347. */
  8348. ret = xmlParseQName (ctxt, &prefix2);
  8349. if ((ret == name) && (prefix == prefix2))
  8350. return((const xmlChar*) 1);
  8351. return ret;
  8352. }
  8353. /**
  8354. * xmlParseAttValueInternal:
  8355. * @ctxt: an XML parser context
  8356. * @len: attribute len result
  8357. * @alloc: whether the attribute was reallocated as a new string
  8358. * @normalize: if 1 then further non-CDATA normalization must be done
  8359. *
  8360. * parse a value for an attribute.
  8361. * NOTE: if no normalization is needed, the routine will return pointers
  8362. * directly from the data buffer.
  8363. *
  8364. * 3.3.3 Attribute-Value Normalization:
  8365. * Before the value of an attribute is passed to the application or
  8366. * checked for validity, the XML processor must normalize it as follows:
  8367. * - a character reference is processed by appending the referenced
  8368. * character to the attribute value
  8369. * - an entity reference is processed by recursively processing the
  8370. * replacement text of the entity
  8371. * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
  8372. * appending #x20 to the normalized value, except that only a single
  8373. * #x20 is appended for a "#xD#xA" sequence that is part of an external
  8374. * parsed entity or the literal entity value of an internal parsed entity
  8375. * - other characters are processed by appending them to the normalized value
  8376. * If the declared value is not CDATA, then the XML processor must further
  8377. * process the normalized attribute value by discarding any leading and
  8378. * trailing space (#x20) characters, and by replacing sequences of space
  8379. * (#x20) characters by a single space (#x20) character.
  8380. * All attributes for which no declaration has been read should be treated
  8381. * by a non-validating parser as if declared CDATA.
  8382. *
  8383. * Returns the AttValue parsed or NULL. The value has to be freed by the
  8384. * caller if it was copied, this can be detected by val[*len] == 0.
  8385. */
  8386. #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
  8387. const xmlChar *oldbase = ctxt->input->base;\
  8388. GROW;\
  8389. if (ctxt->instate == XML_PARSER_EOF)\
  8390. return(NULL);\
  8391. if (oldbase != ctxt->input->base) {\
  8392. ptrdiff_t delta = ctxt->input->base - oldbase;\
  8393. start = start + delta;\
  8394. in = in + delta;\
  8395. }\
  8396. end = ctxt->input->end;
  8397. static xmlChar *
  8398. xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
  8399. int normalize)
  8400. {
  8401. xmlChar limit = 0;
  8402. const xmlChar *in = NULL, *start, *end, *last;
  8403. xmlChar *ret = NULL;
  8404. int line, col;
  8405. GROW;
  8406. in = (xmlChar *) CUR_PTR;
  8407. line = ctxt->input->line;
  8408. col = ctxt->input->col;
  8409. if (*in != '"' && *in != '\'') {
  8410. xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
  8411. return (NULL);
  8412. }
  8413. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  8414. /*
  8415. * try to handle in this routine the most common case where no
  8416. * allocation of a new string is required and where content is
  8417. * pure ASCII.
  8418. */
  8419. limit = *in++;
  8420. col++;
  8421. end = ctxt->input->end;
  8422. start = in;
  8423. if (in >= end) {
  8424. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8425. }
  8426. if (normalize) {
  8427. /*
  8428. * Skip any leading spaces
  8429. */
  8430. while ((in < end) && (*in != limit) &&
  8431. ((*in == 0x20) || (*in == 0x9) ||
  8432. (*in == 0xA) || (*in == 0xD))) {
  8433. if (*in == 0xA) {
  8434. line++; col = 1;
  8435. } else {
  8436. col++;
  8437. }
  8438. in++;
  8439. start = in;
  8440. if (in >= end) {
  8441. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8442. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8443. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8444. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8445. "AttValue length too long\n");
  8446. return(NULL);
  8447. }
  8448. }
  8449. }
  8450. while ((in < end) && (*in != limit) && (*in >= 0x20) &&
  8451. (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
  8452. col++;
  8453. if ((*in++ == 0x20) && (*in == 0x20)) break;
  8454. if (in >= end) {
  8455. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8456. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8457. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8458. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8459. "AttValue length too long\n");
  8460. return(NULL);
  8461. }
  8462. }
  8463. }
  8464. last = in;
  8465. /*
  8466. * skip the trailing blanks
  8467. */
  8468. while ((last[-1] == 0x20) && (last > start)) last--;
  8469. while ((in < end) && (*in != limit) &&
  8470. ((*in == 0x20) || (*in == 0x9) ||
  8471. (*in == 0xA) || (*in == 0xD))) {
  8472. if (*in == 0xA) {
  8473. line++, col = 1;
  8474. } else {
  8475. col++;
  8476. }
  8477. in++;
  8478. if (in >= end) {
  8479. const xmlChar *oldbase = ctxt->input->base;
  8480. GROW;
  8481. if (ctxt->instate == XML_PARSER_EOF)
  8482. return(NULL);
  8483. if (oldbase != ctxt->input->base) {
  8484. ptrdiff_t delta = ctxt->input->base - oldbase;
  8485. start = start + delta;
  8486. in = in + delta;
  8487. last = last + delta;
  8488. }
  8489. end = ctxt->input->end;
  8490. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8491. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8492. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8493. "AttValue length too long\n");
  8494. return(NULL);
  8495. }
  8496. }
  8497. }
  8498. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8499. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8500. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8501. "AttValue length too long\n");
  8502. return(NULL);
  8503. }
  8504. if (*in != limit) goto need_complex;
  8505. } else {
  8506. while ((in < end) && (*in != limit) && (*in >= 0x20) &&
  8507. (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
  8508. in++;
  8509. col++;
  8510. if (in >= end) {
  8511. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8512. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8513. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8514. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8515. "AttValue length too long\n");
  8516. return(NULL);
  8517. }
  8518. }
  8519. }
  8520. last = in;
  8521. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8522. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8523. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8524. "AttValue length too long\n");
  8525. return(NULL);
  8526. }
  8527. if (*in != limit) goto need_complex;
  8528. }
  8529. in++;
  8530. col++;
  8531. if (len != NULL) {
  8532. *len = last - start;
  8533. ret = (xmlChar *) start;
  8534. } else {
  8535. if (alloc) *alloc = 1;
  8536. ret = xmlStrndup(start, last - start);
  8537. }
  8538. CUR_PTR = in;
  8539. ctxt->input->line = line;
  8540. ctxt->input->col = col;
  8541. if (alloc) *alloc = 0;
  8542. return ret;
  8543. need_complex:
  8544. if (alloc) *alloc = 1;
  8545. return xmlParseAttValueComplex(ctxt, len, normalize);
  8546. }
  8547. /**
  8548. * xmlParseAttribute2:
  8549. * @ctxt: an XML parser context
  8550. * @pref: the element prefix
  8551. * @elem: the element name
  8552. * @prefix: a xmlChar ** used to store the value of the attribute prefix
  8553. * @value: a xmlChar ** used to store the value of the attribute
  8554. * @len: an int * to save the length of the attribute
  8555. * @alloc: an int * to indicate if the attribute was allocated
  8556. *
  8557. * parse an attribute in the new SAX2 framework.
  8558. *
  8559. * Returns the attribute name, and the value in *value, .
  8560. */
  8561. static const xmlChar *
  8562. xmlParseAttribute2(xmlParserCtxtPtr ctxt,
  8563. const xmlChar * pref, const xmlChar * elem,
  8564. const xmlChar ** prefix, xmlChar ** value,
  8565. int *len, int *alloc)
  8566. {
  8567. const xmlChar *name;
  8568. xmlChar *val, *internal_val = NULL;
  8569. int normalize = 0;
  8570. *value = NULL;
  8571. GROW;
  8572. name = xmlParseQName(ctxt, prefix);
  8573. if (name == NULL) {
  8574. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8575. "error parsing attribute name\n");
  8576. return (NULL);
  8577. }
  8578. /*
  8579. * get the type if needed
  8580. */
  8581. if (ctxt->attsSpecial != NULL) {
  8582. int type;
  8583. type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
  8584. pref, elem, *prefix, name);
  8585. if (type != 0)
  8586. normalize = 1;
  8587. }
  8588. /*
  8589. * read the value
  8590. */
  8591. SKIP_BLANKS;
  8592. if (RAW == '=') {
  8593. NEXT;
  8594. SKIP_BLANKS;
  8595. val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
  8596. if (normalize) {
  8597. /*
  8598. * Sometimes a second normalisation pass for spaces is needed
  8599. * but that only happens if charrefs or entities references
  8600. * have been used in the attribute value, i.e. the attribute
  8601. * value have been extracted in an allocated string already.
  8602. */
  8603. if (*alloc) {
  8604. const xmlChar *val2;
  8605. val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
  8606. if ((val2 != NULL) && (val2 != val)) {
  8607. xmlFree(val);
  8608. val = (xmlChar *) val2;
  8609. }
  8610. }
  8611. }
  8612. ctxt->instate = XML_PARSER_CONTENT;
  8613. } else {
  8614. xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
  8615. "Specification mandates value for attribute %s\n",
  8616. name);
  8617. return (NULL);
  8618. }
  8619. if (*prefix == ctxt->str_xml) {
  8620. /*
  8621. * Check that xml:lang conforms to the specification
  8622. * No more registered as an error, just generate a warning now
  8623. * since this was deprecated in XML second edition
  8624. */
  8625. if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
  8626. internal_val = xmlStrndup(val, *len);
  8627. if (!xmlCheckLanguageID(internal_val)) {
  8628. xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
  8629. "Malformed value for xml:lang : %s\n",
  8630. internal_val, NULL);
  8631. }
  8632. }
  8633. /*
  8634. * Check that xml:space conforms to the specification
  8635. */
  8636. if (xmlStrEqual(name, BAD_CAST "space")) {
  8637. internal_val = xmlStrndup(val, *len);
  8638. if (xmlStrEqual(internal_val, BAD_CAST "default"))
  8639. *(ctxt->space) = 0;
  8640. else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
  8641. *(ctxt->space) = 1;
  8642. else {
  8643. xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
  8644. "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
  8645. internal_val, NULL);
  8646. }
  8647. }
  8648. if (internal_val) {
  8649. xmlFree(internal_val);
  8650. }
  8651. }
  8652. *value = val;
  8653. return (name);
  8654. }
  8655. /**
  8656. * xmlParseStartTag2:
  8657. * @ctxt: an XML parser context
  8658. *
  8659. * parse a start of tag either for rule element or
  8660. * EmptyElement. In both case we don't parse the tag closing chars.
  8661. * This routine is called when running SAX2 parsing
  8662. *
  8663. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  8664. *
  8665. * [ WFC: Unique Att Spec ]
  8666. * No attribute name may appear more than once in the same start-tag or
  8667. * empty-element tag.
  8668. *
  8669. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  8670. *
  8671. * [ WFC: Unique Att Spec ]
  8672. * No attribute name may appear more than once in the same start-tag or
  8673. * empty-element tag.
  8674. *
  8675. * With namespace:
  8676. *
  8677. * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
  8678. *
  8679. * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
  8680. *
  8681. * Returns the element name parsed
  8682. */
  8683. static const xmlChar *
  8684. xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
  8685. const xmlChar **URI, int *tlen) {
  8686. const xmlChar *localname;
  8687. const xmlChar *prefix;
  8688. const xmlChar *attname;
  8689. const xmlChar *aprefix;
  8690. const xmlChar *nsname;
  8691. xmlChar *attvalue;
  8692. const xmlChar **atts = ctxt->atts;
  8693. int maxatts = ctxt->maxatts;
  8694. int nratts, nbatts, nbdef, inputid;
  8695. int i, j, nbNs, attval;
  8696. unsigned long cur;
  8697. int nsNr = ctxt->nsNr;
  8698. if (RAW != '<') return(NULL);
  8699. NEXT1;
  8700. /*
  8701. * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
  8702. * point since the attribute values may be stored as pointers to
  8703. * the buffer and calling SHRINK would destroy them !
  8704. * The Shrinking is only possible once the full set of attribute
  8705. * callbacks have been done.
  8706. */
  8707. SHRINK;
  8708. cur = ctxt->input->cur - ctxt->input->base;
  8709. inputid = ctxt->input->id;
  8710. nbatts = 0;
  8711. nratts = 0;
  8712. nbdef = 0;
  8713. nbNs = 0;
  8714. attval = 0;
  8715. /* Forget any namespaces added during an earlier parse of this element. */
  8716. ctxt->nsNr = nsNr;
  8717. localname = xmlParseQName(ctxt, &prefix);
  8718. if (localname == NULL) {
  8719. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8720. "StartTag: invalid element name\n");
  8721. return(NULL);
  8722. }
  8723. *tlen = ctxt->input->cur - ctxt->input->base - cur;
  8724. /*
  8725. * Now parse the attributes, it ends up with the ending
  8726. *
  8727. * (S Attribute)* S?
  8728. */
  8729. SKIP_BLANKS;
  8730. GROW;
  8731. while (((RAW != '>') &&
  8732. ((RAW != '/') || (NXT(1) != '>')) &&
  8733. (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
  8734. const xmlChar *q = CUR_PTR;
  8735. unsigned int cons = ctxt->input->consumed;
  8736. int len = -1, alloc = 0;
  8737. attname = xmlParseAttribute2(ctxt, prefix, localname,
  8738. &aprefix, &attvalue, &len, &alloc);
  8739. if ((attname == NULL) || (attvalue == NULL))
  8740. goto next_attr;
  8741. if (len < 0) len = xmlStrlen(attvalue);
  8742. if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
  8743. const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
  8744. xmlURIPtr uri;
  8745. if (URL == NULL) {
  8746. xmlErrMemory(ctxt, "dictionary allocation failure");
  8747. if ((attvalue != NULL) && (alloc != 0))
  8748. xmlFree(attvalue);
  8749. localname = NULL;
  8750. goto done;
  8751. }
  8752. if (*URL != 0) {
  8753. uri = xmlParseURI((const char *) URL);
  8754. if (uri == NULL) {
  8755. xmlNsErr(ctxt, XML_WAR_NS_URI,
  8756. "xmlns: '%s' is not a valid URI\n",
  8757. URL, NULL, NULL);
  8758. } else {
  8759. if (uri->scheme == NULL) {
  8760. xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
  8761. "xmlns: URI %s is not absolute\n",
  8762. URL, NULL, NULL);
  8763. }
  8764. xmlFreeURI(uri);
  8765. }
  8766. if (URL == ctxt->str_xml_ns) {
  8767. if (attname != ctxt->str_xml) {
  8768. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8769. "xml namespace URI cannot be the default namespace\n",
  8770. NULL, NULL, NULL);
  8771. }
  8772. goto next_attr;
  8773. }
  8774. if ((len == 29) &&
  8775. (xmlStrEqual(URL,
  8776. BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
  8777. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8778. "reuse of the xmlns namespace name is forbidden\n",
  8779. NULL, NULL, NULL);
  8780. goto next_attr;
  8781. }
  8782. }
  8783. /*
  8784. * check that it's not a defined namespace
  8785. */
  8786. for (j = 1;j <= nbNs;j++)
  8787. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
  8788. break;
  8789. if (j <= nbNs)
  8790. xmlErrAttributeDup(ctxt, NULL, attname);
  8791. else
  8792. if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
  8793. } else if (aprefix == ctxt->str_xmlns) {
  8794. const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
  8795. xmlURIPtr uri;
  8796. if (attname == ctxt->str_xml) {
  8797. if (URL != ctxt->str_xml_ns) {
  8798. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8799. "xml namespace prefix mapped to wrong URI\n",
  8800. NULL, NULL, NULL);
  8801. }
  8802. /*
  8803. * Do not keep a namespace definition node
  8804. */
  8805. goto next_attr;
  8806. }
  8807. if (URL == ctxt->str_xml_ns) {
  8808. if (attname != ctxt->str_xml) {
  8809. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8810. "xml namespace URI mapped to wrong prefix\n",
  8811. NULL, NULL, NULL);
  8812. }
  8813. goto next_attr;
  8814. }
  8815. if (attname == ctxt->str_xmlns) {
  8816. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8817. "redefinition of the xmlns prefix is forbidden\n",
  8818. NULL, NULL, NULL);
  8819. goto next_attr;
  8820. }
  8821. if ((len == 29) &&
  8822. (xmlStrEqual(URL,
  8823. BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
  8824. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8825. "reuse of the xmlns namespace name is forbidden\n",
  8826. NULL, NULL, NULL);
  8827. goto next_attr;
  8828. }
  8829. if ((URL == NULL) || (URL[0] == 0)) {
  8830. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8831. "xmlns:%s: Empty XML namespace is not allowed\n",
  8832. attname, NULL, NULL);
  8833. goto next_attr;
  8834. } else {
  8835. uri = xmlParseURI((const char *) URL);
  8836. if (uri == NULL) {
  8837. xmlNsErr(ctxt, XML_WAR_NS_URI,
  8838. "xmlns:%s: '%s' is not a valid URI\n",
  8839. attname, URL, NULL);
  8840. } else {
  8841. if ((ctxt->pedantic) && (uri->scheme == NULL)) {
  8842. xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
  8843. "xmlns:%s: URI %s is not absolute\n",
  8844. attname, URL, NULL);
  8845. }
  8846. xmlFreeURI(uri);
  8847. }
  8848. }
  8849. /*
  8850. * check that it's not a defined namespace
  8851. */
  8852. for (j = 1;j <= nbNs;j++)
  8853. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
  8854. break;
  8855. if (j <= nbNs)
  8856. xmlErrAttributeDup(ctxt, aprefix, attname);
  8857. else
  8858. if (nsPush(ctxt, attname, URL) > 0) nbNs++;
  8859. } else {
  8860. /*
  8861. * Add the pair to atts
  8862. */
  8863. if ((atts == NULL) || (nbatts + 5 > maxatts)) {
  8864. if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
  8865. goto next_attr;
  8866. }
  8867. maxatts = ctxt->maxatts;
  8868. atts = ctxt->atts;
  8869. }
  8870. ctxt->attallocs[nratts++] = alloc;
  8871. atts[nbatts++] = attname;
  8872. atts[nbatts++] = aprefix;
  8873. /*
  8874. * The namespace URI field is used temporarily to point at the
  8875. * base of the current input buffer for non-alloced attributes.
  8876. * When the input buffer is reallocated, all the pointers become
  8877. * invalid, but they can be reconstructed later.
  8878. */
  8879. if (alloc)
  8880. atts[nbatts++] = NULL;
  8881. else
  8882. atts[nbatts++] = ctxt->input->base;
  8883. atts[nbatts++] = attvalue;
  8884. attvalue += len;
  8885. atts[nbatts++] = attvalue;
  8886. /*
  8887. * tag if some deallocation is needed
  8888. */
  8889. if (alloc != 0) attval = 1;
  8890. attvalue = NULL; /* moved into atts */
  8891. }
  8892. next_attr:
  8893. if ((attvalue != NULL) && (alloc != 0)) {
  8894. xmlFree(attvalue);
  8895. attvalue = NULL;
  8896. }
  8897. GROW
  8898. if (ctxt->instate == XML_PARSER_EOF)
  8899. break;
  8900. if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
  8901. break;
  8902. if (SKIP_BLANKS == 0) {
  8903. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  8904. "attributes construct error\n");
  8905. break;
  8906. }
  8907. if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
  8908. (attname == NULL) && (attvalue == NULL)) {
  8909. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  8910. "xmlParseStartTag: problem parsing attributes\n");
  8911. break;
  8912. }
  8913. GROW;
  8914. }
  8915. if (ctxt->input->id != inputid) {
  8916. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  8917. "Unexpected change of input\n");
  8918. localname = NULL;
  8919. goto done;
  8920. }
  8921. /* Reconstruct attribute value pointers. */
  8922. for (i = 0, j = 0; j < nratts; i += 5, j++) {
  8923. if (atts[i+2] != NULL) {
  8924. /*
  8925. * Arithmetic on dangling pointers is technically undefined
  8926. * behavior, but well...
  8927. */
  8928. ptrdiff_t offset = ctxt->input->base - atts[i+2];
  8929. atts[i+2] = NULL; /* Reset repurposed namespace URI */
  8930. atts[i+3] += offset; /* value */
  8931. atts[i+4] += offset; /* valuend */
  8932. }
  8933. }
  8934. /*
  8935. * The attributes defaulting
  8936. */
  8937. if (ctxt->attsDefault != NULL) {
  8938. xmlDefAttrsPtr defaults;
  8939. defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
  8940. if (defaults != NULL) {
  8941. for (i = 0;i < defaults->nbAttrs;i++) {
  8942. attname = defaults->values[5 * i];
  8943. aprefix = defaults->values[5 * i + 1];
  8944. /*
  8945. * special work for namespaces defaulted defs
  8946. */
  8947. if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
  8948. /*
  8949. * check that it's not a defined namespace
  8950. */
  8951. for (j = 1;j <= nbNs;j++)
  8952. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
  8953. break;
  8954. if (j <= nbNs) continue;
  8955. nsname = xmlGetNamespace(ctxt, NULL);
  8956. if (nsname != defaults->values[5 * i + 2]) {
  8957. if (nsPush(ctxt, NULL,
  8958. defaults->values[5 * i + 2]) > 0)
  8959. nbNs++;
  8960. }
  8961. } else if (aprefix == ctxt->str_xmlns) {
  8962. /*
  8963. * check that it's not a defined namespace
  8964. */
  8965. for (j = 1;j <= nbNs;j++)
  8966. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
  8967. break;
  8968. if (j <= nbNs) continue;
  8969. nsname = xmlGetNamespace(ctxt, attname);
  8970. if (nsname != defaults->values[2]) {
  8971. if (nsPush(ctxt, attname,
  8972. defaults->values[5 * i + 2]) > 0)
  8973. nbNs++;
  8974. }
  8975. } else {
  8976. /*
  8977. * check that it's not a defined attribute
  8978. */
  8979. for (j = 0;j < nbatts;j+=5) {
  8980. if ((attname == atts[j]) && (aprefix == atts[j+1]))
  8981. break;
  8982. }
  8983. if (j < nbatts) continue;
  8984. if ((atts == NULL) || (nbatts + 5 > maxatts)) {
  8985. if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
  8986. localname = NULL;
  8987. goto done;
  8988. }
  8989. maxatts = ctxt->maxatts;
  8990. atts = ctxt->atts;
  8991. }
  8992. atts[nbatts++] = attname;
  8993. atts[nbatts++] = aprefix;
  8994. if (aprefix == NULL)
  8995. atts[nbatts++] = NULL;
  8996. else
  8997. atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
  8998. atts[nbatts++] = defaults->values[5 * i + 2];
  8999. atts[nbatts++] = defaults->values[5 * i + 3];
  9000. if ((ctxt->standalone == 1) &&
  9001. (defaults->values[5 * i + 4] != NULL)) {
  9002. xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
  9003. "standalone: attribute %s on %s defaulted from external subset\n",
  9004. attname, localname);
  9005. }
  9006. nbdef++;
  9007. }
  9008. }
  9009. }
  9010. }
  9011. /*
  9012. * The attributes checkings
  9013. */
  9014. for (i = 0; i < nbatts;i += 5) {
  9015. /*
  9016. * The default namespace does not apply to attribute names.
  9017. */
  9018. if (atts[i + 1] != NULL) {
  9019. nsname = xmlGetNamespace(ctxt, atts[i + 1]);
  9020. if (nsname == NULL) {
  9021. xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
  9022. "Namespace prefix %s for %s on %s is not defined\n",
  9023. atts[i + 1], atts[i], localname);
  9024. }
  9025. atts[i + 2] = nsname;
  9026. } else
  9027. nsname = NULL;
  9028. /*
  9029. * [ WFC: Unique Att Spec ]
  9030. * No attribute name may appear more than once in the same
  9031. * start-tag or empty-element tag.
  9032. * As extended by the Namespace in XML REC.
  9033. */
  9034. for (j = 0; j < i;j += 5) {
  9035. if (atts[i] == atts[j]) {
  9036. if (atts[i+1] == atts[j+1]) {
  9037. xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
  9038. break;
  9039. }
  9040. if ((nsname != NULL) && (atts[j + 2] == nsname)) {
  9041. xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
  9042. "Namespaced Attribute %s in '%s' redefined\n",
  9043. atts[i], nsname, NULL);
  9044. break;
  9045. }
  9046. }
  9047. }
  9048. }
  9049. nsname = xmlGetNamespace(ctxt, prefix);
  9050. if ((prefix != NULL) && (nsname == NULL)) {
  9051. xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
  9052. "Namespace prefix %s on %s is not defined\n",
  9053. prefix, localname, NULL);
  9054. }
  9055. *pref = prefix;
  9056. *URI = nsname;
  9057. /*
  9058. * SAX: Start of Element !
  9059. */
  9060. if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
  9061. (!ctxt->disableSAX)) {
  9062. if (nbNs > 0)
  9063. ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
  9064. nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
  9065. nbatts / 5, nbdef, atts);
  9066. else
  9067. ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
  9068. nsname, 0, NULL, nbatts / 5, nbdef, atts);
  9069. }
  9070. done:
  9071. /*
  9072. * Free up attribute allocated strings if needed
  9073. */
  9074. if (attval != 0) {
  9075. for (i = 3,j = 0; j < nratts;i += 5,j++)
  9076. if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
  9077. xmlFree((xmlChar *) atts[i]);
  9078. }
  9079. return(localname);
  9080. }
  9081. /**
  9082. * xmlParseEndTag2:
  9083. * @ctxt: an XML parser context
  9084. * @line: line of the start tag
  9085. * @nsNr: number of namespaces on the start tag
  9086. *
  9087. * parse an end of tag
  9088. *
  9089. * [42] ETag ::= '</' Name S? '>'
  9090. *
  9091. * With namespace
  9092. *
  9093. * [NS 9] ETag ::= '</' QName S? '>'
  9094. */
  9095. static void
  9096. xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
  9097. const xmlChar *name;
  9098. GROW;
  9099. if ((RAW != '<') || (NXT(1) != '/')) {
  9100. xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
  9101. return;
  9102. }
  9103. SKIP(2);
  9104. if (tag->prefix == NULL)
  9105. name = xmlParseNameAndCompare(ctxt, ctxt->name);
  9106. else
  9107. name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
  9108. /*
  9109. * We should definitely be at the ending "S? '>'" part
  9110. */
  9111. GROW;
  9112. if (ctxt->instate == XML_PARSER_EOF)
  9113. return;
  9114. SKIP_BLANKS;
  9115. if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
  9116. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  9117. } else
  9118. NEXT1;
  9119. /*
  9120. * [ WFC: Element Type Match ]
  9121. * The Name in an element's end-tag must match the element type in the
  9122. * start-tag.
  9123. *
  9124. */
  9125. if (name != (xmlChar*)1) {
  9126. if (name == NULL) name = BAD_CAST "unparsable";
  9127. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
  9128. "Opening and ending tag mismatch: %s line %d and %s\n",
  9129. ctxt->name, tag->line, name);
  9130. }
  9131. /*
  9132. * SAX: End of Tag
  9133. */
  9134. if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
  9135. (!ctxt->disableSAX))
  9136. ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
  9137. tag->URI);
  9138. spacePop(ctxt);
  9139. if (tag->nsNr != 0)
  9140. nsPop(ctxt, tag->nsNr);
  9141. }
  9142. /**
  9143. * xmlParseCDSect:
  9144. * @ctxt: an XML parser context
  9145. *
  9146. * Parse escaped pure raw content.
  9147. *
  9148. * [18] CDSect ::= CDStart CData CDEnd
  9149. *
  9150. * [19] CDStart ::= '<![CDATA['
  9151. *
  9152. * [20] Data ::= (Char* - (Char* ']]>' Char*))
  9153. *
  9154. * [21] CDEnd ::= ']]>'
  9155. */
  9156. void
  9157. xmlParseCDSect(xmlParserCtxtPtr ctxt) {
  9158. xmlChar *buf = NULL;
  9159. int len = 0;
  9160. int size = XML_PARSER_BUFFER_SIZE;
  9161. int r, rl;
  9162. int s, sl;
  9163. int cur, l;
  9164. int count = 0;
  9165. /* Check 2.6.0 was NXT(0) not RAW */
  9166. if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
  9167. SKIP(9);
  9168. } else
  9169. return;
  9170. ctxt->instate = XML_PARSER_CDATA_SECTION;
  9171. r = CUR_CHAR(rl);
  9172. if (!IS_CHAR(r)) {
  9173. xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
  9174. ctxt->instate = XML_PARSER_CONTENT;
  9175. return;
  9176. }
  9177. NEXTL(rl);
  9178. s = CUR_CHAR(sl);
  9179. if (!IS_CHAR(s)) {
  9180. xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
  9181. ctxt->instate = XML_PARSER_CONTENT;
  9182. return;
  9183. }
  9184. NEXTL(sl);
  9185. cur = CUR_CHAR(l);
  9186. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9187. if (buf == NULL) {
  9188. xmlErrMemory(ctxt, NULL);
  9189. return;
  9190. }
  9191. while (IS_CHAR(cur) &&
  9192. ((r != ']') || (s != ']') || (cur != '>'))) {
  9193. if (len + 5 >= size) {
  9194. xmlChar *tmp;
  9195. if ((size > XML_MAX_TEXT_LENGTH) &&
  9196. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  9197. xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
  9198. "CData section too big found", NULL);
  9199. xmlFree (buf);
  9200. return;
  9201. }
  9202. tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
  9203. if (tmp == NULL) {
  9204. xmlFree(buf);
  9205. xmlErrMemory(ctxt, NULL);
  9206. return;
  9207. }
  9208. buf = tmp;
  9209. size *= 2;
  9210. }
  9211. COPY_BUF(rl,buf,len,r);
  9212. r = s;
  9213. rl = sl;
  9214. s = cur;
  9215. sl = l;
  9216. count++;
  9217. if (count > 50) {
  9218. SHRINK;
  9219. GROW;
  9220. if (ctxt->instate == XML_PARSER_EOF) {
  9221. xmlFree(buf);
  9222. return;
  9223. }
  9224. count = 0;
  9225. }
  9226. NEXTL(l);
  9227. cur = CUR_CHAR(l);
  9228. }
  9229. buf[len] = 0;
  9230. ctxt->instate = XML_PARSER_CONTENT;
  9231. if (cur != '>') {
  9232. xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
  9233. "CData section not finished\n%.50s\n", buf);
  9234. xmlFree(buf);
  9235. return;
  9236. }
  9237. NEXTL(l);
  9238. /*
  9239. * OK the buffer is to be consumed as cdata.
  9240. */
  9241. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  9242. if (ctxt->sax->cdataBlock != NULL)
  9243. ctxt->sax->cdataBlock(ctxt->userData, buf, len);
  9244. else if (ctxt->sax->characters != NULL)
  9245. ctxt->sax->characters(ctxt->userData, buf, len);
  9246. }
  9247. xmlFree(buf);
  9248. }
  9249. /**
  9250. * xmlParseContentInternal:
  9251. * @ctxt: an XML parser context
  9252. *
  9253. * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
  9254. * unexpected EOF to the caller.
  9255. */
  9256. static void
  9257. xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
  9258. int nameNr = ctxt->nameNr;
  9259. GROW;
  9260. while ((RAW != 0) &&
  9261. (ctxt->instate != XML_PARSER_EOF)) {
  9262. const xmlChar *test = CUR_PTR;
  9263. unsigned int cons = ctxt->input->consumed;
  9264. const xmlChar *cur = ctxt->input->cur;
  9265. /*
  9266. * First case : a Processing Instruction.
  9267. */
  9268. if ((*cur == '<') && (cur[1] == '?')) {
  9269. xmlParsePI(ctxt);
  9270. }
  9271. /*
  9272. * Second case : a CDSection
  9273. */
  9274. /* 2.6.0 test was *cur not RAW */
  9275. else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
  9276. xmlParseCDSect(ctxt);
  9277. }
  9278. /*
  9279. * Third case : a comment
  9280. */
  9281. else if ((*cur == '<') && (NXT(1) == '!') &&
  9282. (NXT(2) == '-') && (NXT(3) == '-')) {
  9283. xmlParseComment(ctxt);
  9284. ctxt->instate = XML_PARSER_CONTENT;
  9285. }
  9286. /*
  9287. * Fourth case : a sub-element.
  9288. */
  9289. else if (*cur == '<') {
  9290. if (NXT(1) == '/') {
  9291. if (ctxt->nameNr <= nameNr)
  9292. break;
  9293. xmlParseElementEnd(ctxt);
  9294. } else {
  9295. xmlParseElementStart(ctxt);
  9296. }
  9297. }
  9298. /*
  9299. * Fifth case : a reference. If if has not been resolved,
  9300. * parsing returns it's Name, create the node
  9301. */
  9302. else if (*cur == '&') {
  9303. xmlParseReference(ctxt);
  9304. }
  9305. /*
  9306. * Last case, text. Note that References are handled directly.
  9307. */
  9308. else {
  9309. xmlParseCharData(ctxt, 0);
  9310. }
  9311. GROW;
  9312. SHRINK;
  9313. if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  9314. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  9315. "detected an error in element content\n");
  9316. xmlHaltParser(ctxt);
  9317. break;
  9318. }
  9319. }
  9320. }
  9321. /**
  9322. * xmlParseContent:
  9323. * @ctxt: an XML parser context
  9324. *
  9325. * Parse a content sequence. Stops at EOF or '</'.
  9326. *
  9327. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  9328. */
  9329. void
  9330. xmlParseContent(xmlParserCtxtPtr ctxt) {
  9331. int nameNr = ctxt->nameNr;
  9332. xmlParseContentInternal(ctxt);
  9333. if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
  9334. const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
  9335. int line = ctxt->pushTab[ctxt->nameNr - 1].line;
  9336. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
  9337. "Premature end of data in tag %s line %d\n",
  9338. name, line, NULL);
  9339. }
  9340. }
  9341. /**
  9342. * xmlParseElement:
  9343. * @ctxt: an XML parser context
  9344. *
  9345. * parse an XML element
  9346. *
  9347. * [39] element ::= EmptyElemTag | STag content ETag
  9348. *
  9349. * [ WFC: Element Type Match ]
  9350. * The Name in an element's end-tag must match the element type in the
  9351. * start-tag.
  9352. *
  9353. */
  9354. void
  9355. xmlParseElement(xmlParserCtxtPtr ctxt) {
  9356. if (xmlParseElementStart(ctxt) != 0)
  9357. return;
  9358. xmlParseContentInternal(ctxt);
  9359. if (ctxt->instate == XML_PARSER_EOF)
  9360. return;
  9361. if (CUR == 0) {
  9362. const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
  9363. int line = ctxt->pushTab[ctxt->nameNr - 1].line;
  9364. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
  9365. "Premature end of data in tag %s line %d\n",
  9366. name, line, NULL);
  9367. return;
  9368. }
  9369. xmlParseElementEnd(ctxt);
  9370. }
  9371. /**
  9372. * xmlParseElementStart:
  9373. * @ctxt: an XML parser context
  9374. *
  9375. * Parse the start of an XML element. Returns -1 in case of error, 0 if an
  9376. * opening tag was parsed, 1 if an empty element was parsed.
  9377. */
  9378. static int
  9379. xmlParseElementStart(xmlParserCtxtPtr ctxt) {
  9380. const xmlChar *name;
  9381. const xmlChar *prefix = NULL;
  9382. const xmlChar *URI = NULL;
  9383. xmlParserNodeInfo node_info;
  9384. int line, tlen = 0;
  9385. xmlNodePtr ret;
  9386. int nsNr = ctxt->nsNr;
  9387. if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
  9388. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  9389. xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
  9390. "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
  9391. xmlParserMaxDepth);
  9392. xmlHaltParser(ctxt);
  9393. return(-1);
  9394. }
  9395. /* Capture start position */
  9396. if (ctxt->record_info) {
  9397. node_info.begin_pos = ctxt->input->consumed +
  9398. (CUR_PTR - ctxt->input->base);
  9399. node_info.begin_line = ctxt->input->line;
  9400. }
  9401. if (ctxt->spaceNr == 0)
  9402. spacePush(ctxt, -1);
  9403. else if (*ctxt->space == -2)
  9404. spacePush(ctxt, -1);
  9405. else
  9406. spacePush(ctxt, *ctxt->space);
  9407. line = ctxt->input->line;
  9408. #ifdef LIBXML_SAX1_ENABLED
  9409. if (ctxt->sax2)
  9410. #endif /* LIBXML_SAX1_ENABLED */
  9411. name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
  9412. #ifdef LIBXML_SAX1_ENABLED
  9413. else
  9414. name = xmlParseStartTag(ctxt);
  9415. #endif /* LIBXML_SAX1_ENABLED */
  9416. if (ctxt->instate == XML_PARSER_EOF)
  9417. return(-1);
  9418. if (name == NULL) {
  9419. spacePop(ctxt);
  9420. return(-1);
  9421. }
  9422. nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
  9423. ret = ctxt->node;
  9424. #ifdef LIBXML_VALID_ENABLED
  9425. /*
  9426. * [ VC: Root Element Type ]
  9427. * The Name in the document type declaration must match the element
  9428. * type of the root element.
  9429. */
  9430. if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
  9431. ctxt->node && (ctxt->node == ctxt->myDoc->children))
  9432. ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
  9433. #endif /* LIBXML_VALID_ENABLED */
  9434. /*
  9435. * Check for an Empty Element.
  9436. */
  9437. if ((RAW == '/') && (NXT(1) == '>')) {
  9438. SKIP(2);
  9439. if (ctxt->sax2) {
  9440. if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
  9441. (!ctxt->disableSAX))
  9442. ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
  9443. #ifdef LIBXML_SAX1_ENABLED
  9444. } else {
  9445. if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
  9446. (!ctxt->disableSAX))
  9447. ctxt->sax->endElement(ctxt->userData, name);
  9448. #endif /* LIBXML_SAX1_ENABLED */
  9449. }
  9450. namePop(ctxt);
  9451. spacePop(ctxt);
  9452. if (nsNr != ctxt->nsNr)
  9453. nsPop(ctxt, ctxt->nsNr - nsNr);
  9454. if ( ret != NULL && ctxt->record_info ) {
  9455. node_info.end_pos = ctxt->input->consumed +
  9456. (CUR_PTR - ctxt->input->base);
  9457. node_info.end_line = ctxt->input->line;
  9458. node_info.node = ret;
  9459. xmlParserAddNodeInfo(ctxt, &node_info);
  9460. }
  9461. return(1);
  9462. }
  9463. if (RAW == '>') {
  9464. NEXT1;
  9465. } else {
  9466. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
  9467. "Couldn't find end of Start Tag %s line %d\n",
  9468. name, line, NULL);
  9469. /*
  9470. * end of parsing of this node.
  9471. */
  9472. nodePop(ctxt);
  9473. namePop(ctxt);
  9474. spacePop(ctxt);
  9475. if (nsNr != ctxt->nsNr)
  9476. nsPop(ctxt, ctxt->nsNr - nsNr);
  9477. /*
  9478. * Capture end position and add node
  9479. */
  9480. if ( ret != NULL && ctxt->record_info ) {
  9481. node_info.end_pos = ctxt->input->consumed +
  9482. (CUR_PTR - ctxt->input->base);
  9483. node_info.end_line = ctxt->input->line;
  9484. node_info.node = ret;
  9485. xmlParserAddNodeInfo(ctxt, &node_info);
  9486. }
  9487. return(-1);
  9488. }
  9489. return(0);
  9490. }
  9491. /**
  9492. * xmlParseElementEnd:
  9493. * @ctxt: an XML parser context
  9494. *
  9495. * Parse the end of an XML element.
  9496. */
  9497. static void
  9498. xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
  9499. xmlParserNodeInfo node_info;
  9500. xmlNodePtr ret = ctxt->node;
  9501. if (ctxt->nameNr <= 0)
  9502. return;
  9503. /*
  9504. * parse the end of tag: '</' should be here.
  9505. */
  9506. if (ctxt->sax2) {
  9507. xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
  9508. namePop(ctxt);
  9509. }
  9510. #ifdef LIBXML_SAX1_ENABLED
  9511. else
  9512. xmlParseEndTag1(ctxt, 0);
  9513. #endif /* LIBXML_SAX1_ENABLED */
  9514. /*
  9515. * Capture end position and add node
  9516. */
  9517. if ( ret != NULL && ctxt->record_info ) {
  9518. node_info.end_pos = ctxt->input->consumed +
  9519. (CUR_PTR - ctxt->input->base);
  9520. node_info.end_line = ctxt->input->line;
  9521. node_info.node = ret;
  9522. xmlParserAddNodeInfo(ctxt, &node_info);
  9523. }
  9524. }
  9525. /**
  9526. * xmlParseVersionNum:
  9527. * @ctxt: an XML parser context
  9528. *
  9529. * parse the XML version value.
  9530. *
  9531. * [26] VersionNum ::= '1.' [0-9]+
  9532. *
  9533. * In practice allow [0-9].[0-9]+ at that level
  9534. *
  9535. * Returns the string giving the XML version number, or NULL
  9536. */
  9537. xmlChar *
  9538. xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
  9539. xmlChar *buf = NULL;
  9540. int len = 0;
  9541. int size = 10;
  9542. xmlChar cur;
  9543. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9544. if (buf == NULL) {
  9545. xmlErrMemory(ctxt, NULL);
  9546. return(NULL);
  9547. }
  9548. cur = CUR;
  9549. if (!((cur >= '0') && (cur <= '9'))) {
  9550. xmlFree(buf);
  9551. return(NULL);
  9552. }
  9553. buf[len++] = cur;
  9554. NEXT;
  9555. cur=CUR;
  9556. if (cur != '.') {
  9557. xmlFree(buf);
  9558. return(NULL);
  9559. }
  9560. buf[len++] = cur;
  9561. NEXT;
  9562. cur=CUR;
  9563. while ((cur >= '0') && (cur <= '9')) {
  9564. if (len + 1 >= size) {
  9565. xmlChar *tmp;
  9566. size *= 2;
  9567. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  9568. if (tmp == NULL) {
  9569. xmlFree(buf);
  9570. xmlErrMemory(ctxt, NULL);
  9571. return(NULL);
  9572. }
  9573. buf = tmp;
  9574. }
  9575. buf[len++] = cur;
  9576. NEXT;
  9577. cur=CUR;
  9578. }
  9579. buf[len] = 0;
  9580. return(buf);
  9581. }
  9582. /**
  9583. * xmlParseVersionInfo:
  9584. * @ctxt: an XML parser context
  9585. *
  9586. * parse the XML version.
  9587. *
  9588. * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  9589. *
  9590. * [25] Eq ::= S? '=' S?
  9591. *
  9592. * Returns the version string, e.g. "1.0"
  9593. */
  9594. xmlChar *
  9595. xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
  9596. xmlChar *version = NULL;
  9597. if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
  9598. SKIP(7);
  9599. SKIP_BLANKS;
  9600. if (RAW != '=') {
  9601. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9602. return(NULL);
  9603. }
  9604. NEXT;
  9605. SKIP_BLANKS;
  9606. if (RAW == '"') {
  9607. NEXT;
  9608. version = xmlParseVersionNum(ctxt);
  9609. if (RAW != '"') {
  9610. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9611. } else
  9612. NEXT;
  9613. } else if (RAW == '\''){
  9614. NEXT;
  9615. version = xmlParseVersionNum(ctxt);
  9616. if (RAW != '\'') {
  9617. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9618. } else
  9619. NEXT;
  9620. } else {
  9621. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9622. }
  9623. }
  9624. return(version);
  9625. }
  9626. /**
  9627. * xmlParseEncName:
  9628. * @ctxt: an XML parser context
  9629. *
  9630. * parse the XML encoding name
  9631. *
  9632. * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  9633. *
  9634. * Returns the encoding name value or NULL
  9635. */
  9636. xmlChar *
  9637. xmlParseEncName(xmlParserCtxtPtr ctxt) {
  9638. xmlChar *buf = NULL;
  9639. int len = 0;
  9640. int size = 10;
  9641. xmlChar cur;
  9642. cur = CUR;
  9643. if (((cur >= 'a') && (cur <= 'z')) ||
  9644. ((cur >= 'A') && (cur <= 'Z'))) {
  9645. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9646. if (buf == NULL) {
  9647. xmlErrMemory(ctxt, NULL);
  9648. return(NULL);
  9649. }
  9650. buf[len++] = cur;
  9651. NEXT;
  9652. cur = CUR;
  9653. while (((cur >= 'a') && (cur <= 'z')) ||
  9654. ((cur >= 'A') && (cur <= 'Z')) ||
  9655. ((cur >= '0') && (cur <= '9')) ||
  9656. (cur == '.') || (cur == '_') ||
  9657. (cur == '-')) {
  9658. if (len + 1 >= size) {
  9659. xmlChar *tmp;
  9660. size *= 2;
  9661. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  9662. if (tmp == NULL) {
  9663. xmlErrMemory(ctxt, NULL);
  9664. xmlFree(buf);
  9665. return(NULL);
  9666. }
  9667. buf = tmp;
  9668. }
  9669. buf[len++] = cur;
  9670. NEXT;
  9671. cur = CUR;
  9672. if (cur == 0) {
  9673. SHRINK;
  9674. GROW;
  9675. cur = CUR;
  9676. }
  9677. }
  9678. buf[len] = 0;
  9679. } else {
  9680. xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
  9681. }
  9682. return(buf);
  9683. }
  9684. /**
  9685. * xmlParseEncodingDecl:
  9686. * @ctxt: an XML parser context
  9687. *
  9688. * parse the XML encoding declaration
  9689. *
  9690. * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
  9691. *
  9692. * this setups the conversion filters.
  9693. *
  9694. * Returns the encoding value or NULL
  9695. */
  9696. const xmlChar *
  9697. xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
  9698. xmlChar *encoding = NULL;
  9699. SKIP_BLANKS;
  9700. if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
  9701. SKIP(8);
  9702. SKIP_BLANKS;
  9703. if (RAW != '=') {
  9704. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9705. return(NULL);
  9706. }
  9707. NEXT;
  9708. SKIP_BLANKS;
  9709. if (RAW == '"') {
  9710. NEXT;
  9711. encoding = xmlParseEncName(ctxt);
  9712. if (RAW != '"') {
  9713. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9714. xmlFree((xmlChar *) encoding);
  9715. return(NULL);
  9716. } else
  9717. NEXT;
  9718. } else if (RAW == '\''){
  9719. NEXT;
  9720. encoding = xmlParseEncName(ctxt);
  9721. if (RAW != '\'') {
  9722. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9723. xmlFree((xmlChar *) encoding);
  9724. return(NULL);
  9725. } else
  9726. NEXT;
  9727. } else {
  9728. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9729. }
  9730. /*
  9731. * Non standard parsing, allowing the user to ignore encoding
  9732. */
  9733. if (ctxt->options & XML_PARSE_IGNORE_ENC) {
  9734. xmlFree((xmlChar *) encoding);
  9735. return(NULL);
  9736. }
  9737. /*
  9738. * UTF-16 encoding switch has already taken place at this stage,
  9739. * more over the little-endian/big-endian selection is already done
  9740. */
  9741. if ((encoding != NULL) &&
  9742. ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
  9743. (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
  9744. /*
  9745. * If no encoding was passed to the parser, that we are
  9746. * using UTF-16 and no decoder is present i.e. the
  9747. * document is apparently UTF-8 compatible, then raise an
  9748. * encoding mismatch fatal error
  9749. */
  9750. if ((ctxt->encoding == NULL) &&
  9751. (ctxt->input->buf != NULL) &&
  9752. (ctxt->input->buf->encoder == NULL)) {
  9753. xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
  9754. "Document labelled UTF-16 but has UTF-8 content\n");
  9755. }
  9756. if (ctxt->encoding != NULL)
  9757. xmlFree((xmlChar *) ctxt->encoding);
  9758. ctxt->encoding = encoding;
  9759. }
  9760. /*
  9761. * UTF-8 encoding is handled natively
  9762. */
  9763. else if ((encoding != NULL) &&
  9764. ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
  9765. (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
  9766. if (ctxt->encoding != NULL)
  9767. xmlFree((xmlChar *) ctxt->encoding);
  9768. ctxt->encoding = encoding;
  9769. }
  9770. else if (encoding != NULL) {
  9771. xmlCharEncodingHandlerPtr handler;
  9772. if (ctxt->input->encoding != NULL)
  9773. xmlFree((xmlChar *) ctxt->input->encoding);
  9774. ctxt->input->encoding = encoding;
  9775. handler = xmlFindCharEncodingHandler((const char *) encoding);
  9776. if (handler != NULL) {
  9777. if (xmlSwitchToEncoding(ctxt, handler) < 0) {
  9778. /* failed to convert */
  9779. ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
  9780. return(NULL);
  9781. }
  9782. } else {
  9783. xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
  9784. "Unsupported encoding %s\n", encoding);
  9785. return(NULL);
  9786. }
  9787. }
  9788. }
  9789. return(encoding);
  9790. }
  9791. /**
  9792. * xmlParseSDDecl:
  9793. * @ctxt: an XML parser context
  9794. *
  9795. * parse the XML standalone declaration
  9796. *
  9797. * [32] SDDecl ::= S 'standalone' Eq
  9798. * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
  9799. *
  9800. * [ VC: Standalone Document Declaration ]
  9801. * TODO The standalone document declaration must have the value "no"
  9802. * if any external markup declarations contain declarations of:
  9803. * - attributes with default values, if elements to which these
  9804. * attributes apply appear in the document without specifications
  9805. * of values for these attributes, or
  9806. * - entities (other than amp, lt, gt, apos, quot), if references
  9807. * to those entities appear in the document, or
  9808. * - attributes with values subject to normalization, where the
  9809. * attribute appears in the document with a value which will change
  9810. * as a result of normalization, or
  9811. * - element types with element content, if white space occurs directly
  9812. * within any instance of those types.
  9813. *
  9814. * Returns:
  9815. * 1 if standalone="yes"
  9816. * 0 if standalone="no"
  9817. * -2 if standalone attribute is missing or invalid
  9818. * (A standalone value of -2 means that the XML declaration was found,
  9819. * but no value was specified for the standalone attribute).
  9820. */
  9821. int
  9822. xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
  9823. int standalone = -2;
  9824. SKIP_BLANKS;
  9825. if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
  9826. SKIP(10);
  9827. SKIP_BLANKS;
  9828. if (RAW != '=') {
  9829. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9830. return(standalone);
  9831. }
  9832. NEXT;
  9833. SKIP_BLANKS;
  9834. if (RAW == '\''){
  9835. NEXT;
  9836. if ((RAW == 'n') && (NXT(1) == 'o')) {
  9837. standalone = 0;
  9838. SKIP(2);
  9839. } else if ((RAW == 'y') && (NXT(1) == 'e') &&
  9840. (NXT(2) == 's')) {
  9841. standalone = 1;
  9842. SKIP(3);
  9843. } else {
  9844. xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
  9845. }
  9846. if (RAW != '\'') {
  9847. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9848. } else
  9849. NEXT;
  9850. } else if (RAW == '"'){
  9851. NEXT;
  9852. if ((RAW == 'n') && (NXT(1) == 'o')) {
  9853. standalone = 0;
  9854. SKIP(2);
  9855. } else if ((RAW == 'y') && (NXT(1) == 'e') &&
  9856. (NXT(2) == 's')) {
  9857. standalone = 1;
  9858. SKIP(3);
  9859. } else {
  9860. xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
  9861. }
  9862. if (RAW != '"') {
  9863. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9864. } else
  9865. NEXT;
  9866. } else {
  9867. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9868. }
  9869. }
  9870. return(standalone);
  9871. }
  9872. /**
  9873. * xmlParseXMLDecl:
  9874. * @ctxt: an XML parser context
  9875. *
  9876. * parse an XML declaration header
  9877. *
  9878. * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  9879. */
  9880. void
  9881. xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
  9882. xmlChar *version;
  9883. /*
  9884. * This value for standalone indicates that the document has an
  9885. * XML declaration but it does not have a standalone attribute.
  9886. * It will be overwritten later if a standalone attribute is found.
  9887. */
  9888. ctxt->input->standalone = -2;
  9889. /*
  9890. * We know that '<?xml' is here.
  9891. */
  9892. SKIP(5);
  9893. if (!IS_BLANK_CH(RAW)) {
  9894. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  9895. "Blank needed after '<?xml'\n");
  9896. }
  9897. SKIP_BLANKS;
  9898. /*
  9899. * We must have the VersionInfo here.
  9900. */
  9901. version = xmlParseVersionInfo(ctxt);
  9902. if (version == NULL) {
  9903. xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
  9904. } else {
  9905. if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
  9906. /*
  9907. * Changed here for XML-1.0 5th edition
  9908. */
  9909. if (ctxt->options & XML_PARSE_OLD10) {
  9910. xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
  9911. "Unsupported version '%s'\n",
  9912. version);
  9913. } else {
  9914. if ((version[0] == '1') && ((version[1] == '.'))) {
  9915. xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
  9916. "Unsupported version '%s'\n",
  9917. version, NULL);
  9918. } else {
  9919. xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
  9920. "Unsupported version '%s'\n",
  9921. version);
  9922. }
  9923. }
  9924. }
  9925. if (ctxt->version != NULL)
  9926. xmlFree((void *) ctxt->version);
  9927. ctxt->version = version;
  9928. }
  9929. /*
  9930. * We may have the encoding declaration
  9931. */
  9932. if (!IS_BLANK_CH(RAW)) {
  9933. if ((RAW == '?') && (NXT(1) == '>')) {
  9934. SKIP(2);
  9935. return;
  9936. }
  9937. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
  9938. }
  9939. xmlParseEncodingDecl(ctxt);
  9940. if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
  9941. (ctxt->instate == XML_PARSER_EOF)) {
  9942. /*
  9943. * The XML REC instructs us to stop parsing right here
  9944. */
  9945. return;
  9946. }
  9947. /*
  9948. * We may have the standalone status.
  9949. */
  9950. if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
  9951. if ((RAW == '?') && (NXT(1) == '>')) {
  9952. SKIP(2);
  9953. return;
  9954. }
  9955. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
  9956. }
  9957. /*
  9958. * We can grow the input buffer freely at that point
  9959. */
  9960. GROW;
  9961. SKIP_BLANKS;
  9962. ctxt->input->standalone = xmlParseSDDecl(ctxt);
  9963. SKIP_BLANKS;
  9964. if ((RAW == '?') && (NXT(1) == '>')) {
  9965. SKIP(2);
  9966. } else if (RAW == '>') {
  9967. /* Deprecated old WD ... */
  9968. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  9969. NEXT;
  9970. } else {
  9971. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  9972. MOVETO_ENDTAG(CUR_PTR);
  9973. NEXT;
  9974. }
  9975. }
  9976. /**
  9977. * xmlParseMisc:
  9978. * @ctxt: an XML parser context
  9979. *
  9980. * parse an XML Misc* optional field.
  9981. *
  9982. * [27] Misc ::= Comment | PI | S
  9983. */
  9984. void
  9985. xmlParseMisc(xmlParserCtxtPtr ctxt) {
  9986. while (ctxt->instate != XML_PARSER_EOF) {
  9987. SKIP_BLANKS;
  9988. GROW;
  9989. if ((RAW == '<') && (NXT(1) == '?')) {
  9990. xmlParsePI(ctxt);
  9991. } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
  9992. xmlParseComment(ctxt);
  9993. } else {
  9994. break;
  9995. }
  9996. }
  9997. }
  9998. /**
  9999. * xmlParseDocument:
  10000. * @ctxt: an XML parser context
  10001. *
  10002. * parse an XML document (and build a tree if using the standard SAX
  10003. * interface).
  10004. *
  10005. * [1] document ::= prolog element Misc*
  10006. *
  10007. * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
  10008. *
  10009. * Returns 0, -1 in case of error. the parser context is augmented
  10010. * as a result of the parsing.
  10011. */
  10012. int
  10013. xmlParseDocument(xmlParserCtxtPtr ctxt) {
  10014. xmlChar start[4];
  10015. xmlCharEncoding enc;
  10016. xmlInitParser();
  10017. if ((ctxt == NULL) || (ctxt->input == NULL))
  10018. return(-1);
  10019. GROW;
  10020. /*
  10021. * SAX: detecting the level.
  10022. */
  10023. xmlDetectSAX2(ctxt);
  10024. /*
  10025. * SAX: beginning of the document processing.
  10026. */
  10027. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10028. ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
  10029. if (ctxt->instate == XML_PARSER_EOF)
  10030. return(-1);
  10031. if ((ctxt->encoding == NULL) &&
  10032. ((ctxt->input->end - ctxt->input->cur) >= 4)) {
  10033. /*
  10034. * Get the 4 first bytes and decode the charset
  10035. * if enc != XML_CHAR_ENCODING_NONE
  10036. * plug some encoding conversion routines.
  10037. */
  10038. start[0] = RAW;
  10039. start[1] = NXT(1);
  10040. start[2] = NXT(2);
  10041. start[3] = NXT(3);
  10042. enc = xmlDetectCharEncoding(&start[0], 4);
  10043. if (enc != XML_CHAR_ENCODING_NONE) {
  10044. xmlSwitchEncoding(ctxt, enc);
  10045. }
  10046. }
  10047. if (CUR == 0) {
  10048. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10049. return(-1);
  10050. }
  10051. /*
  10052. * Check for the XMLDecl in the Prolog.
  10053. * do not GROW here to avoid the detected encoder to decode more
  10054. * than just the first line, unless the amount of data is really
  10055. * too small to hold "<?xml version="1.0" encoding="foo"
  10056. */
  10057. if ((ctxt->input->end - ctxt->input->cur) < 35) {
  10058. GROW;
  10059. }
  10060. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  10061. /*
  10062. * Note that we will switch encoding on the fly.
  10063. */
  10064. xmlParseXMLDecl(ctxt);
  10065. if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
  10066. (ctxt->instate == XML_PARSER_EOF)) {
  10067. /*
  10068. * The XML REC instructs us to stop parsing right here
  10069. */
  10070. return(-1);
  10071. }
  10072. ctxt->standalone = ctxt->input->standalone;
  10073. SKIP_BLANKS;
  10074. } else {
  10075. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10076. }
  10077. if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
  10078. ctxt->sax->startDocument(ctxt->userData);
  10079. if (ctxt->instate == XML_PARSER_EOF)
  10080. return(-1);
  10081. if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
  10082. (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
  10083. ctxt->myDoc->compression = ctxt->input->buf->compressed;
  10084. }
  10085. /*
  10086. * The Misc part of the Prolog
  10087. */
  10088. xmlParseMisc(ctxt);
  10089. /*
  10090. * Then possibly doc type declaration(s) and more Misc
  10091. * (doctypedecl Misc*)?
  10092. */
  10093. GROW;
  10094. if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
  10095. ctxt->inSubset = 1;
  10096. xmlParseDocTypeDecl(ctxt);
  10097. if (RAW == '[') {
  10098. ctxt->instate = XML_PARSER_DTD;
  10099. xmlParseInternalSubset(ctxt);
  10100. if (ctxt->instate == XML_PARSER_EOF)
  10101. return(-1);
  10102. }
  10103. /*
  10104. * Create and update the external subset.
  10105. */
  10106. ctxt->inSubset = 2;
  10107. if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
  10108. (!ctxt->disableSAX))
  10109. ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
  10110. ctxt->extSubSystem, ctxt->extSubURI);
  10111. if (ctxt->instate == XML_PARSER_EOF)
  10112. return(-1);
  10113. ctxt->inSubset = 0;
  10114. xmlCleanSpecialAttr(ctxt);
  10115. ctxt->instate = XML_PARSER_PROLOG;
  10116. xmlParseMisc(ctxt);
  10117. }
  10118. /*
  10119. * Time to start parsing the tree itself
  10120. */
  10121. GROW;
  10122. if (RAW != '<') {
  10123. xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
  10124. "Start tag expected, '<' not found\n");
  10125. } else {
  10126. ctxt->instate = XML_PARSER_CONTENT;
  10127. xmlParseElement(ctxt);
  10128. ctxt->instate = XML_PARSER_EPILOG;
  10129. /*
  10130. * The Misc part at the end
  10131. */
  10132. xmlParseMisc(ctxt);
  10133. if (RAW != 0) {
  10134. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  10135. }
  10136. ctxt->instate = XML_PARSER_EOF;
  10137. }
  10138. /*
  10139. * SAX: end of the document processing.
  10140. */
  10141. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10142. ctxt->sax->endDocument(ctxt->userData);
  10143. /*
  10144. * Remove locally kept entity definitions if the tree was not built
  10145. */
  10146. if ((ctxt->myDoc != NULL) &&
  10147. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
  10148. xmlFreeDoc(ctxt->myDoc);
  10149. ctxt->myDoc = NULL;
  10150. }
  10151. if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
  10152. ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
  10153. if (ctxt->valid)
  10154. ctxt->myDoc->properties |= XML_DOC_DTDVALID;
  10155. if (ctxt->nsWellFormed)
  10156. ctxt->myDoc->properties |= XML_DOC_NSVALID;
  10157. if (ctxt->options & XML_PARSE_OLD10)
  10158. ctxt->myDoc->properties |= XML_DOC_OLD10;
  10159. }
  10160. if (! ctxt->wellFormed) {
  10161. ctxt->valid = 0;
  10162. return(-1);
  10163. }
  10164. return(0);
  10165. }
  10166. /**
  10167. * xmlParseExtParsedEnt:
  10168. * @ctxt: an XML parser context
  10169. *
  10170. * parse a general parsed entity
  10171. * An external general parsed entity is well-formed if it matches the
  10172. * production labeled extParsedEnt.
  10173. *
  10174. * [78] extParsedEnt ::= TextDecl? content
  10175. *
  10176. * Returns 0, -1 in case of error. the parser context is augmented
  10177. * as a result of the parsing.
  10178. */
  10179. int
  10180. xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
  10181. xmlChar start[4];
  10182. xmlCharEncoding enc;
  10183. if ((ctxt == NULL) || (ctxt->input == NULL))
  10184. return(-1);
  10185. xmlDefaultSAXHandlerInit();
  10186. xmlDetectSAX2(ctxt);
  10187. GROW;
  10188. /*
  10189. * SAX: beginning of the document processing.
  10190. */
  10191. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10192. ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
  10193. /*
  10194. * Get the 4 first bytes and decode the charset
  10195. * if enc != XML_CHAR_ENCODING_NONE
  10196. * plug some encoding conversion routines.
  10197. */
  10198. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  10199. start[0] = RAW;
  10200. start[1] = NXT(1);
  10201. start[2] = NXT(2);
  10202. start[3] = NXT(3);
  10203. enc = xmlDetectCharEncoding(start, 4);
  10204. if (enc != XML_CHAR_ENCODING_NONE) {
  10205. xmlSwitchEncoding(ctxt, enc);
  10206. }
  10207. }
  10208. if (CUR == 0) {
  10209. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10210. }
  10211. /*
  10212. * Check for the XMLDecl in the Prolog.
  10213. */
  10214. GROW;
  10215. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  10216. /*
  10217. * Note that we will switch encoding on the fly.
  10218. */
  10219. xmlParseXMLDecl(ctxt);
  10220. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  10221. /*
  10222. * The XML REC instructs us to stop parsing right here
  10223. */
  10224. return(-1);
  10225. }
  10226. SKIP_BLANKS;
  10227. } else {
  10228. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10229. }
  10230. if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
  10231. ctxt->sax->startDocument(ctxt->userData);
  10232. if (ctxt->instate == XML_PARSER_EOF)
  10233. return(-1);
  10234. /*
  10235. * Doing validity checking on chunk doesn't make sense
  10236. */
  10237. ctxt->instate = XML_PARSER_CONTENT;
  10238. ctxt->validate = 0;
  10239. ctxt->loadsubset = 0;
  10240. ctxt->depth = 0;
  10241. xmlParseContent(ctxt);
  10242. if (ctxt->instate == XML_PARSER_EOF)
  10243. return(-1);
  10244. if ((RAW == '<') && (NXT(1) == '/')) {
  10245. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  10246. } else if (RAW != 0) {
  10247. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  10248. }
  10249. /*
  10250. * SAX: end of the document processing.
  10251. */
  10252. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10253. ctxt->sax->endDocument(ctxt->userData);
  10254. if (! ctxt->wellFormed) return(-1);
  10255. return(0);
  10256. }
  10257. #ifdef LIBXML_PUSH_ENABLED
  10258. /************************************************************************
  10259. * *
  10260. * Progressive parsing interfaces *
  10261. * *
  10262. ************************************************************************/
  10263. /**
  10264. * xmlParseLookupSequence:
  10265. * @ctxt: an XML parser context
  10266. * @first: the first char to lookup
  10267. * @next: the next char to lookup or zero
  10268. * @third: the next char to lookup or zero
  10269. *
  10270. * Try to find if a sequence (first, next, third) or just (first next) or
  10271. * (first) is available in the input stream.
  10272. * This function has a side effect of (possibly) incrementing ctxt->checkIndex
  10273. * to avoid rescanning sequences of bytes, it DOES change the state of the
  10274. * parser, do not use liberally.
  10275. *
  10276. * Returns the index to the current parsing point if the full sequence
  10277. * is available, -1 otherwise.
  10278. */
  10279. static int
  10280. xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
  10281. xmlChar next, xmlChar third) {
  10282. int base, len;
  10283. xmlParserInputPtr in;
  10284. const xmlChar *buf;
  10285. in = ctxt->input;
  10286. if (in == NULL) return(-1);
  10287. base = in->cur - in->base;
  10288. if (base < 0) return(-1);
  10289. if (ctxt->checkIndex > base)
  10290. base = ctxt->checkIndex;
  10291. if (in->buf == NULL) {
  10292. buf = in->base;
  10293. len = in->length;
  10294. } else {
  10295. buf = xmlBufContent(in->buf->buffer);
  10296. len = xmlBufUse(in->buf->buffer);
  10297. }
  10298. /* take into account the sequence length */
  10299. if (third) len -= 2;
  10300. else if (next) len --;
  10301. for (;base < len;base++) {
  10302. if (buf[base] == first) {
  10303. if (third != 0) {
  10304. if ((buf[base + 1] != next) ||
  10305. (buf[base + 2] != third)) continue;
  10306. } else if (next != 0) {
  10307. if (buf[base + 1] != next) continue;
  10308. }
  10309. ctxt->checkIndex = 0;
  10310. #ifdef DEBUG_PUSH
  10311. if (next == 0)
  10312. xmlGenericError(xmlGenericErrorContext,
  10313. "PP: lookup '%c' found at %d\n",
  10314. first, base);
  10315. else if (third == 0)
  10316. xmlGenericError(xmlGenericErrorContext,
  10317. "PP: lookup '%c%c' found at %d\n",
  10318. first, next, base);
  10319. else
  10320. xmlGenericError(xmlGenericErrorContext,
  10321. "PP: lookup '%c%c%c' found at %d\n",
  10322. first, next, third, base);
  10323. #endif
  10324. return(base - (in->cur - in->base));
  10325. }
  10326. }
  10327. ctxt->checkIndex = base;
  10328. #ifdef DEBUG_PUSH
  10329. if (next == 0)
  10330. xmlGenericError(xmlGenericErrorContext,
  10331. "PP: lookup '%c' failed\n", first);
  10332. else if (third == 0)
  10333. xmlGenericError(xmlGenericErrorContext,
  10334. "PP: lookup '%c%c' failed\n", first, next);
  10335. else
  10336. xmlGenericError(xmlGenericErrorContext,
  10337. "PP: lookup '%c%c%c' failed\n", first, next, third);
  10338. #endif
  10339. return(-1);
  10340. }
  10341. /**
  10342. * xmlParseGetLasts:
  10343. * @ctxt: an XML parser context
  10344. * @lastlt: pointer to store the last '<' from the input
  10345. * @lastgt: pointer to store the last '>' from the input
  10346. *
  10347. * Lookup the last < and > in the current chunk
  10348. */
  10349. static void
  10350. xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
  10351. const xmlChar **lastgt) {
  10352. const xmlChar *tmp;
  10353. if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
  10354. xmlGenericError(xmlGenericErrorContext,
  10355. "Internal error: xmlParseGetLasts\n");
  10356. return;
  10357. }
  10358. if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
  10359. tmp = ctxt->input->end;
  10360. tmp--;
  10361. while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
  10362. if (tmp < ctxt->input->base) {
  10363. *lastlt = NULL;
  10364. *lastgt = NULL;
  10365. } else {
  10366. *lastlt = tmp;
  10367. tmp++;
  10368. while ((tmp < ctxt->input->end) && (*tmp != '>')) {
  10369. if (*tmp == '\'') {
  10370. tmp++;
  10371. while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
  10372. if (tmp < ctxt->input->end) tmp++;
  10373. } else if (*tmp == '"') {
  10374. tmp++;
  10375. while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
  10376. if (tmp < ctxt->input->end) tmp++;
  10377. } else
  10378. tmp++;
  10379. }
  10380. if (tmp < ctxt->input->end)
  10381. *lastgt = tmp;
  10382. else {
  10383. tmp = *lastlt;
  10384. tmp--;
  10385. while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
  10386. if (tmp >= ctxt->input->base)
  10387. *lastgt = tmp;
  10388. else
  10389. *lastgt = NULL;
  10390. }
  10391. }
  10392. } else {
  10393. *lastlt = NULL;
  10394. *lastgt = NULL;
  10395. }
  10396. }
  10397. /**
  10398. * xmlCheckCdataPush:
  10399. * @cur: pointer to the block of characters
  10400. * @len: length of the block in bytes
  10401. * @complete: 1 if complete CDATA block is passed in, 0 if partial block
  10402. *
  10403. * Check that the block of characters is okay as SCdata content [20]
  10404. *
  10405. * Returns the number of bytes to pass if okay, a negative index where an
  10406. * UTF-8 error occurred otherwise
  10407. */
  10408. static int
  10409. xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
  10410. int ix;
  10411. unsigned char c;
  10412. int codepoint;
  10413. if ((utf == NULL) || (len <= 0))
  10414. return(0);
  10415. for (ix = 0; ix < len;) { /* string is 0-terminated */
  10416. c = utf[ix];
  10417. if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
  10418. if (c >= 0x20)
  10419. ix++;
  10420. else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
  10421. ix++;
  10422. else
  10423. return(-ix);
  10424. } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
  10425. if (ix + 2 > len) return(complete ? -ix : ix);
  10426. if ((utf[ix+1] & 0xc0 ) != 0x80)
  10427. return(-ix);
  10428. codepoint = (utf[ix] & 0x1f) << 6;
  10429. codepoint |= utf[ix+1] & 0x3f;
  10430. if (!xmlIsCharQ(codepoint))
  10431. return(-ix);
  10432. ix += 2;
  10433. } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
  10434. if (ix + 3 > len) return(complete ? -ix : ix);
  10435. if (((utf[ix+1] & 0xc0) != 0x80) ||
  10436. ((utf[ix+2] & 0xc0) != 0x80))
  10437. return(-ix);
  10438. codepoint = (utf[ix] & 0xf) << 12;
  10439. codepoint |= (utf[ix+1] & 0x3f) << 6;
  10440. codepoint |= utf[ix+2] & 0x3f;
  10441. if (!xmlIsCharQ(codepoint))
  10442. return(-ix);
  10443. ix += 3;
  10444. } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
  10445. if (ix + 4 > len) return(complete ? -ix : ix);
  10446. if (((utf[ix+1] & 0xc0) != 0x80) ||
  10447. ((utf[ix+2] & 0xc0) != 0x80) ||
  10448. ((utf[ix+3] & 0xc0) != 0x80))
  10449. return(-ix);
  10450. codepoint = (utf[ix] & 0x7) << 18;
  10451. codepoint |= (utf[ix+1] & 0x3f) << 12;
  10452. codepoint |= (utf[ix+2] & 0x3f) << 6;
  10453. codepoint |= utf[ix+3] & 0x3f;
  10454. if (!xmlIsCharQ(codepoint))
  10455. return(-ix);
  10456. ix += 4;
  10457. } else /* unknown encoding */
  10458. return(-ix);
  10459. }
  10460. return(ix);
  10461. }
  10462. /**
  10463. * xmlParseTryOrFinish:
  10464. * @ctxt: an XML parser context
  10465. * @terminate: last chunk indicator
  10466. *
  10467. * Try to progress on parsing
  10468. *
  10469. * Returns zero if no parsing was possible
  10470. */
  10471. static int
  10472. xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
  10473. int ret = 0;
  10474. int avail, tlen;
  10475. xmlChar cur, next;
  10476. const xmlChar *lastlt, *lastgt;
  10477. if (ctxt->input == NULL)
  10478. return(0);
  10479. #ifdef DEBUG_PUSH
  10480. switch (ctxt->instate) {
  10481. case XML_PARSER_EOF:
  10482. xmlGenericError(xmlGenericErrorContext,
  10483. "PP: try EOF\n"); break;
  10484. case XML_PARSER_START:
  10485. xmlGenericError(xmlGenericErrorContext,
  10486. "PP: try START\n"); break;
  10487. case XML_PARSER_MISC:
  10488. xmlGenericError(xmlGenericErrorContext,
  10489. "PP: try MISC\n");break;
  10490. case XML_PARSER_COMMENT:
  10491. xmlGenericError(xmlGenericErrorContext,
  10492. "PP: try COMMENT\n");break;
  10493. case XML_PARSER_PROLOG:
  10494. xmlGenericError(xmlGenericErrorContext,
  10495. "PP: try PROLOG\n");break;
  10496. case XML_PARSER_START_TAG:
  10497. xmlGenericError(xmlGenericErrorContext,
  10498. "PP: try START_TAG\n");break;
  10499. case XML_PARSER_CONTENT:
  10500. xmlGenericError(xmlGenericErrorContext,
  10501. "PP: try CONTENT\n");break;
  10502. case XML_PARSER_CDATA_SECTION:
  10503. xmlGenericError(xmlGenericErrorContext,
  10504. "PP: try CDATA_SECTION\n");break;
  10505. case XML_PARSER_END_TAG:
  10506. xmlGenericError(xmlGenericErrorContext,
  10507. "PP: try END_TAG\n");break;
  10508. case XML_PARSER_ENTITY_DECL:
  10509. xmlGenericError(xmlGenericErrorContext,
  10510. "PP: try ENTITY_DECL\n");break;
  10511. case XML_PARSER_ENTITY_VALUE:
  10512. xmlGenericError(xmlGenericErrorContext,
  10513. "PP: try ENTITY_VALUE\n");break;
  10514. case XML_PARSER_ATTRIBUTE_VALUE:
  10515. xmlGenericError(xmlGenericErrorContext,
  10516. "PP: try ATTRIBUTE_VALUE\n");break;
  10517. case XML_PARSER_DTD:
  10518. xmlGenericError(xmlGenericErrorContext,
  10519. "PP: try DTD\n");break;
  10520. case XML_PARSER_EPILOG:
  10521. xmlGenericError(xmlGenericErrorContext,
  10522. "PP: try EPILOG\n");break;
  10523. case XML_PARSER_PI:
  10524. xmlGenericError(xmlGenericErrorContext,
  10525. "PP: try PI\n");break;
  10526. case XML_PARSER_IGNORE:
  10527. xmlGenericError(xmlGenericErrorContext,
  10528. "PP: try IGNORE\n");break;
  10529. }
  10530. #endif
  10531. if ((ctxt->input != NULL) &&
  10532. (ctxt->input->cur - ctxt->input->base > 4096)) {
  10533. xmlSHRINK(ctxt);
  10534. ctxt->checkIndex = 0;
  10535. }
  10536. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  10537. while (ctxt->instate != XML_PARSER_EOF) {
  10538. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  10539. return(0);
  10540. if (ctxt->input == NULL) break;
  10541. if (ctxt->input->buf == NULL)
  10542. avail = ctxt->input->length -
  10543. (ctxt->input->cur - ctxt->input->base);
  10544. else {
  10545. /*
  10546. * If we are operating on converted input, try to flush
  10547. * remaining chars to avoid them stalling in the non-converted
  10548. * buffer. But do not do this in document start where
  10549. * encoding="..." may not have been read and we work on a
  10550. * guessed encoding.
  10551. */
  10552. if ((ctxt->instate != XML_PARSER_START) &&
  10553. (ctxt->input->buf->raw != NULL) &&
  10554. (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
  10555. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
  10556. ctxt->input);
  10557. size_t current = ctxt->input->cur - ctxt->input->base;
  10558. xmlParserInputBufferPush(ctxt->input->buf, 0, "");
  10559. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
  10560. base, current);
  10561. }
  10562. avail = xmlBufUse(ctxt->input->buf->buffer) -
  10563. (ctxt->input->cur - ctxt->input->base);
  10564. }
  10565. if (avail < 1)
  10566. goto done;
  10567. switch (ctxt->instate) {
  10568. case XML_PARSER_EOF:
  10569. /*
  10570. * Document parsing is done !
  10571. */
  10572. goto done;
  10573. case XML_PARSER_START:
  10574. if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
  10575. xmlChar start[4];
  10576. xmlCharEncoding enc;
  10577. /*
  10578. * Very first chars read from the document flow.
  10579. */
  10580. if (avail < 4)
  10581. goto done;
  10582. /*
  10583. * Get the 4 first bytes and decode the charset
  10584. * if enc != XML_CHAR_ENCODING_NONE
  10585. * plug some encoding conversion routines,
  10586. * else xmlSwitchEncoding will set to (default)
  10587. * UTF8.
  10588. */
  10589. start[0] = RAW;
  10590. start[1] = NXT(1);
  10591. start[2] = NXT(2);
  10592. start[3] = NXT(3);
  10593. enc = xmlDetectCharEncoding(start, 4);
  10594. xmlSwitchEncoding(ctxt, enc);
  10595. break;
  10596. }
  10597. if (avail < 2)
  10598. goto done;
  10599. cur = ctxt->input->cur[0];
  10600. next = ctxt->input->cur[1];
  10601. if (cur == 0) {
  10602. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10603. ctxt->sax->setDocumentLocator(ctxt->userData,
  10604. &xmlDefaultSAXLocator);
  10605. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10606. xmlHaltParser(ctxt);
  10607. #ifdef DEBUG_PUSH
  10608. xmlGenericError(xmlGenericErrorContext,
  10609. "PP: entering EOF\n");
  10610. #endif
  10611. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10612. ctxt->sax->endDocument(ctxt->userData);
  10613. goto done;
  10614. }
  10615. if ((cur == '<') && (next == '?')) {
  10616. /* PI or XML decl */
  10617. if (avail < 5) return(ret);
  10618. if ((!terminate) &&
  10619. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10620. return(ret);
  10621. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10622. ctxt->sax->setDocumentLocator(ctxt->userData,
  10623. &xmlDefaultSAXLocator);
  10624. if ((ctxt->input->cur[2] == 'x') &&
  10625. (ctxt->input->cur[3] == 'm') &&
  10626. (ctxt->input->cur[4] == 'l') &&
  10627. (IS_BLANK_CH(ctxt->input->cur[5]))) {
  10628. ret += 5;
  10629. #ifdef DEBUG_PUSH
  10630. xmlGenericError(xmlGenericErrorContext,
  10631. "PP: Parsing XML Decl\n");
  10632. #endif
  10633. xmlParseXMLDecl(ctxt);
  10634. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  10635. /*
  10636. * The XML REC instructs us to stop parsing right
  10637. * here
  10638. */
  10639. xmlHaltParser(ctxt);
  10640. return(0);
  10641. }
  10642. ctxt->standalone = ctxt->input->standalone;
  10643. if ((ctxt->encoding == NULL) &&
  10644. (ctxt->input->encoding != NULL))
  10645. ctxt->encoding = xmlStrdup(ctxt->input->encoding);
  10646. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10647. (!ctxt->disableSAX))
  10648. ctxt->sax->startDocument(ctxt->userData);
  10649. ctxt->instate = XML_PARSER_MISC;
  10650. #ifdef DEBUG_PUSH
  10651. xmlGenericError(xmlGenericErrorContext,
  10652. "PP: entering MISC\n");
  10653. #endif
  10654. } else {
  10655. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10656. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10657. (!ctxt->disableSAX))
  10658. ctxt->sax->startDocument(ctxt->userData);
  10659. ctxt->instate = XML_PARSER_MISC;
  10660. #ifdef DEBUG_PUSH
  10661. xmlGenericError(xmlGenericErrorContext,
  10662. "PP: entering MISC\n");
  10663. #endif
  10664. }
  10665. } else {
  10666. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10667. ctxt->sax->setDocumentLocator(ctxt->userData,
  10668. &xmlDefaultSAXLocator);
  10669. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10670. if (ctxt->version == NULL) {
  10671. xmlErrMemory(ctxt, NULL);
  10672. break;
  10673. }
  10674. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10675. (!ctxt->disableSAX))
  10676. ctxt->sax->startDocument(ctxt->userData);
  10677. ctxt->instate = XML_PARSER_MISC;
  10678. #ifdef DEBUG_PUSH
  10679. xmlGenericError(xmlGenericErrorContext,
  10680. "PP: entering MISC\n");
  10681. #endif
  10682. }
  10683. break;
  10684. case XML_PARSER_START_TAG: {
  10685. const xmlChar *name;
  10686. const xmlChar *prefix = NULL;
  10687. const xmlChar *URI = NULL;
  10688. int line = ctxt->input->line;
  10689. int nsNr = ctxt->nsNr;
  10690. if ((avail < 2) && (ctxt->inputNr == 1))
  10691. goto done;
  10692. cur = ctxt->input->cur[0];
  10693. if (cur != '<') {
  10694. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10695. xmlHaltParser(ctxt);
  10696. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10697. ctxt->sax->endDocument(ctxt->userData);
  10698. goto done;
  10699. }
  10700. if (!terminate) {
  10701. if (ctxt->progressive) {
  10702. /* > can be found unescaped in attribute values */
  10703. if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
  10704. goto done;
  10705. } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
  10706. goto done;
  10707. }
  10708. }
  10709. if (ctxt->spaceNr == 0)
  10710. spacePush(ctxt, -1);
  10711. else if (*ctxt->space == -2)
  10712. spacePush(ctxt, -1);
  10713. else
  10714. spacePush(ctxt, *ctxt->space);
  10715. #ifdef LIBXML_SAX1_ENABLED
  10716. if (ctxt->sax2)
  10717. #endif /* LIBXML_SAX1_ENABLED */
  10718. name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
  10719. #ifdef LIBXML_SAX1_ENABLED
  10720. else
  10721. name = xmlParseStartTag(ctxt);
  10722. #endif /* LIBXML_SAX1_ENABLED */
  10723. if (ctxt->instate == XML_PARSER_EOF)
  10724. goto done;
  10725. if (name == NULL) {
  10726. spacePop(ctxt);
  10727. xmlHaltParser(ctxt);
  10728. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10729. ctxt->sax->endDocument(ctxt->userData);
  10730. goto done;
  10731. }
  10732. #ifdef LIBXML_VALID_ENABLED
  10733. /*
  10734. * [ VC: Root Element Type ]
  10735. * The Name in the document type declaration must match
  10736. * the element type of the root element.
  10737. */
  10738. if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
  10739. ctxt->node && (ctxt->node == ctxt->myDoc->children))
  10740. ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
  10741. #endif /* LIBXML_VALID_ENABLED */
  10742. /*
  10743. * Check for an Empty Element.
  10744. */
  10745. if ((RAW == '/') && (NXT(1) == '>')) {
  10746. SKIP(2);
  10747. if (ctxt->sax2) {
  10748. if ((ctxt->sax != NULL) &&
  10749. (ctxt->sax->endElementNs != NULL) &&
  10750. (!ctxt->disableSAX))
  10751. ctxt->sax->endElementNs(ctxt->userData, name,
  10752. prefix, URI);
  10753. if (ctxt->nsNr - nsNr > 0)
  10754. nsPop(ctxt, ctxt->nsNr - nsNr);
  10755. #ifdef LIBXML_SAX1_ENABLED
  10756. } else {
  10757. if ((ctxt->sax != NULL) &&
  10758. (ctxt->sax->endElement != NULL) &&
  10759. (!ctxt->disableSAX))
  10760. ctxt->sax->endElement(ctxt->userData, name);
  10761. #endif /* LIBXML_SAX1_ENABLED */
  10762. }
  10763. if (ctxt->instate == XML_PARSER_EOF)
  10764. goto done;
  10765. spacePop(ctxt);
  10766. if (ctxt->nameNr == 0) {
  10767. ctxt->instate = XML_PARSER_EPILOG;
  10768. } else {
  10769. ctxt->instate = XML_PARSER_CONTENT;
  10770. }
  10771. ctxt->progressive = 1;
  10772. break;
  10773. }
  10774. if (RAW == '>') {
  10775. NEXT;
  10776. } else {
  10777. xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
  10778. "Couldn't find end of Start Tag %s\n",
  10779. name);
  10780. nodePop(ctxt);
  10781. spacePop(ctxt);
  10782. }
  10783. nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
  10784. ctxt->instate = XML_PARSER_CONTENT;
  10785. ctxt->progressive = 1;
  10786. break;
  10787. }
  10788. case XML_PARSER_CONTENT: {
  10789. const xmlChar *test;
  10790. unsigned int cons;
  10791. if ((avail < 2) && (ctxt->inputNr == 1))
  10792. goto done;
  10793. cur = ctxt->input->cur[0];
  10794. next = ctxt->input->cur[1];
  10795. test = CUR_PTR;
  10796. cons = ctxt->input->consumed;
  10797. if ((cur == '<') && (next == '/')) {
  10798. ctxt->instate = XML_PARSER_END_TAG;
  10799. break;
  10800. } else if ((cur == '<') && (next == '?')) {
  10801. if ((!terminate) &&
  10802. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  10803. ctxt->progressive = XML_PARSER_PI;
  10804. goto done;
  10805. }
  10806. xmlParsePI(ctxt);
  10807. ctxt->instate = XML_PARSER_CONTENT;
  10808. ctxt->progressive = 1;
  10809. } else if ((cur == '<') && (next != '!')) {
  10810. ctxt->instate = XML_PARSER_START_TAG;
  10811. break;
  10812. } else if ((cur == '<') && (next == '!') &&
  10813. (ctxt->input->cur[2] == '-') &&
  10814. (ctxt->input->cur[3] == '-')) {
  10815. int term;
  10816. if (avail < 4)
  10817. goto done;
  10818. ctxt->input->cur += 4;
  10819. term = xmlParseLookupSequence(ctxt, '-', '-', '>');
  10820. ctxt->input->cur -= 4;
  10821. if ((!terminate) && (term < 0)) {
  10822. ctxt->progressive = XML_PARSER_COMMENT;
  10823. goto done;
  10824. }
  10825. xmlParseComment(ctxt);
  10826. ctxt->instate = XML_PARSER_CONTENT;
  10827. ctxt->progressive = 1;
  10828. } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
  10829. (ctxt->input->cur[2] == '[') &&
  10830. (ctxt->input->cur[3] == 'C') &&
  10831. (ctxt->input->cur[4] == 'D') &&
  10832. (ctxt->input->cur[5] == 'A') &&
  10833. (ctxt->input->cur[6] == 'T') &&
  10834. (ctxt->input->cur[7] == 'A') &&
  10835. (ctxt->input->cur[8] == '[')) {
  10836. SKIP(9);
  10837. ctxt->instate = XML_PARSER_CDATA_SECTION;
  10838. break;
  10839. } else if ((cur == '<') && (next == '!') &&
  10840. (avail < 9)) {
  10841. goto done;
  10842. } else if (cur == '&') {
  10843. if ((!terminate) &&
  10844. (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
  10845. goto done;
  10846. xmlParseReference(ctxt);
  10847. } else {
  10848. /* TODO Avoid the extra copy, handle directly !!! */
  10849. /*
  10850. * Goal of the following test is:
  10851. * - minimize calls to the SAX 'character' callback
  10852. * when they are mergeable
  10853. * - handle an problem for isBlank when we only parse
  10854. * a sequence of blank chars and the next one is
  10855. * not available to check against '<' presence.
  10856. * - tries to homogenize the differences in SAX
  10857. * callbacks between the push and pull versions
  10858. * of the parser.
  10859. */
  10860. if ((ctxt->inputNr == 1) &&
  10861. (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
  10862. if (!terminate) {
  10863. if (ctxt->progressive) {
  10864. if ((lastlt == NULL) ||
  10865. (ctxt->input->cur > lastlt))
  10866. goto done;
  10867. } else if (xmlParseLookupSequence(ctxt,
  10868. '<', 0, 0) < 0) {
  10869. goto done;
  10870. }
  10871. }
  10872. }
  10873. ctxt->checkIndex = 0;
  10874. xmlParseCharData(ctxt, 0);
  10875. }
  10876. if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  10877. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  10878. "detected an error in element content\n");
  10879. xmlHaltParser(ctxt);
  10880. break;
  10881. }
  10882. break;
  10883. }
  10884. case XML_PARSER_END_TAG:
  10885. if (avail < 2)
  10886. goto done;
  10887. if (!terminate) {
  10888. if (ctxt->progressive) {
  10889. /* > can be found unescaped in attribute values */
  10890. if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
  10891. goto done;
  10892. } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
  10893. goto done;
  10894. }
  10895. }
  10896. if (ctxt->sax2) {
  10897. xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
  10898. nameNsPop(ctxt);
  10899. }
  10900. #ifdef LIBXML_SAX1_ENABLED
  10901. else
  10902. xmlParseEndTag1(ctxt, 0);
  10903. #endif /* LIBXML_SAX1_ENABLED */
  10904. if (ctxt->instate == XML_PARSER_EOF) {
  10905. /* Nothing */
  10906. } else if (ctxt->nameNr == 0) {
  10907. ctxt->instate = XML_PARSER_EPILOG;
  10908. } else {
  10909. ctxt->instate = XML_PARSER_CONTENT;
  10910. }
  10911. break;
  10912. case XML_PARSER_CDATA_SECTION: {
  10913. /*
  10914. * The Push mode need to have the SAX callback for
  10915. * cdataBlock merge back contiguous callbacks.
  10916. */
  10917. int base;
  10918. base = xmlParseLookupSequence(ctxt, ']', ']', '>');
  10919. if (base < 0) {
  10920. if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
  10921. int tmp;
  10922. tmp = xmlCheckCdataPush(ctxt->input->cur,
  10923. XML_PARSER_BIG_BUFFER_SIZE, 0);
  10924. if (tmp < 0) {
  10925. tmp = -tmp;
  10926. ctxt->input->cur += tmp;
  10927. goto encoding_error;
  10928. }
  10929. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  10930. if (ctxt->sax->cdataBlock != NULL)
  10931. ctxt->sax->cdataBlock(ctxt->userData,
  10932. ctxt->input->cur, tmp);
  10933. else if (ctxt->sax->characters != NULL)
  10934. ctxt->sax->characters(ctxt->userData,
  10935. ctxt->input->cur, tmp);
  10936. }
  10937. if (ctxt->instate == XML_PARSER_EOF)
  10938. goto done;
  10939. SKIPL(tmp);
  10940. ctxt->checkIndex = 0;
  10941. }
  10942. goto done;
  10943. } else {
  10944. int tmp;
  10945. tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
  10946. if ((tmp < 0) || (tmp != base)) {
  10947. tmp = -tmp;
  10948. ctxt->input->cur += tmp;
  10949. goto encoding_error;
  10950. }
  10951. if ((ctxt->sax != NULL) && (base == 0) &&
  10952. (ctxt->sax->cdataBlock != NULL) &&
  10953. (!ctxt->disableSAX)) {
  10954. /*
  10955. * Special case to provide identical behaviour
  10956. * between pull and push parsers on enpty CDATA
  10957. * sections
  10958. */
  10959. if ((ctxt->input->cur - ctxt->input->base >= 9) &&
  10960. (!strncmp((const char *)&ctxt->input->cur[-9],
  10961. "<![CDATA[", 9)))
  10962. ctxt->sax->cdataBlock(ctxt->userData,
  10963. BAD_CAST "", 0);
  10964. } else if ((ctxt->sax != NULL) && (base > 0) &&
  10965. (!ctxt->disableSAX)) {
  10966. if (ctxt->sax->cdataBlock != NULL)
  10967. ctxt->sax->cdataBlock(ctxt->userData,
  10968. ctxt->input->cur, base);
  10969. else if (ctxt->sax->characters != NULL)
  10970. ctxt->sax->characters(ctxt->userData,
  10971. ctxt->input->cur, base);
  10972. }
  10973. if (ctxt->instate == XML_PARSER_EOF)
  10974. goto done;
  10975. SKIPL(base + 3);
  10976. ctxt->checkIndex = 0;
  10977. ctxt->instate = XML_PARSER_CONTENT;
  10978. #ifdef DEBUG_PUSH
  10979. xmlGenericError(xmlGenericErrorContext,
  10980. "PP: entering CONTENT\n");
  10981. #endif
  10982. }
  10983. break;
  10984. }
  10985. case XML_PARSER_MISC:
  10986. SKIP_BLANKS;
  10987. if (ctxt->input->buf == NULL)
  10988. avail = ctxt->input->length -
  10989. (ctxt->input->cur - ctxt->input->base);
  10990. else
  10991. avail = xmlBufUse(ctxt->input->buf->buffer) -
  10992. (ctxt->input->cur - ctxt->input->base);
  10993. if (avail < 2)
  10994. goto done;
  10995. cur = ctxt->input->cur[0];
  10996. next = ctxt->input->cur[1];
  10997. if ((cur == '<') && (next == '?')) {
  10998. if ((!terminate) &&
  10999. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  11000. ctxt->progressive = XML_PARSER_PI;
  11001. goto done;
  11002. }
  11003. #ifdef DEBUG_PUSH
  11004. xmlGenericError(xmlGenericErrorContext,
  11005. "PP: Parsing PI\n");
  11006. #endif
  11007. xmlParsePI(ctxt);
  11008. if (ctxt->instate == XML_PARSER_EOF)
  11009. goto done;
  11010. ctxt->instate = XML_PARSER_MISC;
  11011. ctxt->progressive = 1;
  11012. ctxt->checkIndex = 0;
  11013. } else if ((cur == '<') && (next == '!') &&
  11014. (ctxt->input->cur[2] == '-') &&
  11015. (ctxt->input->cur[3] == '-')) {
  11016. if ((!terminate) &&
  11017. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
  11018. ctxt->progressive = XML_PARSER_COMMENT;
  11019. goto done;
  11020. }
  11021. #ifdef DEBUG_PUSH
  11022. xmlGenericError(xmlGenericErrorContext,
  11023. "PP: Parsing Comment\n");
  11024. #endif
  11025. xmlParseComment(ctxt);
  11026. if (ctxt->instate == XML_PARSER_EOF)
  11027. goto done;
  11028. ctxt->instate = XML_PARSER_MISC;
  11029. ctxt->progressive = 1;
  11030. ctxt->checkIndex = 0;
  11031. } else if ((cur == '<') && (next == '!') &&
  11032. (ctxt->input->cur[2] == 'D') &&
  11033. (ctxt->input->cur[3] == 'O') &&
  11034. (ctxt->input->cur[4] == 'C') &&
  11035. (ctxt->input->cur[5] == 'T') &&
  11036. (ctxt->input->cur[6] == 'Y') &&
  11037. (ctxt->input->cur[7] == 'P') &&
  11038. (ctxt->input->cur[8] == 'E')) {
  11039. if ((!terminate) &&
  11040. (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
  11041. ctxt->progressive = XML_PARSER_DTD;
  11042. goto done;
  11043. }
  11044. #ifdef DEBUG_PUSH
  11045. xmlGenericError(xmlGenericErrorContext,
  11046. "PP: Parsing internal subset\n");
  11047. #endif
  11048. ctxt->inSubset = 1;
  11049. ctxt->progressive = 0;
  11050. ctxt->checkIndex = 0;
  11051. xmlParseDocTypeDecl(ctxt);
  11052. if (ctxt->instate == XML_PARSER_EOF)
  11053. goto done;
  11054. if (RAW == '[') {
  11055. ctxt->instate = XML_PARSER_DTD;
  11056. #ifdef DEBUG_PUSH
  11057. xmlGenericError(xmlGenericErrorContext,
  11058. "PP: entering DTD\n");
  11059. #endif
  11060. } else {
  11061. /*
  11062. * Create and update the external subset.
  11063. */
  11064. ctxt->inSubset = 2;
  11065. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  11066. (ctxt->sax->externalSubset != NULL))
  11067. ctxt->sax->externalSubset(ctxt->userData,
  11068. ctxt->intSubName, ctxt->extSubSystem,
  11069. ctxt->extSubURI);
  11070. ctxt->inSubset = 0;
  11071. xmlCleanSpecialAttr(ctxt);
  11072. ctxt->instate = XML_PARSER_PROLOG;
  11073. #ifdef DEBUG_PUSH
  11074. xmlGenericError(xmlGenericErrorContext,
  11075. "PP: entering PROLOG\n");
  11076. #endif
  11077. }
  11078. } else if ((cur == '<') && (next == '!') &&
  11079. (avail < 9)) {
  11080. goto done;
  11081. } else {
  11082. ctxt->instate = XML_PARSER_START_TAG;
  11083. ctxt->progressive = XML_PARSER_START_TAG;
  11084. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  11085. #ifdef DEBUG_PUSH
  11086. xmlGenericError(xmlGenericErrorContext,
  11087. "PP: entering START_TAG\n");
  11088. #endif
  11089. }
  11090. break;
  11091. case XML_PARSER_PROLOG:
  11092. SKIP_BLANKS;
  11093. if (ctxt->input->buf == NULL)
  11094. avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
  11095. else
  11096. avail = xmlBufUse(ctxt->input->buf->buffer) -
  11097. (ctxt->input->cur - ctxt->input->base);
  11098. if (avail < 2)
  11099. goto done;
  11100. cur = ctxt->input->cur[0];
  11101. next = ctxt->input->cur[1];
  11102. if ((cur == '<') && (next == '?')) {
  11103. if ((!terminate) &&
  11104. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  11105. ctxt->progressive = XML_PARSER_PI;
  11106. goto done;
  11107. }
  11108. #ifdef DEBUG_PUSH
  11109. xmlGenericError(xmlGenericErrorContext,
  11110. "PP: Parsing PI\n");
  11111. #endif
  11112. xmlParsePI(ctxt);
  11113. if (ctxt->instate == XML_PARSER_EOF)
  11114. goto done;
  11115. ctxt->instate = XML_PARSER_PROLOG;
  11116. ctxt->progressive = 1;
  11117. } else if ((cur == '<') && (next == '!') &&
  11118. (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
  11119. if ((!terminate) &&
  11120. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
  11121. ctxt->progressive = XML_PARSER_COMMENT;
  11122. goto done;
  11123. }
  11124. #ifdef DEBUG_PUSH
  11125. xmlGenericError(xmlGenericErrorContext,
  11126. "PP: Parsing Comment\n");
  11127. #endif
  11128. xmlParseComment(ctxt);
  11129. if (ctxt->instate == XML_PARSER_EOF)
  11130. goto done;
  11131. ctxt->instate = XML_PARSER_PROLOG;
  11132. ctxt->progressive = 1;
  11133. } else if ((cur == '<') && (next == '!') &&
  11134. (avail < 4)) {
  11135. goto done;
  11136. } else {
  11137. ctxt->instate = XML_PARSER_START_TAG;
  11138. if (ctxt->progressive == 0)
  11139. ctxt->progressive = XML_PARSER_START_TAG;
  11140. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  11141. #ifdef DEBUG_PUSH
  11142. xmlGenericError(xmlGenericErrorContext,
  11143. "PP: entering START_TAG\n");
  11144. #endif
  11145. }
  11146. break;
  11147. case XML_PARSER_EPILOG:
  11148. SKIP_BLANKS;
  11149. if (ctxt->input->buf == NULL)
  11150. avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
  11151. else
  11152. avail = xmlBufUse(ctxt->input->buf->buffer) -
  11153. (ctxt->input->cur - ctxt->input->base);
  11154. if (avail < 2)
  11155. goto done;
  11156. cur = ctxt->input->cur[0];
  11157. next = ctxt->input->cur[1];
  11158. if ((cur == '<') && (next == '?')) {
  11159. if ((!terminate) &&
  11160. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  11161. ctxt->progressive = XML_PARSER_PI;
  11162. goto done;
  11163. }
  11164. #ifdef DEBUG_PUSH
  11165. xmlGenericError(xmlGenericErrorContext,
  11166. "PP: Parsing PI\n");
  11167. #endif
  11168. xmlParsePI(ctxt);
  11169. if (ctxt->instate == XML_PARSER_EOF)
  11170. goto done;
  11171. ctxt->instate = XML_PARSER_EPILOG;
  11172. ctxt->progressive = 1;
  11173. } else if ((cur == '<') && (next == '!') &&
  11174. (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
  11175. if ((!terminate) &&
  11176. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
  11177. ctxt->progressive = XML_PARSER_COMMENT;
  11178. goto done;
  11179. }
  11180. #ifdef DEBUG_PUSH
  11181. xmlGenericError(xmlGenericErrorContext,
  11182. "PP: Parsing Comment\n");
  11183. #endif
  11184. xmlParseComment(ctxt);
  11185. if (ctxt->instate == XML_PARSER_EOF)
  11186. goto done;
  11187. ctxt->instate = XML_PARSER_EPILOG;
  11188. ctxt->progressive = 1;
  11189. } else if ((cur == '<') && (next == '!') &&
  11190. (avail < 4)) {
  11191. goto done;
  11192. } else {
  11193. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  11194. xmlHaltParser(ctxt);
  11195. #ifdef DEBUG_PUSH
  11196. xmlGenericError(xmlGenericErrorContext,
  11197. "PP: entering EOF\n");
  11198. #endif
  11199. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  11200. ctxt->sax->endDocument(ctxt->userData);
  11201. goto done;
  11202. }
  11203. break;
  11204. case XML_PARSER_DTD: {
  11205. /*
  11206. * Sorry but progressive parsing of the internal subset
  11207. * is not expected to be supported. We first check that
  11208. * the full content of the internal subset is available and
  11209. * the parsing is launched only at that point.
  11210. * Internal subset ends up with "']' S? '>'" in an unescaped
  11211. * section and not in a ']]>' sequence which are conditional
  11212. * sections (whoever argued to keep that crap in XML deserve
  11213. * a place in hell !).
  11214. */
  11215. int base, i;
  11216. xmlChar *buf;
  11217. xmlChar quote = 0;
  11218. size_t use;
  11219. base = ctxt->input->cur - ctxt->input->base;
  11220. if (base < 0) return(0);
  11221. if (ctxt->checkIndex > base)
  11222. base = ctxt->checkIndex;
  11223. buf = xmlBufContent(ctxt->input->buf->buffer);
  11224. use = xmlBufUse(ctxt->input->buf->buffer);
  11225. for (;(unsigned int) base < use; base++) {
  11226. if (quote != 0) {
  11227. if (buf[base] == quote)
  11228. quote = 0;
  11229. continue;
  11230. }
  11231. if ((quote == 0) && (buf[base] == '<')) {
  11232. int found = 0;
  11233. /* special handling of comments */
  11234. if (((unsigned int) base + 4 < use) &&
  11235. (buf[base + 1] == '!') &&
  11236. (buf[base + 2] == '-') &&
  11237. (buf[base + 3] == '-')) {
  11238. for (;(unsigned int) base + 3 < use; base++) {
  11239. if ((buf[base] == '-') &&
  11240. (buf[base + 1] == '-') &&
  11241. (buf[base + 2] == '>')) {
  11242. found = 1;
  11243. base += 2;
  11244. break;
  11245. }
  11246. }
  11247. if (!found) {
  11248. #if 0
  11249. fprintf(stderr, "unfinished comment\n");
  11250. #endif
  11251. break; /* for */
  11252. }
  11253. continue;
  11254. }
  11255. }
  11256. if (buf[base] == '"') {
  11257. quote = '"';
  11258. continue;
  11259. }
  11260. if (buf[base] == '\'') {
  11261. quote = '\'';
  11262. continue;
  11263. }
  11264. if (buf[base] == ']') {
  11265. #if 0
  11266. fprintf(stderr, "%c%c%c%c: ", buf[base],
  11267. buf[base + 1], buf[base + 2], buf[base + 3]);
  11268. #endif
  11269. if ((unsigned int) base +1 >= use)
  11270. break;
  11271. if (buf[base + 1] == ']') {
  11272. /* conditional crap, skip both ']' ! */
  11273. base++;
  11274. continue;
  11275. }
  11276. for (i = 1; (unsigned int) base + i < use; i++) {
  11277. if (buf[base + i] == '>') {
  11278. #if 0
  11279. fprintf(stderr, "found\n");
  11280. #endif
  11281. goto found_end_int_subset;
  11282. }
  11283. if (!IS_BLANK_CH(buf[base + i])) {
  11284. #if 0
  11285. fprintf(stderr, "not found\n");
  11286. #endif
  11287. goto not_end_of_int_subset;
  11288. }
  11289. }
  11290. #if 0
  11291. fprintf(stderr, "end of stream\n");
  11292. #endif
  11293. break;
  11294. }
  11295. not_end_of_int_subset:
  11296. continue; /* for */
  11297. }
  11298. /*
  11299. * We didn't found the end of the Internal subset
  11300. */
  11301. if (quote == 0)
  11302. ctxt->checkIndex = base;
  11303. else
  11304. ctxt->checkIndex = 0;
  11305. #ifdef DEBUG_PUSH
  11306. if (next == 0)
  11307. xmlGenericError(xmlGenericErrorContext,
  11308. "PP: lookup of int subset end filed\n");
  11309. #endif
  11310. goto done;
  11311. found_end_int_subset:
  11312. ctxt->checkIndex = 0;
  11313. xmlParseInternalSubset(ctxt);
  11314. if (ctxt->instate == XML_PARSER_EOF)
  11315. goto done;
  11316. ctxt->inSubset = 2;
  11317. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  11318. (ctxt->sax->externalSubset != NULL))
  11319. ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
  11320. ctxt->extSubSystem, ctxt->extSubURI);
  11321. ctxt->inSubset = 0;
  11322. xmlCleanSpecialAttr(ctxt);
  11323. if (ctxt->instate == XML_PARSER_EOF)
  11324. goto done;
  11325. ctxt->instate = XML_PARSER_PROLOG;
  11326. ctxt->checkIndex = 0;
  11327. #ifdef DEBUG_PUSH
  11328. xmlGenericError(xmlGenericErrorContext,
  11329. "PP: entering PROLOG\n");
  11330. #endif
  11331. break;
  11332. }
  11333. case XML_PARSER_COMMENT:
  11334. xmlGenericError(xmlGenericErrorContext,
  11335. "PP: internal error, state == COMMENT\n");
  11336. ctxt->instate = XML_PARSER_CONTENT;
  11337. #ifdef DEBUG_PUSH
  11338. xmlGenericError(xmlGenericErrorContext,
  11339. "PP: entering CONTENT\n");
  11340. #endif
  11341. break;
  11342. case XML_PARSER_IGNORE:
  11343. xmlGenericError(xmlGenericErrorContext,
  11344. "PP: internal error, state == IGNORE");
  11345. ctxt->instate = XML_PARSER_DTD;
  11346. #ifdef DEBUG_PUSH
  11347. xmlGenericError(xmlGenericErrorContext,
  11348. "PP: entering DTD\n");
  11349. #endif
  11350. break;
  11351. case XML_PARSER_PI:
  11352. xmlGenericError(xmlGenericErrorContext,
  11353. "PP: internal error, state == PI\n");
  11354. ctxt->instate = XML_PARSER_CONTENT;
  11355. #ifdef DEBUG_PUSH
  11356. xmlGenericError(xmlGenericErrorContext,
  11357. "PP: entering CONTENT\n");
  11358. #endif
  11359. break;
  11360. case XML_PARSER_ENTITY_DECL:
  11361. xmlGenericError(xmlGenericErrorContext,
  11362. "PP: internal error, state == ENTITY_DECL\n");
  11363. ctxt->instate = XML_PARSER_DTD;
  11364. #ifdef DEBUG_PUSH
  11365. xmlGenericError(xmlGenericErrorContext,
  11366. "PP: entering DTD\n");
  11367. #endif
  11368. break;
  11369. case XML_PARSER_ENTITY_VALUE:
  11370. xmlGenericError(xmlGenericErrorContext,
  11371. "PP: internal error, state == ENTITY_VALUE\n");
  11372. ctxt->instate = XML_PARSER_CONTENT;
  11373. #ifdef DEBUG_PUSH
  11374. xmlGenericError(xmlGenericErrorContext,
  11375. "PP: entering DTD\n");
  11376. #endif
  11377. break;
  11378. case XML_PARSER_ATTRIBUTE_VALUE:
  11379. xmlGenericError(xmlGenericErrorContext,
  11380. "PP: internal error, state == ATTRIBUTE_VALUE\n");
  11381. ctxt->instate = XML_PARSER_START_TAG;
  11382. #ifdef DEBUG_PUSH
  11383. xmlGenericError(xmlGenericErrorContext,
  11384. "PP: entering START_TAG\n");
  11385. #endif
  11386. break;
  11387. case XML_PARSER_SYSTEM_LITERAL:
  11388. xmlGenericError(xmlGenericErrorContext,
  11389. "PP: internal error, state == SYSTEM_LITERAL\n");
  11390. ctxt->instate = XML_PARSER_START_TAG;
  11391. #ifdef DEBUG_PUSH
  11392. xmlGenericError(xmlGenericErrorContext,
  11393. "PP: entering START_TAG\n");
  11394. #endif
  11395. break;
  11396. case XML_PARSER_PUBLIC_LITERAL:
  11397. xmlGenericError(xmlGenericErrorContext,
  11398. "PP: internal error, state == PUBLIC_LITERAL\n");
  11399. ctxt->instate = XML_PARSER_START_TAG;
  11400. #ifdef DEBUG_PUSH
  11401. xmlGenericError(xmlGenericErrorContext,
  11402. "PP: entering START_TAG\n");
  11403. #endif
  11404. break;
  11405. }
  11406. }
  11407. done:
  11408. #ifdef DEBUG_PUSH
  11409. xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
  11410. #endif
  11411. return(ret);
  11412. encoding_error:
  11413. {
  11414. char buffer[150];
  11415. snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  11416. ctxt->input->cur[0], ctxt->input->cur[1],
  11417. ctxt->input->cur[2], ctxt->input->cur[3]);
  11418. __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  11419. "Input is not proper UTF-8, indicate encoding !\n%s",
  11420. BAD_CAST buffer, NULL);
  11421. }
  11422. return(0);
  11423. }
  11424. /**
  11425. * xmlParseCheckTransition:
  11426. * @ctxt: an XML parser context
  11427. * @chunk: a char array
  11428. * @size: the size in byte of the chunk
  11429. *
  11430. * Check depending on the current parser state if the chunk given must be
  11431. * processed immediately or one need more data to advance on parsing.
  11432. *
  11433. * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
  11434. */
  11435. static int
  11436. xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
  11437. if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
  11438. return(-1);
  11439. if (ctxt->instate == XML_PARSER_START_TAG) {
  11440. if (memchr(chunk, '>', size) != NULL)
  11441. return(1);
  11442. return(0);
  11443. }
  11444. if (ctxt->progressive == XML_PARSER_COMMENT) {
  11445. if (memchr(chunk, '>', size) != NULL)
  11446. return(1);
  11447. return(0);
  11448. }
  11449. if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
  11450. if (memchr(chunk, '>', size) != NULL)
  11451. return(1);
  11452. return(0);
  11453. }
  11454. if (ctxt->progressive == XML_PARSER_PI) {
  11455. if (memchr(chunk, '>', size) != NULL)
  11456. return(1);
  11457. return(0);
  11458. }
  11459. if (ctxt->instate == XML_PARSER_END_TAG) {
  11460. if (memchr(chunk, '>', size) != NULL)
  11461. return(1);
  11462. return(0);
  11463. }
  11464. if ((ctxt->progressive == XML_PARSER_DTD) ||
  11465. (ctxt->instate == XML_PARSER_DTD)) {
  11466. if (memchr(chunk, '>', size) != NULL)
  11467. return(1);
  11468. return(0);
  11469. }
  11470. return(1);
  11471. }
  11472. /**
  11473. * xmlParseChunk:
  11474. * @ctxt: an XML parser context
  11475. * @chunk: an char array
  11476. * @size: the size in byte of the chunk
  11477. * @terminate: last chunk indicator
  11478. *
  11479. * Parse a Chunk of memory
  11480. *
  11481. * Returns zero if no error, the xmlParserErrors otherwise.
  11482. */
  11483. int
  11484. xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
  11485. int terminate) {
  11486. int end_in_lf = 0;
  11487. int remain = 0;
  11488. size_t old_avail = 0;
  11489. size_t avail = 0;
  11490. if (ctxt == NULL)
  11491. return(XML_ERR_INTERNAL_ERROR);
  11492. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  11493. return(ctxt->errNo);
  11494. if (ctxt->instate == XML_PARSER_EOF)
  11495. return(-1);
  11496. if (ctxt->instate == XML_PARSER_START)
  11497. xmlDetectSAX2(ctxt);
  11498. if ((size > 0) && (chunk != NULL) && (!terminate) &&
  11499. (chunk[size - 1] == '\r')) {
  11500. end_in_lf = 1;
  11501. size--;
  11502. }
  11503. xmldecl_done:
  11504. if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
  11505. (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
  11506. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
  11507. size_t cur = ctxt->input->cur - ctxt->input->base;
  11508. int res;
  11509. old_avail = xmlBufUse(ctxt->input->buf->buffer);
  11510. /*
  11511. * Specific handling if we autodetected an encoding, we should not
  11512. * push more than the first line ... which depend on the encoding
  11513. * And only push the rest once the final encoding was detected
  11514. */
  11515. if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
  11516. (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
  11517. unsigned int len = 45;
  11518. if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11519. BAD_CAST "UTF-16")) ||
  11520. (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11521. BAD_CAST "UTF16")))
  11522. len = 90;
  11523. else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11524. BAD_CAST "UCS-4")) ||
  11525. (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11526. BAD_CAST "UCS4")))
  11527. len = 180;
  11528. if (ctxt->input->buf->rawconsumed < len)
  11529. len -= ctxt->input->buf->rawconsumed;
  11530. /*
  11531. * Change size for reading the initial declaration only
  11532. * if size is greater than len. Otherwise, memmove in xmlBufferAdd
  11533. * will blindly copy extra bytes from memory.
  11534. */
  11535. if ((unsigned int) size > len) {
  11536. remain = size - len;
  11537. size = len;
  11538. } else {
  11539. remain = 0;
  11540. }
  11541. }
  11542. res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  11543. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
  11544. if (res < 0) {
  11545. ctxt->errNo = XML_PARSER_EOF;
  11546. xmlHaltParser(ctxt);
  11547. return (XML_PARSER_EOF);
  11548. }
  11549. #ifdef DEBUG_PUSH
  11550. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  11551. #endif
  11552. } else if (ctxt->instate != XML_PARSER_EOF) {
  11553. if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
  11554. xmlParserInputBufferPtr in = ctxt->input->buf;
  11555. if ((in->encoder != NULL) && (in->buffer != NULL) &&
  11556. (in->raw != NULL)) {
  11557. int nbchars;
  11558. size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
  11559. size_t current = ctxt->input->cur - ctxt->input->base;
  11560. nbchars = xmlCharEncInput(in, terminate);
  11561. xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
  11562. if (nbchars < 0) {
  11563. /* TODO 2.6.0 */
  11564. xmlGenericError(xmlGenericErrorContext,
  11565. "xmlParseChunk: encoder error\n");
  11566. xmlHaltParser(ctxt);
  11567. return(XML_ERR_INVALID_ENCODING);
  11568. }
  11569. }
  11570. }
  11571. }
  11572. if (remain != 0) {
  11573. xmlParseTryOrFinish(ctxt, 0);
  11574. } else {
  11575. if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
  11576. avail = xmlBufUse(ctxt->input->buf->buffer);
  11577. /*
  11578. * Depending on the current state it may not be such
  11579. * a good idea to try parsing if there is nothing in the chunk
  11580. * which would be worth doing a parser state transition and we
  11581. * need to wait for more data
  11582. */
  11583. if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
  11584. (old_avail == 0) || (avail == 0) ||
  11585. (xmlParseCheckTransition(ctxt,
  11586. (const char *)&ctxt->input->base[old_avail],
  11587. avail - old_avail)))
  11588. xmlParseTryOrFinish(ctxt, terminate);
  11589. }
  11590. if (ctxt->instate == XML_PARSER_EOF)
  11591. return(ctxt->errNo);
  11592. if ((ctxt->input != NULL) &&
  11593. (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
  11594. ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
  11595. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  11596. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
  11597. xmlHaltParser(ctxt);
  11598. }
  11599. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  11600. return(ctxt->errNo);
  11601. if (remain != 0) {
  11602. chunk += size;
  11603. size = remain;
  11604. remain = 0;
  11605. goto xmldecl_done;
  11606. }
  11607. if ((end_in_lf == 1) && (ctxt->input != NULL) &&
  11608. (ctxt->input->buf != NULL)) {
  11609. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
  11610. ctxt->input);
  11611. size_t current = ctxt->input->cur - ctxt->input->base;
  11612. xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
  11613. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
  11614. base, current);
  11615. }
  11616. if (terminate) {
  11617. /*
  11618. * Check for termination
  11619. */
  11620. int cur_avail = 0;
  11621. if (ctxt->input != NULL) {
  11622. if (ctxt->input->buf == NULL)
  11623. cur_avail = ctxt->input->length -
  11624. (ctxt->input->cur - ctxt->input->base);
  11625. else
  11626. cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
  11627. (ctxt->input->cur - ctxt->input->base);
  11628. }
  11629. if ((ctxt->instate != XML_PARSER_EOF) &&
  11630. (ctxt->instate != XML_PARSER_EPILOG)) {
  11631. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  11632. }
  11633. if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
  11634. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  11635. }
  11636. if (ctxt->instate != XML_PARSER_EOF) {
  11637. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  11638. ctxt->sax->endDocument(ctxt->userData);
  11639. }
  11640. ctxt->instate = XML_PARSER_EOF;
  11641. }
  11642. if (ctxt->wellFormed == 0)
  11643. return((xmlParserErrors) ctxt->errNo);
  11644. else
  11645. return(0);
  11646. }
  11647. /************************************************************************
  11648. * *
  11649. * I/O front end functions to the parser *
  11650. * *
  11651. ************************************************************************/
  11652. /**
  11653. * xmlCreatePushParserCtxt:
  11654. * @sax: a SAX handler
  11655. * @user_data: The user data returned on SAX callbacks
  11656. * @chunk: a pointer to an array of chars
  11657. * @size: number of chars in the array
  11658. * @filename: an optional file name or URI
  11659. *
  11660. * Create a parser context for using the XML parser in push mode.
  11661. * If @buffer and @size are non-NULL, the data is used to detect
  11662. * the encoding. The remaining characters will be parsed so they
  11663. * don't need to be fed in again through xmlParseChunk.
  11664. * To allow content encoding detection, @size should be >= 4
  11665. * The value of @filename is used for fetching external entities
  11666. * and error/warning reports.
  11667. *
  11668. * Returns the new parser context or NULL
  11669. */
  11670. xmlParserCtxtPtr
  11671. xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
  11672. const char *chunk, int size, const char *filename) {
  11673. xmlParserCtxtPtr ctxt;
  11674. xmlParserInputPtr inputStream;
  11675. xmlParserInputBufferPtr buf;
  11676. xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
  11677. /*
  11678. * plug some encoding conversion routines
  11679. */
  11680. if ((chunk != NULL) && (size >= 4))
  11681. enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
  11682. buf = xmlAllocParserInputBuffer(enc);
  11683. if (buf == NULL) return(NULL);
  11684. ctxt = xmlNewParserCtxt();
  11685. if (ctxt == NULL) {
  11686. xmlErrMemory(NULL, "creating parser: out of memory\n");
  11687. xmlFreeParserInputBuffer(buf);
  11688. return(NULL);
  11689. }
  11690. ctxt->dictNames = 1;
  11691. if (sax != NULL) {
  11692. #ifdef LIBXML_SAX1_ENABLED
  11693. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  11694. #endif /* LIBXML_SAX1_ENABLED */
  11695. xmlFree(ctxt->sax);
  11696. ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
  11697. if (ctxt->sax == NULL) {
  11698. xmlErrMemory(ctxt, NULL);
  11699. xmlFreeParserInputBuffer(buf);
  11700. xmlFreeParserCtxt(ctxt);
  11701. return(NULL);
  11702. }
  11703. memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
  11704. if (sax->initialized == XML_SAX2_MAGIC)
  11705. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
  11706. else
  11707. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
  11708. if (user_data != NULL)
  11709. ctxt->userData = user_data;
  11710. }
  11711. if (filename == NULL) {
  11712. ctxt->directory = NULL;
  11713. } else {
  11714. ctxt->directory = xmlParserGetDirectory(filename);
  11715. }
  11716. inputStream = xmlNewInputStream(ctxt);
  11717. if (inputStream == NULL) {
  11718. xmlFreeParserCtxt(ctxt);
  11719. xmlFreeParserInputBuffer(buf);
  11720. return(NULL);
  11721. }
  11722. if (filename == NULL)
  11723. inputStream->filename = NULL;
  11724. else {
  11725. inputStream->filename = (char *)
  11726. xmlCanonicPath((const xmlChar *) filename);
  11727. if (inputStream->filename == NULL) {
  11728. xmlFreeParserCtxt(ctxt);
  11729. xmlFreeParserInputBuffer(buf);
  11730. return(NULL);
  11731. }
  11732. }
  11733. inputStream->buf = buf;
  11734. xmlBufResetInput(inputStream->buf->buffer, inputStream);
  11735. inputPush(ctxt, inputStream);
  11736. /*
  11737. * If the caller didn't provide an initial 'chunk' for determining
  11738. * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
  11739. * that it can be automatically determined later
  11740. */
  11741. if ((size == 0) || (chunk == NULL)) {
  11742. ctxt->charset = XML_CHAR_ENCODING_NONE;
  11743. } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
  11744. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
  11745. size_t cur = ctxt->input->cur - ctxt->input->base;
  11746. xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  11747. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
  11748. #ifdef DEBUG_PUSH
  11749. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  11750. #endif
  11751. }
  11752. if (enc != XML_CHAR_ENCODING_NONE) {
  11753. xmlSwitchEncoding(ctxt, enc);
  11754. }
  11755. return(ctxt);
  11756. }
  11757. #endif /* LIBXML_PUSH_ENABLED */
  11758. /**
  11759. * xmlHaltParser:
  11760. * @ctxt: an XML parser context
  11761. *
  11762. * Blocks further parser processing don't override error
  11763. * for internal use
  11764. */
  11765. static void
  11766. xmlHaltParser(xmlParserCtxtPtr ctxt) {
  11767. if (ctxt == NULL)
  11768. return;
  11769. ctxt->instate = XML_PARSER_EOF;
  11770. ctxt->disableSAX = 1;
  11771. while (ctxt->inputNr > 1)
  11772. xmlFreeInputStream(inputPop(ctxt));
  11773. if (ctxt->input != NULL) {
  11774. /*
  11775. * in case there was a specific allocation deallocate before
  11776. * overriding base
  11777. */
  11778. if (ctxt->input->free != NULL) {
  11779. ctxt->input->free((xmlChar *) ctxt->input->base);
  11780. ctxt->input->free = NULL;
  11781. }
  11782. if (ctxt->input->buf != NULL) {
  11783. xmlFreeParserInputBuffer(ctxt->input->buf);
  11784. ctxt->input->buf = NULL;
  11785. }
  11786. ctxt->input->cur = BAD_CAST"";
  11787. ctxt->input->length = 0;
  11788. ctxt->input->base = ctxt->input->cur;
  11789. ctxt->input->end = ctxt->input->cur;
  11790. }
  11791. }
  11792. /**
  11793. * xmlStopParser:
  11794. * @ctxt: an XML parser context
  11795. *
  11796. * Blocks further parser processing
  11797. */
  11798. void
  11799. xmlStopParser(xmlParserCtxtPtr ctxt) {
  11800. if (ctxt == NULL)
  11801. return;
  11802. xmlHaltParser(ctxt);
  11803. ctxt->errNo = XML_ERR_USER_STOP;
  11804. }
  11805. /**
  11806. * xmlCreateIOParserCtxt:
  11807. * @sax: a SAX handler
  11808. * @user_data: The user data returned on SAX callbacks
  11809. * @ioread: an I/O read function
  11810. * @ioclose: an I/O close function
  11811. * @ioctx: an I/O handler
  11812. * @enc: the charset encoding if known
  11813. *
  11814. * Create a parser context for using the XML parser with an existing
  11815. * I/O stream
  11816. *
  11817. * Returns the new parser context or NULL
  11818. */
  11819. xmlParserCtxtPtr
  11820. xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
  11821. xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
  11822. void *ioctx, xmlCharEncoding enc) {
  11823. xmlParserCtxtPtr ctxt;
  11824. xmlParserInputPtr inputStream;
  11825. xmlParserInputBufferPtr buf;
  11826. if (ioread == NULL) return(NULL);
  11827. buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
  11828. if (buf == NULL) {
  11829. if (ioclose != NULL)
  11830. ioclose(ioctx);
  11831. return (NULL);
  11832. }
  11833. ctxt = xmlNewParserCtxt();
  11834. if (ctxt == NULL) {
  11835. xmlFreeParserInputBuffer(buf);
  11836. return(NULL);
  11837. }
  11838. if (sax != NULL) {
  11839. #ifdef LIBXML_SAX1_ENABLED
  11840. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  11841. #endif /* LIBXML_SAX1_ENABLED */
  11842. xmlFree(ctxt->sax);
  11843. ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
  11844. if (ctxt->sax == NULL) {
  11845. xmlFreeParserInputBuffer(buf);
  11846. xmlErrMemory(ctxt, NULL);
  11847. xmlFreeParserCtxt(ctxt);
  11848. return(NULL);
  11849. }
  11850. memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
  11851. if (sax->initialized == XML_SAX2_MAGIC)
  11852. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
  11853. else
  11854. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
  11855. if (user_data != NULL)
  11856. ctxt->userData = user_data;
  11857. }
  11858. inputStream = xmlNewIOInputStream(ctxt, buf, enc);
  11859. if (inputStream == NULL) {
  11860. xmlFreeParserCtxt(ctxt);
  11861. return(NULL);
  11862. }
  11863. inputPush(ctxt, inputStream);
  11864. return(ctxt);
  11865. }
  11866. #ifdef LIBXML_VALID_ENABLED
  11867. /************************************************************************
  11868. * *
  11869. * Front ends when parsing a DTD *
  11870. * *
  11871. ************************************************************************/
  11872. /**
  11873. * xmlIOParseDTD:
  11874. * @sax: the SAX handler block or NULL
  11875. * @input: an Input Buffer
  11876. * @enc: the charset encoding if known
  11877. *
  11878. * Load and parse a DTD
  11879. *
  11880. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11881. * @input will be freed by the function in any case.
  11882. */
  11883. xmlDtdPtr
  11884. xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
  11885. xmlCharEncoding enc) {
  11886. xmlDtdPtr ret = NULL;
  11887. xmlParserCtxtPtr ctxt;
  11888. xmlParserInputPtr pinput = NULL;
  11889. xmlChar start[4];
  11890. if (input == NULL)
  11891. return(NULL);
  11892. ctxt = xmlNewParserCtxt();
  11893. if (ctxt == NULL) {
  11894. xmlFreeParserInputBuffer(input);
  11895. return(NULL);
  11896. }
  11897. /* We are loading a DTD */
  11898. ctxt->options |= XML_PARSE_DTDLOAD;
  11899. /*
  11900. * Set-up the SAX context
  11901. */
  11902. if (sax != NULL) {
  11903. if (ctxt->sax != NULL)
  11904. xmlFree(ctxt->sax);
  11905. ctxt->sax = sax;
  11906. ctxt->userData = ctxt;
  11907. }
  11908. xmlDetectSAX2(ctxt);
  11909. /*
  11910. * generate a parser input from the I/O handler
  11911. */
  11912. pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  11913. if (pinput == NULL) {
  11914. if (sax != NULL) ctxt->sax = NULL;
  11915. xmlFreeParserInputBuffer(input);
  11916. xmlFreeParserCtxt(ctxt);
  11917. return(NULL);
  11918. }
  11919. /*
  11920. * plug some encoding conversion routines here.
  11921. */
  11922. if (xmlPushInput(ctxt, pinput) < 0) {
  11923. if (sax != NULL) ctxt->sax = NULL;
  11924. xmlFreeParserCtxt(ctxt);
  11925. return(NULL);
  11926. }
  11927. if (enc != XML_CHAR_ENCODING_NONE) {
  11928. xmlSwitchEncoding(ctxt, enc);
  11929. }
  11930. pinput->filename = NULL;
  11931. pinput->line = 1;
  11932. pinput->col = 1;
  11933. pinput->base = ctxt->input->cur;
  11934. pinput->cur = ctxt->input->cur;
  11935. pinput->free = NULL;
  11936. /*
  11937. * let's parse that entity knowing it's an external subset.
  11938. */
  11939. ctxt->inSubset = 2;
  11940. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  11941. if (ctxt->myDoc == NULL) {
  11942. xmlErrMemory(ctxt, "New Doc failed");
  11943. return(NULL);
  11944. }
  11945. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  11946. ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
  11947. BAD_CAST "none", BAD_CAST "none");
  11948. if ((enc == XML_CHAR_ENCODING_NONE) &&
  11949. ((ctxt->input->end - ctxt->input->cur) >= 4)) {
  11950. /*
  11951. * Get the 4 first bytes and decode the charset
  11952. * if enc != XML_CHAR_ENCODING_NONE
  11953. * plug some encoding conversion routines.
  11954. */
  11955. start[0] = RAW;
  11956. start[1] = NXT(1);
  11957. start[2] = NXT(2);
  11958. start[3] = NXT(3);
  11959. enc = xmlDetectCharEncoding(start, 4);
  11960. if (enc != XML_CHAR_ENCODING_NONE) {
  11961. xmlSwitchEncoding(ctxt, enc);
  11962. }
  11963. }
  11964. xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
  11965. if (ctxt->myDoc != NULL) {
  11966. if (ctxt->wellFormed) {
  11967. ret = ctxt->myDoc->extSubset;
  11968. ctxt->myDoc->extSubset = NULL;
  11969. if (ret != NULL) {
  11970. xmlNodePtr tmp;
  11971. ret->doc = NULL;
  11972. tmp = ret->children;
  11973. while (tmp != NULL) {
  11974. tmp->doc = NULL;
  11975. tmp = tmp->next;
  11976. }
  11977. }
  11978. } else {
  11979. ret = NULL;
  11980. }
  11981. xmlFreeDoc(ctxt->myDoc);
  11982. ctxt->myDoc = NULL;
  11983. }
  11984. if (sax != NULL) ctxt->sax = NULL;
  11985. xmlFreeParserCtxt(ctxt);
  11986. return(ret);
  11987. }
  11988. /**
  11989. * xmlSAXParseDTD:
  11990. * @sax: the SAX handler block
  11991. * @ExternalID: a NAME* containing the External ID of the DTD
  11992. * @SystemID: a NAME* containing the URL to the DTD
  11993. *
  11994. * Load and parse an external subset.
  11995. *
  11996. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11997. */
  11998. xmlDtdPtr
  11999. xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
  12000. const xmlChar *SystemID) {
  12001. xmlDtdPtr ret = NULL;
  12002. xmlParserCtxtPtr ctxt;
  12003. xmlParserInputPtr input = NULL;
  12004. xmlCharEncoding enc;
  12005. xmlChar* systemIdCanonic;
  12006. if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
  12007. ctxt = xmlNewParserCtxt();
  12008. if (ctxt == NULL) {
  12009. return(NULL);
  12010. }
  12011. /* We are loading a DTD */
  12012. ctxt->options |= XML_PARSE_DTDLOAD;
  12013. /*
  12014. * Set-up the SAX context
  12015. */
  12016. if (sax != NULL) {
  12017. if (ctxt->sax != NULL)
  12018. xmlFree(ctxt->sax);
  12019. ctxt->sax = sax;
  12020. ctxt->userData = ctxt;
  12021. }
  12022. /*
  12023. * Canonicalise the system ID
  12024. */
  12025. systemIdCanonic = xmlCanonicPath(SystemID);
  12026. if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
  12027. xmlFreeParserCtxt(ctxt);
  12028. return(NULL);
  12029. }
  12030. /*
  12031. * Ask the Entity resolver to load the damn thing
  12032. */
  12033. if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
  12034. input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
  12035. systemIdCanonic);
  12036. if (input == NULL) {
  12037. if (sax != NULL) ctxt->sax = NULL;
  12038. xmlFreeParserCtxt(ctxt);
  12039. if (systemIdCanonic != NULL)
  12040. xmlFree(systemIdCanonic);
  12041. return(NULL);
  12042. }
  12043. /*
  12044. * plug some encoding conversion routines here.
  12045. */
  12046. if (xmlPushInput(ctxt, input) < 0) {
  12047. if (sax != NULL) ctxt->sax = NULL;
  12048. xmlFreeParserCtxt(ctxt);
  12049. if (systemIdCanonic != NULL)
  12050. xmlFree(systemIdCanonic);
  12051. return(NULL);
  12052. }
  12053. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  12054. enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
  12055. xmlSwitchEncoding(ctxt, enc);
  12056. }
  12057. if (input->filename == NULL)
  12058. input->filename = (char *) systemIdCanonic;
  12059. else
  12060. xmlFree(systemIdCanonic);
  12061. input->line = 1;
  12062. input->col = 1;
  12063. input->base = ctxt->input->cur;
  12064. input->cur = ctxt->input->cur;
  12065. input->free = NULL;
  12066. /*
  12067. * let's parse that entity knowing it's an external subset.
  12068. */
  12069. ctxt->inSubset = 2;
  12070. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  12071. if (ctxt->myDoc == NULL) {
  12072. xmlErrMemory(ctxt, "New Doc failed");
  12073. if (sax != NULL) ctxt->sax = NULL;
  12074. xmlFreeParserCtxt(ctxt);
  12075. return(NULL);
  12076. }
  12077. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  12078. ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
  12079. ExternalID, SystemID);
  12080. xmlParseExternalSubset(ctxt, ExternalID, SystemID);
  12081. if (ctxt->myDoc != NULL) {
  12082. if (ctxt->wellFormed) {
  12083. ret = ctxt->myDoc->extSubset;
  12084. ctxt->myDoc->extSubset = NULL;
  12085. if (ret != NULL) {
  12086. xmlNodePtr tmp;
  12087. ret->doc = NULL;
  12088. tmp = ret->children;
  12089. while (tmp != NULL) {
  12090. tmp->doc = NULL;
  12091. tmp = tmp->next;
  12092. }
  12093. }
  12094. } else {
  12095. ret = NULL;
  12096. }
  12097. xmlFreeDoc(ctxt->myDoc);
  12098. ctxt->myDoc = NULL;
  12099. }
  12100. if (sax != NULL) ctxt->sax = NULL;
  12101. xmlFreeParserCtxt(ctxt);
  12102. return(ret);
  12103. }
  12104. /**
  12105. * xmlParseDTD:
  12106. * @ExternalID: a NAME* containing the External ID of the DTD
  12107. * @SystemID: a NAME* containing the URL to the DTD
  12108. *
  12109. * Load and parse an external subset.
  12110. *
  12111. * Returns the resulting xmlDtdPtr or NULL in case of error.
  12112. */
  12113. xmlDtdPtr
  12114. xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
  12115. return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
  12116. }
  12117. #endif /* LIBXML_VALID_ENABLED */
  12118. /************************************************************************
  12119. * *
  12120. * Front ends when parsing an Entity *
  12121. * *
  12122. ************************************************************************/
  12123. /**
  12124. * xmlParseCtxtExternalEntity:
  12125. * @ctx: the existing parsing context
  12126. * @URL: the URL for the entity to load
  12127. * @ID: the System ID for the entity to load
  12128. * @lst: the return value for the set of parsed nodes
  12129. *
  12130. * Parse an external general entity within an existing parsing context
  12131. * An external general parsed entity is well-formed if it matches the
  12132. * production labeled extParsedEnt.
  12133. *
  12134. * [78] extParsedEnt ::= TextDecl? content
  12135. *
  12136. * Returns 0 if the entity is well formed, -1 in case of args problem and
  12137. * the parser error code otherwise
  12138. */
  12139. int
  12140. xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
  12141. const xmlChar *ID, xmlNodePtr *lst) {
  12142. void *userData;
  12143. if (ctx == NULL) return(-1);
  12144. /*
  12145. * If the user provided their own SAX callbacks, then reuse the
  12146. * userData callback field, otherwise the expected setup in a
  12147. * DOM builder is to have userData == ctxt
  12148. */
  12149. if (ctx->userData == ctx)
  12150. userData = NULL;
  12151. else
  12152. userData = ctx->userData;
  12153. return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
  12154. userData, ctx->depth + 1,
  12155. URL, ID, lst);
  12156. }
  12157. /**
  12158. * xmlParseExternalEntityPrivate:
  12159. * @doc: the document the chunk pertains to
  12160. * @oldctxt: the previous parser context if available
  12161. * @sax: the SAX handler block (possibly NULL)
  12162. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12163. * @depth: Used for loop detection, use 0
  12164. * @URL: the URL for the entity to load
  12165. * @ID: the System ID for the entity to load
  12166. * @list: the return value for the set of parsed nodes
  12167. *
  12168. * Private version of xmlParseExternalEntity()
  12169. *
  12170. * Returns 0 if the entity is well formed, -1 in case of args problem and
  12171. * the parser error code otherwise
  12172. */
  12173. static xmlParserErrors
  12174. xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
  12175. xmlSAXHandlerPtr sax,
  12176. void *user_data, int depth, const xmlChar *URL,
  12177. const xmlChar *ID, xmlNodePtr *list) {
  12178. xmlParserCtxtPtr ctxt;
  12179. xmlDocPtr newDoc;
  12180. xmlNodePtr newRoot;
  12181. xmlSAXHandlerPtr oldsax = NULL;
  12182. xmlParserErrors ret = XML_ERR_OK;
  12183. xmlChar start[4];
  12184. xmlCharEncoding enc;
  12185. if (((depth > 40) &&
  12186. ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
  12187. (depth > 1024)) {
  12188. return(XML_ERR_ENTITY_LOOP);
  12189. }
  12190. if (list != NULL)
  12191. *list = NULL;
  12192. if ((URL == NULL) && (ID == NULL))
  12193. return(XML_ERR_INTERNAL_ERROR);
  12194. if (doc == NULL)
  12195. return(XML_ERR_INTERNAL_ERROR);
  12196. ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
  12197. if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
  12198. ctxt->userData = ctxt;
  12199. if (sax != NULL) {
  12200. oldsax = ctxt->sax;
  12201. ctxt->sax = sax;
  12202. if (user_data != NULL)
  12203. ctxt->userData = user_data;
  12204. }
  12205. xmlDetectSAX2(ctxt);
  12206. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12207. if (newDoc == NULL) {
  12208. xmlFreeParserCtxt(ctxt);
  12209. return(XML_ERR_INTERNAL_ERROR);
  12210. }
  12211. newDoc->properties = XML_DOC_INTERNAL;
  12212. if (doc) {
  12213. newDoc->intSubset = doc->intSubset;
  12214. newDoc->extSubset = doc->extSubset;
  12215. if (doc->dict) {
  12216. newDoc->dict = doc->dict;
  12217. xmlDictReference(newDoc->dict);
  12218. }
  12219. if (doc->URL != NULL) {
  12220. newDoc->URL = xmlStrdup(doc->URL);
  12221. }
  12222. }
  12223. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12224. if (newRoot == NULL) {
  12225. if (sax != NULL)
  12226. ctxt->sax = oldsax;
  12227. xmlFreeParserCtxt(ctxt);
  12228. newDoc->intSubset = NULL;
  12229. newDoc->extSubset = NULL;
  12230. xmlFreeDoc(newDoc);
  12231. return(XML_ERR_INTERNAL_ERROR);
  12232. }
  12233. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  12234. nodePush(ctxt, newDoc->children);
  12235. if (doc == NULL) {
  12236. ctxt->myDoc = newDoc;
  12237. } else {
  12238. ctxt->myDoc = doc;
  12239. newRoot->doc = doc;
  12240. }
  12241. /*
  12242. * Get the 4 first bytes and decode the charset
  12243. * if enc != XML_CHAR_ENCODING_NONE
  12244. * plug some encoding conversion routines.
  12245. */
  12246. GROW;
  12247. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  12248. start[0] = RAW;
  12249. start[1] = NXT(1);
  12250. start[2] = NXT(2);
  12251. start[3] = NXT(3);
  12252. enc = xmlDetectCharEncoding(start, 4);
  12253. if (enc != XML_CHAR_ENCODING_NONE) {
  12254. xmlSwitchEncoding(ctxt, enc);
  12255. }
  12256. }
  12257. /*
  12258. * Parse a possible text declaration first
  12259. */
  12260. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  12261. xmlParseTextDecl(ctxt);
  12262. /*
  12263. * An XML-1.0 document can't reference an entity not XML-1.0
  12264. */
  12265. if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
  12266. (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
  12267. xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
  12268. "Version mismatch between document and entity\n");
  12269. }
  12270. }
  12271. ctxt->instate = XML_PARSER_CONTENT;
  12272. ctxt->depth = depth;
  12273. if (oldctxt != NULL) {
  12274. ctxt->_private = oldctxt->_private;
  12275. ctxt->loadsubset = oldctxt->loadsubset;
  12276. ctxt->validate = oldctxt->validate;
  12277. ctxt->valid = oldctxt->valid;
  12278. ctxt->replaceEntities = oldctxt->replaceEntities;
  12279. if (oldctxt->validate) {
  12280. ctxt->vctxt.error = oldctxt->vctxt.error;
  12281. ctxt->vctxt.warning = oldctxt->vctxt.warning;
  12282. ctxt->vctxt.userData = oldctxt->vctxt.userData;
  12283. }
  12284. ctxt->external = oldctxt->external;
  12285. if (ctxt->dict) xmlDictFree(ctxt->dict);
  12286. ctxt->dict = oldctxt->dict;
  12287. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12288. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12289. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12290. ctxt->dictNames = oldctxt->dictNames;
  12291. ctxt->attsDefault = oldctxt->attsDefault;
  12292. ctxt->attsSpecial = oldctxt->attsSpecial;
  12293. ctxt->linenumbers = oldctxt->linenumbers;
  12294. ctxt->record_info = oldctxt->record_info;
  12295. ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
  12296. ctxt->node_seq.length = oldctxt->node_seq.length;
  12297. ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
  12298. } else {
  12299. /*
  12300. * Doing validity checking on chunk without context
  12301. * doesn't make sense
  12302. */
  12303. ctxt->_private = NULL;
  12304. ctxt->validate = 0;
  12305. ctxt->external = 2;
  12306. ctxt->loadsubset = 0;
  12307. }
  12308. xmlParseContent(ctxt);
  12309. if ((RAW == '<') && (NXT(1) == '/')) {
  12310. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12311. } else if (RAW != 0) {
  12312. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12313. }
  12314. if (ctxt->node != newDoc->children) {
  12315. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12316. }
  12317. if (!ctxt->wellFormed) {
  12318. if (ctxt->errNo == 0)
  12319. ret = XML_ERR_INTERNAL_ERROR;
  12320. else
  12321. ret = (xmlParserErrors)ctxt->errNo;
  12322. } else {
  12323. if (list != NULL) {
  12324. xmlNodePtr cur;
  12325. /*
  12326. * Return the newly created nodeset after unlinking it from
  12327. * they pseudo parent.
  12328. */
  12329. cur = newDoc->children->children;
  12330. *list = cur;
  12331. while (cur != NULL) {
  12332. cur->parent = NULL;
  12333. cur = cur->next;
  12334. }
  12335. newDoc->children->children = NULL;
  12336. }
  12337. ret = XML_ERR_OK;
  12338. }
  12339. /*
  12340. * Record in the parent context the number of entities replacement
  12341. * done when parsing that reference.
  12342. */
  12343. if (oldctxt != NULL)
  12344. oldctxt->nbentities += ctxt->nbentities;
  12345. /*
  12346. * Also record the size of the entity parsed
  12347. */
  12348. if (ctxt->input != NULL && oldctxt != NULL) {
  12349. oldctxt->sizeentities += ctxt->input->consumed;
  12350. oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
  12351. }
  12352. /*
  12353. * And record the last error if any
  12354. */
  12355. if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
  12356. xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
  12357. if (sax != NULL)
  12358. ctxt->sax = oldsax;
  12359. if (oldctxt != NULL) {
  12360. ctxt->dict = NULL;
  12361. ctxt->attsDefault = NULL;
  12362. ctxt->attsSpecial = NULL;
  12363. oldctxt->validate = ctxt->validate;
  12364. oldctxt->valid = ctxt->valid;
  12365. oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
  12366. oldctxt->node_seq.length = ctxt->node_seq.length;
  12367. oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
  12368. }
  12369. ctxt->node_seq.maximum = 0;
  12370. ctxt->node_seq.length = 0;
  12371. ctxt->node_seq.buffer = NULL;
  12372. xmlFreeParserCtxt(ctxt);
  12373. newDoc->intSubset = NULL;
  12374. newDoc->extSubset = NULL;
  12375. xmlFreeDoc(newDoc);
  12376. return(ret);
  12377. }
  12378. #ifdef LIBXML_SAX1_ENABLED
  12379. /**
  12380. * xmlParseExternalEntity:
  12381. * @doc: the document the chunk pertains to
  12382. * @sax: the SAX handler block (possibly NULL)
  12383. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12384. * @depth: Used for loop detection, use 0
  12385. * @URL: the URL for the entity to load
  12386. * @ID: the System ID for the entity to load
  12387. * @lst: the return value for the set of parsed nodes
  12388. *
  12389. * Parse an external general entity
  12390. * An external general parsed entity is well-formed if it matches the
  12391. * production labeled extParsedEnt.
  12392. *
  12393. * [78] extParsedEnt ::= TextDecl? content
  12394. *
  12395. * Returns 0 if the entity is well formed, -1 in case of args problem and
  12396. * the parser error code otherwise
  12397. */
  12398. int
  12399. xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
  12400. int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
  12401. return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
  12402. ID, lst));
  12403. }
  12404. /**
  12405. * xmlParseBalancedChunkMemory:
  12406. * @doc: the document the chunk pertains to (must not be NULL)
  12407. * @sax: the SAX handler block (possibly NULL)
  12408. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12409. * @depth: Used for loop detection, use 0
  12410. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12411. * @lst: the return value for the set of parsed nodes
  12412. *
  12413. * Parse a well-balanced chunk of an XML document
  12414. * called by the parser
  12415. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12416. * the content production in the XML grammar:
  12417. *
  12418. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12419. *
  12420. * Returns 0 if the chunk is well balanced, -1 in case of args problem and
  12421. * the parser error code otherwise
  12422. */
  12423. int
  12424. xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
  12425. void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
  12426. return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
  12427. depth, string, lst, 0 );
  12428. }
  12429. #endif /* LIBXML_SAX1_ENABLED */
  12430. /**
  12431. * xmlParseBalancedChunkMemoryInternal:
  12432. * @oldctxt: the existing parsing context
  12433. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12434. * @user_data: the user data field for the parser context
  12435. * @lst: the return value for the set of parsed nodes
  12436. *
  12437. *
  12438. * Parse a well-balanced chunk of an XML document
  12439. * called by the parser
  12440. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12441. * the content production in the XML grammar:
  12442. *
  12443. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12444. *
  12445. * Returns XML_ERR_OK if the chunk is well balanced, and the parser
  12446. * error code otherwise
  12447. *
  12448. * In case recover is set to 1, the nodelist will not be empty even if
  12449. * the parsed chunk is not well balanced.
  12450. */
  12451. static xmlParserErrors
  12452. xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
  12453. const xmlChar *string, void *user_data, xmlNodePtr *lst) {
  12454. xmlParserCtxtPtr ctxt;
  12455. xmlDocPtr newDoc = NULL;
  12456. xmlNodePtr newRoot;
  12457. xmlSAXHandlerPtr oldsax = NULL;
  12458. xmlNodePtr content = NULL;
  12459. xmlNodePtr last = NULL;
  12460. int size;
  12461. xmlParserErrors ret = XML_ERR_OK;
  12462. #ifdef SAX2
  12463. int i;
  12464. #endif
  12465. if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
  12466. (oldctxt->depth > 1024)) {
  12467. return(XML_ERR_ENTITY_LOOP);
  12468. }
  12469. if (lst != NULL)
  12470. *lst = NULL;
  12471. if (string == NULL)
  12472. return(XML_ERR_INTERNAL_ERROR);
  12473. size = xmlStrlen(string);
  12474. ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
  12475. if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
  12476. if (user_data != NULL)
  12477. ctxt->userData = user_data;
  12478. else
  12479. ctxt->userData = ctxt;
  12480. if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
  12481. ctxt->dict = oldctxt->dict;
  12482. ctxt->input_id = oldctxt->input_id + 1;
  12483. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12484. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12485. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12486. #ifdef SAX2
  12487. /* propagate namespaces down the entity */
  12488. for (i = 0;i < oldctxt->nsNr;i += 2) {
  12489. nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
  12490. }
  12491. #endif
  12492. oldsax = ctxt->sax;
  12493. ctxt->sax = oldctxt->sax;
  12494. xmlDetectSAX2(ctxt);
  12495. ctxt->replaceEntities = oldctxt->replaceEntities;
  12496. ctxt->options = oldctxt->options;
  12497. ctxt->_private = oldctxt->_private;
  12498. if (oldctxt->myDoc == NULL) {
  12499. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12500. if (newDoc == NULL) {
  12501. ctxt->sax = oldsax;
  12502. ctxt->dict = NULL;
  12503. xmlFreeParserCtxt(ctxt);
  12504. return(XML_ERR_INTERNAL_ERROR);
  12505. }
  12506. newDoc->properties = XML_DOC_INTERNAL;
  12507. newDoc->dict = ctxt->dict;
  12508. xmlDictReference(newDoc->dict);
  12509. ctxt->myDoc = newDoc;
  12510. } else {
  12511. ctxt->myDoc = oldctxt->myDoc;
  12512. content = ctxt->myDoc->children;
  12513. last = ctxt->myDoc->last;
  12514. }
  12515. newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12516. if (newRoot == NULL) {
  12517. ctxt->sax = oldsax;
  12518. ctxt->dict = NULL;
  12519. xmlFreeParserCtxt(ctxt);
  12520. if (newDoc != NULL) {
  12521. xmlFreeDoc(newDoc);
  12522. }
  12523. return(XML_ERR_INTERNAL_ERROR);
  12524. }
  12525. ctxt->myDoc->children = NULL;
  12526. ctxt->myDoc->last = NULL;
  12527. xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
  12528. nodePush(ctxt, ctxt->myDoc->children);
  12529. ctxt->instate = XML_PARSER_CONTENT;
  12530. ctxt->depth = oldctxt->depth + 1;
  12531. ctxt->validate = 0;
  12532. ctxt->loadsubset = oldctxt->loadsubset;
  12533. if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
  12534. /*
  12535. * ID/IDREF registration will be done in xmlValidateElement below
  12536. */
  12537. ctxt->loadsubset |= XML_SKIP_IDS;
  12538. }
  12539. ctxt->dictNames = oldctxt->dictNames;
  12540. ctxt->attsDefault = oldctxt->attsDefault;
  12541. ctxt->attsSpecial = oldctxt->attsSpecial;
  12542. xmlParseContent(ctxt);
  12543. if ((RAW == '<') && (NXT(1) == '/')) {
  12544. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12545. } else if (RAW != 0) {
  12546. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12547. }
  12548. if (ctxt->node != ctxt->myDoc->children) {
  12549. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12550. }
  12551. if (!ctxt->wellFormed) {
  12552. if (ctxt->errNo == 0)
  12553. ret = XML_ERR_INTERNAL_ERROR;
  12554. else
  12555. ret = (xmlParserErrors)ctxt->errNo;
  12556. } else {
  12557. ret = XML_ERR_OK;
  12558. }
  12559. if ((lst != NULL) && (ret == XML_ERR_OK)) {
  12560. xmlNodePtr cur;
  12561. /*
  12562. * Return the newly created nodeset after unlinking it from
  12563. * they pseudo parent.
  12564. */
  12565. cur = ctxt->myDoc->children->children;
  12566. *lst = cur;
  12567. while (cur != NULL) {
  12568. #ifdef LIBXML_VALID_ENABLED
  12569. if ((oldctxt->validate) && (oldctxt->wellFormed) &&
  12570. (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
  12571. (cur->type == XML_ELEMENT_NODE)) {
  12572. oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
  12573. oldctxt->myDoc, cur);
  12574. }
  12575. #endif /* LIBXML_VALID_ENABLED */
  12576. cur->parent = NULL;
  12577. cur = cur->next;
  12578. }
  12579. ctxt->myDoc->children->children = NULL;
  12580. }
  12581. if (ctxt->myDoc != NULL) {
  12582. xmlFreeNode(ctxt->myDoc->children);
  12583. ctxt->myDoc->children = content;
  12584. ctxt->myDoc->last = last;
  12585. }
  12586. /*
  12587. * Record in the parent context the number of entities replacement
  12588. * done when parsing that reference.
  12589. */
  12590. if (oldctxt != NULL)
  12591. oldctxt->nbentities += ctxt->nbentities;
  12592. /*
  12593. * Also record the last error if any
  12594. */
  12595. if (ctxt->lastError.code != XML_ERR_OK)
  12596. xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
  12597. ctxt->sax = oldsax;
  12598. ctxt->dict = NULL;
  12599. ctxt->attsDefault = NULL;
  12600. ctxt->attsSpecial = NULL;
  12601. xmlFreeParserCtxt(ctxt);
  12602. if (newDoc != NULL) {
  12603. xmlFreeDoc(newDoc);
  12604. }
  12605. return(ret);
  12606. }
  12607. /**
  12608. * xmlParseInNodeContext:
  12609. * @node: the context node
  12610. * @data: the input string
  12611. * @datalen: the input string length in bytes
  12612. * @options: a combination of xmlParserOption
  12613. * @lst: the return value for the set of parsed nodes
  12614. *
  12615. * Parse a well-balanced chunk of an XML document
  12616. * within the context (DTD, namespaces, etc ...) of the given node.
  12617. *
  12618. * The allowed sequence for the data is a Well Balanced Chunk defined by
  12619. * the content production in the XML grammar:
  12620. *
  12621. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12622. *
  12623. * Returns XML_ERR_OK if the chunk is well balanced, and the parser
  12624. * error code otherwise
  12625. */
  12626. xmlParserErrors
  12627. xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
  12628. int options, xmlNodePtr *lst) {
  12629. #ifdef SAX2
  12630. xmlParserCtxtPtr ctxt;
  12631. xmlDocPtr doc = NULL;
  12632. xmlNodePtr fake, cur;
  12633. int nsnr = 0;
  12634. xmlParserErrors ret = XML_ERR_OK;
  12635. /*
  12636. * check all input parameters, grab the document
  12637. */
  12638. if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
  12639. return(XML_ERR_INTERNAL_ERROR);
  12640. switch (node->type) {
  12641. case XML_ELEMENT_NODE:
  12642. case XML_ATTRIBUTE_NODE:
  12643. case XML_TEXT_NODE:
  12644. case XML_CDATA_SECTION_NODE:
  12645. case XML_ENTITY_REF_NODE:
  12646. case XML_PI_NODE:
  12647. case XML_COMMENT_NODE:
  12648. case XML_DOCUMENT_NODE:
  12649. case XML_HTML_DOCUMENT_NODE:
  12650. break;
  12651. default:
  12652. return(XML_ERR_INTERNAL_ERROR);
  12653. }
  12654. while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
  12655. (node->type != XML_DOCUMENT_NODE) &&
  12656. (node->type != XML_HTML_DOCUMENT_NODE))
  12657. node = node->parent;
  12658. if (node == NULL)
  12659. return(XML_ERR_INTERNAL_ERROR);
  12660. if (node->type == XML_ELEMENT_NODE)
  12661. doc = node->doc;
  12662. else
  12663. doc = (xmlDocPtr) node;
  12664. if (doc == NULL)
  12665. return(XML_ERR_INTERNAL_ERROR);
  12666. /*
  12667. * allocate a context and set-up everything not related to the
  12668. * node position in the tree
  12669. */
  12670. if (doc->type == XML_DOCUMENT_NODE)
  12671. ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
  12672. #ifdef LIBXML_HTML_ENABLED
  12673. else if (doc->type == XML_HTML_DOCUMENT_NODE) {
  12674. ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
  12675. /*
  12676. * When parsing in context, it makes no sense to add implied
  12677. * elements like html/body/etc...
  12678. */
  12679. options |= HTML_PARSE_NOIMPLIED;
  12680. }
  12681. #endif
  12682. else
  12683. return(XML_ERR_INTERNAL_ERROR);
  12684. if (ctxt == NULL)
  12685. return(XML_ERR_NO_MEMORY);
  12686. /*
  12687. * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
  12688. * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
  12689. * we must wait until the last moment to free the original one.
  12690. */
  12691. if (doc->dict != NULL) {
  12692. if (ctxt->dict != NULL)
  12693. xmlDictFree(ctxt->dict);
  12694. ctxt->dict = doc->dict;
  12695. } else
  12696. options |= XML_PARSE_NODICT;
  12697. if (doc->encoding != NULL) {
  12698. xmlCharEncodingHandlerPtr hdlr;
  12699. if (ctxt->encoding != NULL)
  12700. xmlFree((xmlChar *) ctxt->encoding);
  12701. ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
  12702. hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
  12703. if (hdlr != NULL) {
  12704. xmlSwitchToEncoding(ctxt, hdlr);
  12705. } else {
  12706. return(XML_ERR_UNSUPPORTED_ENCODING);
  12707. }
  12708. }
  12709. xmlCtxtUseOptionsInternal(ctxt, options, NULL);
  12710. xmlDetectSAX2(ctxt);
  12711. ctxt->myDoc = doc;
  12712. /* parsing in context, i.e. as within existing content */
  12713. ctxt->input_id = 2;
  12714. ctxt->instate = XML_PARSER_CONTENT;
  12715. fake = xmlNewComment(NULL);
  12716. if (fake == NULL) {
  12717. xmlFreeParserCtxt(ctxt);
  12718. return(XML_ERR_NO_MEMORY);
  12719. }
  12720. xmlAddChild(node, fake);
  12721. if (node->type == XML_ELEMENT_NODE) {
  12722. nodePush(ctxt, node);
  12723. /*
  12724. * initialize the SAX2 namespaces stack
  12725. */
  12726. cur = node;
  12727. while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
  12728. xmlNsPtr ns = cur->nsDef;
  12729. const xmlChar *iprefix, *ihref;
  12730. while (ns != NULL) {
  12731. if (ctxt->dict) {
  12732. iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
  12733. ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
  12734. } else {
  12735. iprefix = ns->prefix;
  12736. ihref = ns->href;
  12737. }
  12738. if (xmlGetNamespace(ctxt, iprefix) == NULL) {
  12739. nsPush(ctxt, iprefix, ihref);
  12740. nsnr++;
  12741. }
  12742. ns = ns->next;
  12743. }
  12744. cur = cur->parent;
  12745. }
  12746. }
  12747. if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
  12748. /*
  12749. * ID/IDREF registration will be done in xmlValidateElement below
  12750. */
  12751. ctxt->loadsubset |= XML_SKIP_IDS;
  12752. }
  12753. #ifdef LIBXML_HTML_ENABLED
  12754. if (doc->type == XML_HTML_DOCUMENT_NODE)
  12755. __htmlParseContent(ctxt);
  12756. else
  12757. #endif
  12758. xmlParseContent(ctxt);
  12759. nsPop(ctxt, nsnr);
  12760. if ((RAW == '<') && (NXT(1) == '/')) {
  12761. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12762. } else if (RAW != 0) {
  12763. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12764. }
  12765. if ((ctxt->node != NULL) && (ctxt->node != node)) {
  12766. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12767. ctxt->wellFormed = 0;
  12768. }
  12769. if (!ctxt->wellFormed) {
  12770. if (ctxt->errNo == 0)
  12771. ret = XML_ERR_INTERNAL_ERROR;
  12772. else
  12773. ret = (xmlParserErrors)ctxt->errNo;
  12774. } else {
  12775. ret = XML_ERR_OK;
  12776. }
  12777. /*
  12778. * Return the newly created nodeset after unlinking it from
  12779. * the pseudo sibling.
  12780. */
  12781. cur = fake->next;
  12782. fake->next = NULL;
  12783. node->last = fake;
  12784. if (cur != NULL) {
  12785. cur->prev = NULL;
  12786. }
  12787. *lst = cur;
  12788. while (cur != NULL) {
  12789. cur->parent = NULL;
  12790. cur = cur->next;
  12791. }
  12792. xmlUnlinkNode(fake);
  12793. xmlFreeNode(fake);
  12794. if (ret != XML_ERR_OK) {
  12795. xmlFreeNodeList(*lst);
  12796. *lst = NULL;
  12797. }
  12798. if (doc->dict != NULL)
  12799. ctxt->dict = NULL;
  12800. xmlFreeParserCtxt(ctxt);
  12801. return(ret);
  12802. #else /* !SAX2 */
  12803. return(XML_ERR_INTERNAL_ERROR);
  12804. #endif
  12805. }
  12806. #ifdef LIBXML_SAX1_ENABLED
  12807. /**
  12808. * xmlParseBalancedChunkMemoryRecover:
  12809. * @doc: the document the chunk pertains to (must not be NULL)
  12810. * @sax: the SAX handler block (possibly NULL)
  12811. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12812. * @depth: Used for loop detection, use 0
  12813. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12814. * @lst: the return value for the set of parsed nodes
  12815. * @recover: return nodes even if the data is broken (use 0)
  12816. *
  12817. *
  12818. * Parse a well-balanced chunk of an XML document
  12819. * called by the parser
  12820. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12821. * the content production in the XML grammar:
  12822. *
  12823. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12824. *
  12825. * Returns 0 if the chunk is well balanced, -1 in case of args problem and
  12826. * the parser error code otherwise
  12827. *
  12828. * In case recover is set to 1, the nodelist will not be empty even if
  12829. * the parsed chunk is not well balanced, assuming the parsing succeeded to
  12830. * some extent.
  12831. */
  12832. int
  12833. xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
  12834. void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
  12835. int recover) {
  12836. xmlParserCtxtPtr ctxt;
  12837. xmlDocPtr newDoc;
  12838. xmlSAXHandlerPtr oldsax = NULL;
  12839. xmlNodePtr content, newRoot;
  12840. int size;
  12841. int ret = 0;
  12842. if (depth > 40) {
  12843. return(XML_ERR_ENTITY_LOOP);
  12844. }
  12845. if (lst != NULL)
  12846. *lst = NULL;
  12847. if (string == NULL)
  12848. return(-1);
  12849. size = xmlStrlen(string);
  12850. ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
  12851. if (ctxt == NULL) return(-1);
  12852. ctxt->userData = ctxt;
  12853. if (sax != NULL) {
  12854. oldsax = ctxt->sax;
  12855. ctxt->sax = sax;
  12856. if (user_data != NULL)
  12857. ctxt->userData = user_data;
  12858. }
  12859. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12860. if (newDoc == NULL) {
  12861. xmlFreeParserCtxt(ctxt);
  12862. return(-1);
  12863. }
  12864. newDoc->properties = XML_DOC_INTERNAL;
  12865. if ((doc != NULL) && (doc->dict != NULL)) {
  12866. xmlDictFree(ctxt->dict);
  12867. ctxt->dict = doc->dict;
  12868. xmlDictReference(ctxt->dict);
  12869. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12870. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12871. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12872. ctxt->dictNames = 1;
  12873. } else {
  12874. xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
  12875. }
  12876. /* doc == NULL is only supported for historic reasons */
  12877. if (doc != NULL) {
  12878. newDoc->intSubset = doc->intSubset;
  12879. newDoc->extSubset = doc->extSubset;
  12880. }
  12881. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12882. if (newRoot == NULL) {
  12883. if (sax != NULL)
  12884. ctxt->sax = oldsax;
  12885. xmlFreeParserCtxt(ctxt);
  12886. newDoc->intSubset = NULL;
  12887. newDoc->extSubset = NULL;
  12888. xmlFreeDoc(newDoc);
  12889. return(-1);
  12890. }
  12891. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  12892. nodePush(ctxt, newRoot);
  12893. /* doc == NULL is only supported for historic reasons */
  12894. if (doc == NULL) {
  12895. ctxt->myDoc = newDoc;
  12896. } else {
  12897. ctxt->myDoc = newDoc;
  12898. newDoc->children->doc = doc;
  12899. /* Ensure that doc has XML spec namespace */
  12900. xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
  12901. newDoc->oldNs = doc->oldNs;
  12902. }
  12903. ctxt->instate = XML_PARSER_CONTENT;
  12904. ctxt->input_id = 2;
  12905. ctxt->depth = depth;
  12906. /*
  12907. * Doing validity checking on chunk doesn't make sense
  12908. */
  12909. ctxt->validate = 0;
  12910. ctxt->loadsubset = 0;
  12911. xmlDetectSAX2(ctxt);
  12912. if ( doc != NULL ){
  12913. content = doc->children;
  12914. doc->children = NULL;
  12915. xmlParseContent(ctxt);
  12916. doc->children = content;
  12917. }
  12918. else {
  12919. xmlParseContent(ctxt);
  12920. }
  12921. if ((RAW == '<') && (NXT(1) == '/')) {
  12922. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12923. } else if (RAW != 0) {
  12924. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12925. }
  12926. if (ctxt->node != newDoc->children) {
  12927. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12928. }
  12929. if (!ctxt->wellFormed) {
  12930. if (ctxt->errNo == 0)
  12931. ret = 1;
  12932. else
  12933. ret = ctxt->errNo;
  12934. } else {
  12935. ret = 0;
  12936. }
  12937. if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
  12938. xmlNodePtr cur;
  12939. /*
  12940. * Return the newly created nodeset after unlinking it from
  12941. * they pseudo parent.
  12942. */
  12943. cur = newDoc->children->children;
  12944. *lst = cur;
  12945. while (cur != NULL) {
  12946. xmlSetTreeDoc(cur, doc);
  12947. cur->parent = NULL;
  12948. cur = cur->next;
  12949. }
  12950. newDoc->children->children = NULL;
  12951. }
  12952. if (sax != NULL)
  12953. ctxt->sax = oldsax;
  12954. xmlFreeParserCtxt(ctxt);
  12955. newDoc->intSubset = NULL;
  12956. newDoc->extSubset = NULL;
  12957. /* This leaks the namespace list if doc == NULL */
  12958. newDoc->oldNs = NULL;
  12959. xmlFreeDoc(newDoc);
  12960. return(ret);
  12961. }
  12962. /**
  12963. * xmlSAXParseEntity:
  12964. * @sax: the SAX handler block
  12965. * @filename: the filename
  12966. *
  12967. * parse an XML external entity out of context and build a tree.
  12968. * It use the given SAX function block to handle the parsing callback.
  12969. * If sax is NULL, fallback to the default DOM tree building routines.
  12970. *
  12971. * [78] extParsedEnt ::= TextDecl? content
  12972. *
  12973. * This correspond to a "Well Balanced" chunk
  12974. *
  12975. * Returns the resulting document tree
  12976. */
  12977. xmlDocPtr
  12978. xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
  12979. xmlDocPtr ret;
  12980. xmlParserCtxtPtr ctxt;
  12981. ctxt = xmlCreateFileParserCtxt(filename);
  12982. if (ctxt == NULL) {
  12983. return(NULL);
  12984. }
  12985. if (sax != NULL) {
  12986. if (ctxt->sax != NULL)
  12987. xmlFree(ctxt->sax);
  12988. ctxt->sax = sax;
  12989. ctxt->userData = NULL;
  12990. }
  12991. xmlParseExtParsedEnt(ctxt);
  12992. if (ctxt->wellFormed)
  12993. ret = ctxt->myDoc;
  12994. else {
  12995. ret = NULL;
  12996. xmlFreeDoc(ctxt->myDoc);
  12997. ctxt->myDoc = NULL;
  12998. }
  12999. if (sax != NULL)
  13000. ctxt->sax = NULL;
  13001. xmlFreeParserCtxt(ctxt);
  13002. return(ret);
  13003. }
  13004. /**
  13005. * xmlParseEntity:
  13006. * @filename: the filename
  13007. *
  13008. * parse an XML external entity out of context and build a tree.
  13009. *
  13010. * [78] extParsedEnt ::= TextDecl? content
  13011. *
  13012. * This correspond to a "Well Balanced" chunk
  13013. *
  13014. * Returns the resulting document tree
  13015. */
  13016. xmlDocPtr
  13017. xmlParseEntity(const char *filename) {
  13018. return(xmlSAXParseEntity(NULL, filename));
  13019. }
  13020. #endif /* LIBXML_SAX1_ENABLED */
  13021. /**
  13022. * xmlCreateEntityParserCtxtInternal:
  13023. * @URL: the entity URL
  13024. * @ID: the entity PUBLIC ID
  13025. * @base: a possible base for the target URI
  13026. * @pctx: parser context used to set options on new context
  13027. *
  13028. * Create a parser context for an external entity
  13029. * Automatic support for ZLIB/Compress compressed document is provided
  13030. * by default if found at compile-time.
  13031. *
  13032. * Returns the new parser context or NULL
  13033. */
  13034. static xmlParserCtxtPtr
  13035. xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  13036. const xmlChar *base, xmlParserCtxtPtr pctx) {
  13037. xmlParserCtxtPtr ctxt;
  13038. xmlParserInputPtr inputStream;
  13039. char *directory = NULL;
  13040. xmlChar *uri;
  13041. ctxt = xmlNewParserCtxt();
  13042. if (ctxt == NULL) {
  13043. return(NULL);
  13044. }
  13045. if (pctx != NULL) {
  13046. ctxt->options = pctx->options;
  13047. ctxt->_private = pctx->_private;
  13048. /*
  13049. * this is a subparser of pctx, so the input_id should be
  13050. * incremented to distinguish from main entity
  13051. */
  13052. ctxt->input_id = pctx->input_id + 1;
  13053. }
  13054. /* Don't read from stdin. */
  13055. if (xmlStrcmp(URL, BAD_CAST "-") == 0)
  13056. URL = BAD_CAST "./-";
  13057. uri = xmlBuildURI(URL, base);
  13058. if (uri == NULL) {
  13059. inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
  13060. if (inputStream == NULL) {
  13061. xmlFreeParserCtxt(ctxt);
  13062. return(NULL);
  13063. }
  13064. inputPush(ctxt, inputStream);
  13065. if ((ctxt->directory == NULL) && (directory == NULL))
  13066. directory = xmlParserGetDirectory((char *)URL);
  13067. if ((ctxt->directory == NULL) && (directory != NULL))
  13068. ctxt->directory = directory;
  13069. } else {
  13070. inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
  13071. if (inputStream == NULL) {
  13072. xmlFree(uri);
  13073. xmlFreeParserCtxt(ctxt);
  13074. return(NULL);
  13075. }
  13076. inputPush(ctxt, inputStream);
  13077. if ((ctxt->directory == NULL) && (directory == NULL))
  13078. directory = xmlParserGetDirectory((char *)uri);
  13079. if ((ctxt->directory == NULL) && (directory != NULL))
  13080. ctxt->directory = directory;
  13081. xmlFree(uri);
  13082. }
  13083. return(ctxt);
  13084. }
  13085. /**
  13086. * xmlCreateEntityParserCtxt:
  13087. * @URL: the entity URL
  13088. * @ID: the entity PUBLIC ID
  13089. * @base: a possible base for the target URI
  13090. *
  13091. * Create a parser context for an external entity
  13092. * Automatic support for ZLIB/Compress compressed document is provided
  13093. * by default if found at compile-time.
  13094. *
  13095. * Returns the new parser context or NULL
  13096. */
  13097. xmlParserCtxtPtr
  13098. xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
  13099. const xmlChar *base) {
  13100. return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
  13101. }
  13102. /************************************************************************
  13103. * *
  13104. * Front ends when parsing from a file *
  13105. * *
  13106. ************************************************************************/
  13107. /**
  13108. * xmlCreateURLParserCtxt:
  13109. * @filename: the filename or URL
  13110. * @options: a combination of xmlParserOption
  13111. *
  13112. * Create a parser context for a file or URL content.
  13113. * Automatic support for ZLIB/Compress compressed document is provided
  13114. * by default if found at compile-time and for file accesses
  13115. *
  13116. * Returns the new parser context or NULL
  13117. */
  13118. xmlParserCtxtPtr
  13119. xmlCreateURLParserCtxt(const char *filename, int options)
  13120. {
  13121. xmlParserCtxtPtr ctxt;
  13122. xmlParserInputPtr inputStream;
  13123. char *directory = NULL;
  13124. ctxt = xmlNewParserCtxt();
  13125. if (ctxt == NULL) {
  13126. xmlErrMemory(NULL, "cannot allocate parser context");
  13127. return(NULL);
  13128. }
  13129. if (options)
  13130. xmlCtxtUseOptionsInternal(ctxt, options, NULL);
  13131. ctxt->linenumbers = 1;
  13132. inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
  13133. if (inputStream == NULL) {
  13134. xmlFreeParserCtxt(ctxt);
  13135. return(NULL);
  13136. }
  13137. inputPush(ctxt, inputStream);
  13138. if ((ctxt->directory == NULL) && (directory == NULL))
  13139. directory = xmlParserGetDirectory(filename);
  13140. if ((ctxt->directory == NULL) && (directory != NULL))
  13141. ctxt->directory = directory;
  13142. return(ctxt);
  13143. }
  13144. /**
  13145. * xmlCreateFileParserCtxt:
  13146. * @filename: the filename
  13147. *
  13148. * Create a parser context for a file content.
  13149. * Automatic support for ZLIB/Compress compressed document is provided
  13150. * by default if found at compile-time.
  13151. *
  13152. * Returns the new parser context or NULL
  13153. */
  13154. xmlParserCtxtPtr
  13155. xmlCreateFileParserCtxt(const char *filename)
  13156. {
  13157. return(xmlCreateURLParserCtxt(filename, 0));
  13158. }
  13159. #ifdef LIBXML_SAX1_ENABLED
  13160. /**
  13161. * xmlSAXParseFileWithData:
  13162. * @sax: the SAX handler block
  13163. * @filename: the filename
  13164. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13165. * documents
  13166. * @data: the userdata
  13167. *
  13168. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13169. * compressed document is provided by default if found at compile-time.
  13170. * It use the given SAX function block to handle the parsing callback.
  13171. * If sax is NULL, fallback to the default DOM tree building routines.
  13172. *
  13173. * User data (void *) is stored within the parser context in the
  13174. * context's _private member, so it is available nearly everywhere in libxml
  13175. *
  13176. * Returns the resulting document tree
  13177. */
  13178. xmlDocPtr
  13179. xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
  13180. int recovery, void *data) {
  13181. xmlDocPtr ret;
  13182. xmlParserCtxtPtr ctxt;
  13183. xmlInitParser();
  13184. ctxt = xmlCreateFileParserCtxt(filename);
  13185. if (ctxt == NULL) {
  13186. return(NULL);
  13187. }
  13188. if (sax != NULL) {
  13189. if (ctxt->sax != NULL)
  13190. xmlFree(ctxt->sax);
  13191. ctxt->sax = sax;
  13192. }
  13193. xmlDetectSAX2(ctxt);
  13194. if (data!=NULL) {
  13195. ctxt->_private = data;
  13196. }
  13197. if (ctxt->directory == NULL)
  13198. ctxt->directory = xmlParserGetDirectory(filename);
  13199. ctxt->recovery = recovery;
  13200. xmlParseDocument(ctxt);
  13201. if ((ctxt->wellFormed) || recovery) {
  13202. ret = ctxt->myDoc;
  13203. if ((ret != NULL) && (ctxt->input->buf != NULL)) {
  13204. if (ctxt->input->buf->compressed > 0)
  13205. ret->compression = 9;
  13206. else
  13207. ret->compression = ctxt->input->buf->compressed;
  13208. }
  13209. }
  13210. else {
  13211. ret = NULL;
  13212. xmlFreeDoc(ctxt->myDoc);
  13213. ctxt->myDoc = NULL;
  13214. }
  13215. if (sax != NULL)
  13216. ctxt->sax = NULL;
  13217. xmlFreeParserCtxt(ctxt);
  13218. return(ret);
  13219. }
  13220. /**
  13221. * xmlSAXParseFile:
  13222. * @sax: the SAX handler block
  13223. * @filename: the filename
  13224. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13225. * documents
  13226. *
  13227. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13228. * compressed document is provided by default if found at compile-time.
  13229. * It use the given SAX function block to handle the parsing callback.
  13230. * If sax is NULL, fallback to the default DOM tree building routines.
  13231. *
  13232. * Returns the resulting document tree
  13233. */
  13234. xmlDocPtr
  13235. xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
  13236. int recovery) {
  13237. return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
  13238. }
  13239. /**
  13240. * xmlRecoverDoc:
  13241. * @cur: a pointer to an array of xmlChar
  13242. *
  13243. * parse an XML in-memory document and build a tree.
  13244. * In the case the document is not Well Formed, a attempt to build a
  13245. * tree is tried anyway
  13246. *
  13247. * Returns the resulting document tree or NULL in case of failure
  13248. */
  13249. xmlDocPtr
  13250. xmlRecoverDoc(const xmlChar *cur) {
  13251. return(xmlSAXParseDoc(NULL, cur, 1));
  13252. }
  13253. /**
  13254. * xmlParseFile:
  13255. * @filename: the filename
  13256. *
  13257. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13258. * compressed document is provided by default if found at compile-time.
  13259. *
  13260. * Returns the resulting document tree if the file was wellformed,
  13261. * NULL otherwise.
  13262. */
  13263. xmlDocPtr
  13264. xmlParseFile(const char *filename) {
  13265. return(xmlSAXParseFile(NULL, filename, 0));
  13266. }
  13267. /**
  13268. * xmlRecoverFile:
  13269. * @filename: the filename
  13270. *
  13271. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13272. * compressed document is provided by default if found at compile-time.
  13273. * In the case the document is not Well Formed, it attempts to build
  13274. * a tree anyway
  13275. *
  13276. * Returns the resulting document tree or NULL in case of failure
  13277. */
  13278. xmlDocPtr
  13279. xmlRecoverFile(const char *filename) {
  13280. return(xmlSAXParseFile(NULL, filename, 1));
  13281. }
  13282. /**
  13283. * xmlSetupParserForBuffer:
  13284. * @ctxt: an XML parser context
  13285. * @buffer: a xmlChar * buffer
  13286. * @filename: a file name
  13287. *
  13288. * Setup the parser context to parse a new buffer; Clears any prior
  13289. * contents from the parser context. The buffer parameter must not be
  13290. * NULL, but the filename parameter can be
  13291. */
  13292. void
  13293. xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
  13294. const char* filename)
  13295. {
  13296. xmlParserInputPtr input;
  13297. if ((ctxt == NULL) || (buffer == NULL))
  13298. return;
  13299. input = xmlNewInputStream(ctxt);
  13300. if (input == NULL) {
  13301. xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
  13302. xmlClearParserCtxt(ctxt);
  13303. return;
  13304. }
  13305. xmlClearParserCtxt(ctxt);
  13306. if (filename != NULL)
  13307. input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
  13308. input->base = buffer;
  13309. input->cur = buffer;
  13310. input->end = &buffer[xmlStrlen(buffer)];
  13311. inputPush(ctxt, input);
  13312. }
  13313. /**
  13314. * xmlSAXUserParseFile:
  13315. * @sax: a SAX handler
  13316. * @user_data: The user data returned on SAX callbacks
  13317. * @filename: a file name
  13318. *
  13319. * parse an XML file and call the given SAX handler routines.
  13320. * Automatic support for ZLIB/Compress compressed document is provided
  13321. *
  13322. * Returns 0 in case of success or a error number otherwise
  13323. */
  13324. int
  13325. xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
  13326. const char *filename) {
  13327. int ret = 0;
  13328. xmlParserCtxtPtr ctxt;
  13329. ctxt = xmlCreateFileParserCtxt(filename);
  13330. if (ctxt == NULL) return -1;
  13331. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  13332. xmlFree(ctxt->sax);
  13333. ctxt->sax = sax;
  13334. xmlDetectSAX2(ctxt);
  13335. if (user_data != NULL)
  13336. ctxt->userData = user_data;
  13337. xmlParseDocument(ctxt);
  13338. if (ctxt->wellFormed)
  13339. ret = 0;
  13340. else {
  13341. if (ctxt->errNo != 0)
  13342. ret = ctxt->errNo;
  13343. else
  13344. ret = -1;
  13345. }
  13346. if (sax != NULL)
  13347. ctxt->sax = NULL;
  13348. if (ctxt->myDoc != NULL) {
  13349. xmlFreeDoc(ctxt->myDoc);
  13350. ctxt->myDoc = NULL;
  13351. }
  13352. xmlFreeParserCtxt(ctxt);
  13353. return ret;
  13354. }
  13355. #endif /* LIBXML_SAX1_ENABLED */
  13356. /************************************************************************
  13357. * *
  13358. * Front ends when parsing from memory *
  13359. * *
  13360. ************************************************************************/
  13361. /**
  13362. * xmlCreateMemoryParserCtxt:
  13363. * @buffer: a pointer to a char array
  13364. * @size: the size of the array
  13365. *
  13366. * Create a parser context for an XML in-memory document.
  13367. *
  13368. * Returns the new parser context or NULL
  13369. */
  13370. xmlParserCtxtPtr
  13371. xmlCreateMemoryParserCtxt(const char *buffer, int size) {
  13372. xmlParserCtxtPtr ctxt;
  13373. xmlParserInputPtr input;
  13374. xmlParserInputBufferPtr buf;
  13375. if (buffer == NULL)
  13376. return(NULL);
  13377. if (size <= 0)
  13378. return(NULL);
  13379. ctxt = xmlNewParserCtxt();
  13380. if (ctxt == NULL)
  13381. return(NULL);
  13382. /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
  13383. buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
  13384. if (buf == NULL) {
  13385. xmlFreeParserCtxt(ctxt);
  13386. return(NULL);
  13387. }
  13388. input = xmlNewInputStream(ctxt);
  13389. if (input == NULL) {
  13390. xmlFreeParserInputBuffer(buf);
  13391. xmlFreeParserCtxt(ctxt);
  13392. return(NULL);
  13393. }
  13394. input->filename = NULL;
  13395. input->buf = buf;
  13396. xmlBufResetInput(input->buf->buffer, input);
  13397. inputPush(ctxt, input);
  13398. return(ctxt);
  13399. }
  13400. #ifdef LIBXML_SAX1_ENABLED
  13401. /**
  13402. * xmlSAXParseMemoryWithData:
  13403. * @sax: the SAX handler block
  13404. * @buffer: an pointer to a char array
  13405. * @size: the size of the array
  13406. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13407. * documents
  13408. * @data: the userdata
  13409. *
  13410. * parse an XML in-memory block and use the given SAX function block
  13411. * to handle the parsing callback. If sax is NULL, fallback to the default
  13412. * DOM tree building routines.
  13413. *
  13414. * User data (void *) is stored within the parser context in the
  13415. * context's _private member, so it is available nearly everywhere in libxml
  13416. *
  13417. * Returns the resulting document tree
  13418. */
  13419. xmlDocPtr
  13420. xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
  13421. int size, int recovery, void *data) {
  13422. xmlDocPtr ret;
  13423. xmlParserCtxtPtr ctxt;
  13424. xmlInitParser();
  13425. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  13426. if (ctxt == NULL) return(NULL);
  13427. if (sax != NULL) {
  13428. if (ctxt->sax != NULL)
  13429. xmlFree(ctxt->sax);
  13430. ctxt->sax = sax;
  13431. }
  13432. xmlDetectSAX2(ctxt);
  13433. if (data!=NULL) {
  13434. ctxt->_private=data;
  13435. }
  13436. ctxt->recovery = recovery;
  13437. xmlParseDocument(ctxt);
  13438. if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
  13439. else {
  13440. ret = NULL;
  13441. xmlFreeDoc(ctxt->myDoc);
  13442. ctxt->myDoc = NULL;
  13443. }
  13444. if (sax != NULL)
  13445. ctxt->sax = NULL;
  13446. xmlFreeParserCtxt(ctxt);
  13447. return(ret);
  13448. }
  13449. /**
  13450. * xmlSAXParseMemory:
  13451. * @sax: the SAX handler block
  13452. * @buffer: an pointer to a char array
  13453. * @size: the size of the array
  13454. * @recovery: work in recovery mode, i.e. tries to read not Well Formed
  13455. * documents
  13456. *
  13457. * parse an XML in-memory block and use the given SAX function block
  13458. * to handle the parsing callback. If sax is NULL, fallback to the default
  13459. * DOM tree building routines.
  13460. *
  13461. * Returns the resulting document tree
  13462. */
  13463. xmlDocPtr
  13464. xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
  13465. int size, int recovery) {
  13466. return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
  13467. }
  13468. /**
  13469. * xmlParseMemory:
  13470. * @buffer: an pointer to a char array
  13471. * @size: the size of the array
  13472. *
  13473. * parse an XML in-memory block and build a tree.
  13474. *
  13475. * Returns the resulting document tree
  13476. */
  13477. xmlDocPtr xmlParseMemory(const char *buffer, int size) {
  13478. return(xmlSAXParseMemory(NULL, buffer, size, 0));
  13479. }
  13480. /**
  13481. * xmlRecoverMemory:
  13482. * @buffer: an pointer to a char array
  13483. * @size: the size of the array
  13484. *
  13485. * parse an XML in-memory block and build a tree.
  13486. * In the case the document is not Well Formed, an attempt to
  13487. * build a tree is tried anyway
  13488. *
  13489. * Returns the resulting document tree or NULL in case of error
  13490. */
  13491. xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
  13492. return(xmlSAXParseMemory(NULL, buffer, size, 1));
  13493. }
  13494. /**
  13495. * xmlSAXUserParseMemory:
  13496. * @sax: a SAX handler
  13497. * @user_data: The user data returned on SAX callbacks
  13498. * @buffer: an in-memory XML document input
  13499. * @size: the length of the XML document in bytes
  13500. *
  13501. * A better SAX parsing routine.
  13502. * parse an XML in-memory buffer and call the given SAX handler routines.
  13503. *
  13504. * Returns 0 in case of success or a error number otherwise
  13505. */
  13506. int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
  13507. const char *buffer, int size) {
  13508. int ret = 0;
  13509. xmlParserCtxtPtr ctxt;
  13510. xmlInitParser();
  13511. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  13512. if (ctxt == NULL) return -1;
  13513. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  13514. xmlFree(ctxt->sax);
  13515. ctxt->sax = sax;
  13516. xmlDetectSAX2(ctxt);
  13517. if (user_data != NULL)
  13518. ctxt->userData = user_data;
  13519. xmlParseDocument(ctxt);
  13520. if (ctxt->wellFormed)
  13521. ret = 0;
  13522. else {
  13523. if (ctxt->errNo != 0)
  13524. ret = ctxt->errNo;
  13525. else
  13526. ret = -1;
  13527. }
  13528. if (sax != NULL)
  13529. ctxt->sax = NULL;
  13530. if (ctxt->myDoc != NULL) {
  13531. xmlFreeDoc(ctxt->myDoc);
  13532. ctxt->myDoc = NULL;
  13533. }
  13534. xmlFreeParserCtxt(ctxt);
  13535. return ret;
  13536. }
  13537. #endif /* LIBXML_SAX1_ENABLED */
  13538. /**
  13539. * xmlCreateDocParserCtxt:
  13540. * @cur: a pointer to an array of xmlChar
  13541. *
  13542. * Creates a parser context for an XML in-memory document.
  13543. *
  13544. * Returns the new parser context or NULL
  13545. */
  13546. xmlParserCtxtPtr
  13547. xmlCreateDocParserCtxt(const xmlChar *cur) {
  13548. int len;
  13549. if (cur == NULL)
  13550. return(NULL);
  13551. len = xmlStrlen(cur);
  13552. return(xmlCreateMemoryParserCtxt((const char *)cur, len));
  13553. }
  13554. #ifdef LIBXML_SAX1_ENABLED
  13555. /**
  13556. * xmlSAXParseDoc:
  13557. * @sax: the SAX handler block
  13558. * @cur: a pointer to an array of xmlChar
  13559. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13560. * documents
  13561. *
  13562. * parse an XML in-memory document and build a tree.
  13563. * It use the given SAX function block to handle the parsing callback.
  13564. * If sax is NULL, fallback to the default DOM tree building routines.
  13565. *
  13566. * Returns the resulting document tree
  13567. */
  13568. xmlDocPtr
  13569. xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
  13570. xmlDocPtr ret;
  13571. xmlParserCtxtPtr ctxt;
  13572. xmlSAXHandlerPtr oldsax = NULL;
  13573. if (cur == NULL) return(NULL);
  13574. ctxt = xmlCreateDocParserCtxt(cur);
  13575. if (ctxt == NULL) return(NULL);
  13576. if (sax != NULL) {
  13577. oldsax = ctxt->sax;
  13578. ctxt->sax = sax;
  13579. ctxt->userData = NULL;
  13580. }
  13581. xmlDetectSAX2(ctxt);
  13582. xmlParseDocument(ctxt);
  13583. if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
  13584. else {
  13585. ret = NULL;
  13586. xmlFreeDoc(ctxt->myDoc);
  13587. ctxt->myDoc = NULL;
  13588. }
  13589. if (sax != NULL)
  13590. ctxt->sax = oldsax;
  13591. xmlFreeParserCtxt(ctxt);
  13592. return(ret);
  13593. }
  13594. /**
  13595. * xmlParseDoc:
  13596. * @cur: a pointer to an array of xmlChar
  13597. *
  13598. * parse an XML in-memory document and build a tree.
  13599. *
  13600. * Returns the resulting document tree
  13601. */
  13602. xmlDocPtr
  13603. xmlParseDoc(const xmlChar *cur) {
  13604. return(xmlSAXParseDoc(NULL, cur, 0));
  13605. }
  13606. #endif /* LIBXML_SAX1_ENABLED */
  13607. #ifdef LIBXML_LEGACY_ENABLED
  13608. /************************************************************************
  13609. * *
  13610. * Specific function to keep track of entities references *
  13611. * and used by the XSLT debugger *
  13612. * *
  13613. ************************************************************************/
  13614. static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
  13615. /**
  13616. * xmlAddEntityReference:
  13617. * @ent : A valid entity
  13618. * @firstNode : A valid first node for children of entity
  13619. * @lastNode : A valid last node of children entity
  13620. *
  13621. * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
  13622. */
  13623. static void
  13624. xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
  13625. xmlNodePtr lastNode)
  13626. {
  13627. if (xmlEntityRefFunc != NULL) {
  13628. (*xmlEntityRefFunc) (ent, firstNode, lastNode);
  13629. }
  13630. }
  13631. /**
  13632. * xmlSetEntityReferenceFunc:
  13633. * @func: A valid function
  13634. *
  13635. * Set the function to call call back when a xml reference has been made
  13636. */
  13637. void
  13638. xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
  13639. {
  13640. xmlEntityRefFunc = func;
  13641. }
  13642. #endif /* LIBXML_LEGACY_ENABLED */
  13643. /************************************************************************
  13644. * *
  13645. * Miscellaneous *
  13646. * *
  13647. ************************************************************************/
  13648. #ifdef LIBXML_XPATH_ENABLED
  13649. #include <libxml/xpath.h>
  13650. #endif
  13651. extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
  13652. static int xmlParserInitialized = 0;
  13653. /**
  13654. * xmlInitParser:
  13655. *
  13656. * Initialization function for the XML parser.
  13657. * This is not reentrant. Call once before processing in case of
  13658. * use in multithreaded programs.
  13659. */
  13660. void
  13661. xmlInitParser(void) {
  13662. if (xmlParserInitialized != 0)
  13663. return;
  13664. #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
  13665. if (xmlFree == free)
  13666. atexit(xmlCleanupParser);
  13667. #endif
  13668. #ifdef LIBXML_THREAD_ENABLED
  13669. __xmlGlobalInitMutexLock();
  13670. if (xmlParserInitialized == 0) {
  13671. #endif
  13672. xmlInitThreads();
  13673. xmlInitGlobals();
  13674. if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
  13675. (xmlGenericError == NULL))
  13676. initGenericErrorDefaultFunc(NULL);
  13677. xmlInitMemory();
  13678. xmlInitializeDict();
  13679. xmlInitCharEncodingHandlers();
  13680. xmlDefaultSAXHandlerInit();
  13681. xmlRegisterDefaultInputCallbacks();
  13682. #ifdef LIBXML_OUTPUT_ENABLED
  13683. xmlRegisterDefaultOutputCallbacks();
  13684. #endif /* LIBXML_OUTPUT_ENABLED */
  13685. #ifdef LIBXML_HTML_ENABLED
  13686. htmlInitAutoClose();
  13687. htmlDefaultSAXHandlerInit();
  13688. #endif
  13689. #ifdef LIBXML_XPATH_ENABLED
  13690. xmlXPathInit();
  13691. #endif
  13692. xmlParserInitialized = 1;
  13693. #ifdef LIBXML_THREAD_ENABLED
  13694. }
  13695. __xmlGlobalInitMutexUnlock();
  13696. #endif
  13697. }
  13698. /**
  13699. * xmlCleanupParser:
  13700. *
  13701. * This function name is somewhat misleading. It does not clean up
  13702. * parser state, it cleans up memory allocated by the library itself.
  13703. * It is a cleanup function for the XML library. It tries to reclaim all
  13704. * related global memory allocated for the library processing.
  13705. * It doesn't deallocate any document related memory. One should
  13706. * call xmlCleanupParser() only when the process has finished using
  13707. * the library and all XML/HTML documents built with it.
  13708. * See also xmlInitParser() which has the opposite function of preparing
  13709. * the library for operations.
  13710. *
  13711. * WARNING: if your application is multithreaded or has plugin support
  13712. * calling this may crash the application if another thread or
  13713. * a plugin is still using libxml2. It's sometimes very hard to
  13714. * guess if libxml2 is in use in the application, some libraries
  13715. * or plugins may use it without notice. In case of doubt abstain
  13716. * from calling this function or do it just before calling exit()
  13717. * to avoid leak reports from valgrind !
  13718. */
  13719. void
  13720. xmlCleanupParser(void) {
  13721. if (!xmlParserInitialized)
  13722. return;
  13723. xmlCleanupCharEncodingHandlers();
  13724. #ifdef LIBXML_CATALOG_ENABLED
  13725. xmlCatalogCleanup();
  13726. #endif
  13727. xmlDictCleanup();
  13728. xmlCleanupInputCallbacks();
  13729. #ifdef LIBXML_OUTPUT_ENABLED
  13730. xmlCleanupOutputCallbacks();
  13731. #endif
  13732. #ifdef LIBXML_SCHEMAS_ENABLED
  13733. xmlSchemaCleanupTypes();
  13734. xmlRelaxNGCleanupTypes();
  13735. #endif
  13736. xmlResetLastError();
  13737. xmlCleanupGlobals();
  13738. xmlCleanupThreads(); /* must be last if called not from the main thread */
  13739. xmlCleanupMemory();
  13740. xmlParserInitialized = 0;
  13741. }
  #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
      !defined(_WIN32)
  /*
   * xmlDestructor:
   *
   * Destructor hook that runs xmlCleanupParser(), but only while the
   * standard free() is still installed as xmlFree.
   */
  static void
  ATTRIBUTE_DESTRUCTOR
  xmlDestructor(void) {
      /*
       * Calling custom deallocation functions in a destructor can cause
       * problems, for example with Nokogiri.
       */
      if (xmlFree != free)
          return;

      xmlCleanupParser();
  }
  #endif
  13755. /************************************************************************
  13756. * *
  13757. * New set (2.6.0) of simpler and more flexible APIs *
  13758. * *
  13759. ************************************************************************/
  /**
   * DICT_FREE:
   * @str:  a string
   *
   * Free a string if it is not owned by the "dict" dictionary in the
   * current scope. A variable named "dict" must be visible where the
   * macro is used; a NULL @str is ignored.
   *
   * The body is wrapped in do { } while (0) so the macro expands to a
   * single statement and is safe inside an unbraced if/else. Callers
   * supply the terminating semicolon.
   */
  #define DICT_FREE(str)						\
      do {								\
          if ((str) && ((!dict) ||					\
              (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
              xmlFree((char *)(str));					\
      } while (0)
  13771. /**
  13772. * xmlCtxtReset:
  13773. * @ctxt: an XML parser context
  13774. *
  13775. * Reset a parser context
  13776. */
  13777. void
  13778. xmlCtxtReset(xmlParserCtxtPtr ctxt)
  13779. {
  13780. xmlParserInputPtr input;
  13781. xmlDictPtr dict;
  13782. if (ctxt == NULL)
  13783. return;
  13784. dict = ctxt->dict;
  13785. while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
  13786. xmlFreeInputStream(input);
  13787. }
  13788. ctxt->inputNr = 0;
  13789. ctxt->input = NULL;
  13790. ctxt->spaceNr = 0;
  13791. if (ctxt->spaceTab != NULL) {
  13792. ctxt->spaceTab[0] = -1;
  13793. ctxt->space = &ctxt->spaceTab[0];
  13794. } else {
  13795. ctxt->space = NULL;
  13796. }
  13797. ctxt->nodeNr = 0;
  13798. ctxt->node = NULL;
  13799. ctxt->nameNr = 0;
  13800. ctxt->name = NULL;
  13801. DICT_FREE(ctxt->version);
  13802. ctxt->version = NULL;
  13803. DICT_FREE(ctxt->encoding);
  13804. ctxt->encoding = NULL;
  13805. DICT_FREE(ctxt->directory);
  13806. ctxt->directory = NULL;
  13807. DICT_FREE(ctxt->extSubURI);
  13808. ctxt->extSubURI = NULL;
  13809. DICT_FREE(ctxt->extSubSystem);
  13810. ctxt->extSubSystem = NULL;
  13811. if (ctxt->myDoc != NULL)
  13812. xmlFreeDoc(ctxt->myDoc);
  13813. ctxt->myDoc = NULL;
  13814. ctxt->standalone = -1;
  13815. ctxt->hasExternalSubset = 0;
  13816. ctxt->hasPErefs = 0;
  13817. ctxt->html = 0;
  13818. ctxt->external = 0;
  13819. ctxt->instate = XML_PARSER_START;
  13820. ctxt->token = 0;
  13821. ctxt->wellFormed = 1;
  13822. ctxt->nsWellFormed = 1;
  13823. ctxt->disableSAX = 0;
  13824. ctxt->valid = 1;
  13825. #if 0
  13826. ctxt->vctxt.userData = ctxt;
  13827. ctxt->vctxt.error = xmlParserValidityError;
  13828. ctxt->vctxt.warning = xmlParserValidityWarning;
  13829. #endif
  13830. ctxt->record_info = 0;
  13831. ctxt->checkIndex = 0;
  13832. ctxt->inSubset = 0;
  13833. ctxt->errNo = XML_ERR_OK;
  13834. ctxt->depth = 0;
  13835. ctxt->charset = XML_CHAR_ENCODING_UTF8;
  13836. ctxt->catalogs = NULL;
  13837. ctxt->nbentities = 0;
  13838. ctxt->sizeentities = 0;
  13839. ctxt->sizeentcopy = 0;
  13840. xmlInitNodeInfoSeq(&ctxt->node_seq);
  13841. if (ctxt->attsDefault != NULL) {
  13842. xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
  13843. ctxt->attsDefault = NULL;
  13844. }
  13845. if (ctxt->attsSpecial != NULL) {
  13846. xmlHashFree(ctxt->attsSpecial, NULL);
  13847. ctxt->attsSpecial = NULL;
  13848. }
  13849. #ifdef LIBXML_CATALOG_ENABLED
  13850. if (ctxt->catalogs != NULL)
  13851. xmlCatalogFreeLocal(ctxt->catalogs);
  13852. #endif
  13853. if (ctxt->lastError.code != XML_ERR_OK)
  13854. xmlResetError(&ctxt->lastError);
  13855. }
  13856. /**
  13857. * xmlCtxtResetPush:
  13858. * @ctxt: an XML parser context
  13859. * @chunk: a pointer to an array of chars
  13860. * @size: number of chars in the array
  13861. * @filename: an optional file name or URI
  13862. * @encoding: the document encoding, or NULL
  13863. *
  13864. * Reset a push parser context
  13865. *
  13866. * Returns 0 in case of success and 1 in case of error
  13867. */
  13868. int
  13869. xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
  13870. int size, const char *filename, const char *encoding)
  13871. {
  13872. xmlParserInputPtr inputStream;
  13873. xmlParserInputBufferPtr buf;
  13874. xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
  13875. if (ctxt == NULL)
  13876. return(1);
  13877. if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
  13878. enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
  13879. buf = xmlAllocParserInputBuffer(enc);
  13880. if (buf == NULL)
  13881. return(1);
  13882. if (ctxt == NULL) {
  13883. xmlFreeParserInputBuffer(buf);
  13884. return(1);
  13885. }
  13886. xmlCtxtReset(ctxt);
  13887. if (filename == NULL) {
  13888. ctxt->directory = NULL;
  13889. } else {
  13890. ctxt->directory = xmlParserGetDirectory(filename);
  13891. }
  13892. inputStream = xmlNewInputStream(ctxt);
  13893. if (inputStream == NULL) {
  13894. xmlFreeParserInputBuffer(buf);
  13895. return(1);
  13896. }
  13897. if (filename == NULL)
  13898. inputStream->filename = NULL;
  13899. else
  13900. inputStream->filename = (char *)
  13901. xmlCanonicPath((const xmlChar *) filename);
  13902. inputStream->buf = buf;
  13903. xmlBufResetInput(buf->buffer, inputStream);
  13904. inputPush(ctxt, inputStream);
  13905. if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
  13906. (ctxt->input->buf != NULL)) {
  13907. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
  13908. size_t cur = ctxt->input->cur - ctxt->input->base;
  13909. xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  13910. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
  13911. #ifdef DEBUG_PUSH
  13912. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  13913. #endif
  13914. }
  13915. if (encoding != NULL) {
  13916. xmlCharEncodingHandlerPtr hdlr;
  13917. if (ctxt->encoding != NULL)
  13918. xmlFree((xmlChar *) ctxt->encoding);
  13919. ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
  13920. hdlr = xmlFindCharEncodingHandler(encoding);
  13921. if (hdlr != NULL) {
  13922. xmlSwitchToEncoding(ctxt, hdlr);
  13923. } else {
  13924. xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
  13925. "Unsupported encoding %s\n", BAD_CAST encoding);
  13926. }
  13927. } else if (enc != XML_CHAR_ENCODING_NONE) {
  13928. xmlSwitchEncoding(ctxt, enc);
  13929. }
  13930. return(0);
  13931. }
  13932. /**
  13933. * xmlCtxtUseOptionsInternal:
  13934. * @ctxt: an XML parser context
  13935. * @options: a combination of xmlParserOption
  13936. * @encoding: the user provided encoding to use
  13937. *
  13938. * Applies the options to the parser context
  13939. *
  13940. * Returns 0 in case of success, the set of unknown or unimplemented options
  13941. * in case of error.
  13942. */
  13943. static int
  13944. xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
  13945. {
  13946. if (ctxt == NULL)
  13947. return(-1);
  13948. if (encoding != NULL) {
  13949. if (ctxt->encoding != NULL)
  13950. xmlFree((xmlChar *) ctxt->encoding);
  13951. ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
  13952. }
  13953. if (options & XML_PARSE_RECOVER) {
  13954. ctxt->recovery = 1;
  13955. options -= XML_PARSE_RECOVER;
  13956. ctxt->options |= XML_PARSE_RECOVER;
  13957. } else
  13958. ctxt->recovery = 0;
  13959. if (options & XML_PARSE_DTDLOAD) {
  13960. ctxt->loadsubset = XML_DETECT_IDS;
  13961. options -= XML_PARSE_DTDLOAD;
  13962. ctxt->options |= XML_PARSE_DTDLOAD;
  13963. } else
  13964. ctxt->loadsubset = 0;
  13965. if (options & XML_PARSE_DTDATTR) {
  13966. ctxt->loadsubset |= XML_COMPLETE_ATTRS;
  13967. options -= XML_PARSE_DTDATTR;
  13968. ctxt->options |= XML_PARSE_DTDATTR;
  13969. }
  13970. if (options & XML_PARSE_NOENT) {
  13971. ctxt->replaceEntities = 1;
  13972. /* ctxt->loadsubset |= XML_DETECT_IDS; */
  13973. options -= XML_PARSE_NOENT;
  13974. ctxt->options |= XML_PARSE_NOENT;
  13975. } else
  13976. ctxt->replaceEntities = 0;
  13977. if (options & XML_PARSE_PEDANTIC) {
  13978. ctxt->pedantic = 1;
  13979. options -= XML_PARSE_PEDANTIC;
  13980. ctxt->options |= XML_PARSE_PEDANTIC;
  13981. } else
  13982. ctxt->pedantic = 0;
  13983. if (options & XML_PARSE_NOBLANKS) {
  13984. ctxt->keepBlanks = 0;
  13985. ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
  13986. options -= XML_PARSE_NOBLANKS;
  13987. ctxt->options |= XML_PARSE_NOBLANKS;
  13988. } else
  13989. ctxt->keepBlanks = 1;
  13990. if (options & XML_PARSE_DTDVALID) {
  13991. ctxt->validate = 1;
  13992. if (options & XML_PARSE_NOWARNING)
  13993. ctxt->vctxt.warning = NULL;
  13994. if (options & XML_PARSE_NOERROR)
  13995. ctxt->vctxt.error = NULL;
  13996. options -= XML_PARSE_DTDVALID;
  13997. ctxt->options |= XML_PARSE_DTDVALID;
  13998. } else
  13999. ctxt->validate = 0;
  14000. if (options & XML_PARSE_NOWARNING) {
  14001. ctxt->sax->warning = NULL;
  14002. options -= XML_PARSE_NOWARNING;
  14003. }
  14004. if (options & XML_PARSE_NOERROR) {
  14005. ctxt->sax->error = NULL;
  14006. ctxt->sax->fatalError = NULL;
  14007. options -= XML_PARSE_NOERROR;
  14008. }
  14009. #ifdef LIBXML_SAX1_ENABLED
  14010. if (options & XML_PARSE_SAX1) {
  14011. ctxt->sax->startElement = xmlSAX2StartElement;
  14012. ctxt->sax->endElement = xmlSAX2EndElement;
  14013. ctxt->sax->startElementNs = NULL;
  14014. ctxt->sax->endElementNs = NULL;
  14015. ctxt->sax->initialized = 1;
  14016. options -= XML_PARSE_SAX1;
  14017. ctxt->options |= XML_PARSE_SAX1;
  14018. }
  14019. #endif /* LIBXML_SAX1_ENABLED */
  14020. if (options & XML_PARSE_NODICT) {
  14021. ctxt->dictNames = 0;
  14022. options -= XML_PARSE_NODICT;
  14023. ctxt->options |= XML_PARSE_NODICT;
  14024. } else {
  14025. ctxt->dictNames = 1;
  14026. }
  14027. if (options & XML_PARSE_NOCDATA) {
  14028. ctxt->sax->cdataBlock = NULL;
  14029. options -= XML_PARSE_NOCDATA;
  14030. ctxt->options |= XML_PARSE_NOCDATA;
  14031. }
  14032. if (options & XML_PARSE_NSCLEAN) {
  14033. ctxt->options |= XML_PARSE_NSCLEAN;
  14034. options -= XML_PARSE_NSCLEAN;
  14035. }
  14036. if (options & XML_PARSE_NONET) {
  14037. ctxt->options |= XML_PARSE_NONET;
  14038. options -= XML_PARSE_NONET;
  14039. }
  14040. if (options & XML_PARSE_COMPACT) {
  14041. ctxt->options |= XML_PARSE_COMPACT;
  14042. options -= XML_PARSE_COMPACT;
  14043. }
  14044. if (options & XML_PARSE_OLD10) {
  14045. ctxt->options |= XML_PARSE_OLD10;
  14046. options -= XML_PARSE_OLD10;
  14047. }
  14048. if (options & XML_PARSE_NOBASEFIX) {
  14049. ctxt->options |= XML_PARSE_NOBASEFIX;
  14050. options -= XML_PARSE_NOBASEFIX;
  14051. }
  14052. if (options & XML_PARSE_HUGE) {
  14053. ctxt->options |= XML_PARSE_HUGE;
  14054. options -= XML_PARSE_HUGE;
  14055. if (ctxt->dict != NULL)
  14056. xmlDictSetLimit(ctxt->dict, 0);
  14057. }
  14058. if (options & XML_PARSE_OLDSAX) {
  14059. ctxt->options |= XML_PARSE_OLDSAX;
  14060. options -= XML_PARSE_OLDSAX;
  14061. }
  14062. if (options & XML_PARSE_IGNORE_ENC) {
  14063. ctxt->options |= XML_PARSE_IGNORE_ENC;
  14064. options -= XML_PARSE_IGNORE_ENC;
  14065. }
  14066. if (options & XML_PARSE_BIG_LINES) {
  14067. ctxt->options |= XML_PARSE_BIG_LINES;
  14068. options -= XML_PARSE_BIG_LINES;
  14069. }
  14070. ctxt->linenumbers = 1;
  14071. return (options);
  14072. }
  14073. /**
  14074. * xmlCtxtUseOptions:
  14075. * @ctxt: an XML parser context
  14076. * @options: a combination of xmlParserOption
  14077. *
  14078. * Applies the options to the parser context
  14079. *
  14080. * Returns 0 in case of success, the set of unknown or unimplemented options
  14081. * in case of error.
  14082. */
  14083. int
  14084. xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
  14085. {
  14086. return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
  14087. }
  14088. /**
  14089. * xmlDoRead:
  14090. * @ctxt: an XML parser context
  14091. * @URL: the base URL to use for the document
  14092. * @encoding: the document encoding, or NULL
  14093. * @options: a combination of xmlParserOption
  14094. * @reuse: keep the context for reuse
  14095. *
  14096. * Common front-end for the xmlRead functions
  14097. *
  14098. * Returns the resulting document tree or NULL
  14099. */
  14100. static xmlDocPtr
  14101. xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
  14102. int options, int reuse)
  14103. {
  14104. xmlDocPtr ret;
  14105. xmlCtxtUseOptionsInternal(ctxt, options, encoding);
  14106. if (encoding != NULL) {
  14107. xmlCharEncodingHandlerPtr hdlr;
  14108. hdlr = xmlFindCharEncodingHandler(encoding);
  14109. if (hdlr != NULL)
  14110. xmlSwitchToEncoding(ctxt, hdlr);
  14111. }
  14112. if ((URL != NULL) && (ctxt->input != NULL) &&
  14113. (ctxt->input->filename == NULL))
  14114. ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
  14115. xmlParseDocument(ctxt);
  14116. if ((ctxt->wellFormed) || ctxt->recovery)
  14117. ret = ctxt->myDoc;
  14118. else {
  14119. ret = NULL;
  14120. if (ctxt->myDoc != NULL) {
  14121. xmlFreeDoc(ctxt->myDoc);
  14122. }
  14123. }
  14124. ctxt->myDoc = NULL;
  14125. if (!reuse) {
  14126. xmlFreeParserCtxt(ctxt);
  14127. }
  14128. return (ret);
  14129. }
  14130. /**
  14131. * xmlReadDoc:
  14132. * @cur: a pointer to a zero terminated string
  14133. * @URL: the base URL to use for the document
  14134. * @encoding: the document encoding, or NULL
  14135. * @options: a combination of xmlParserOption
  14136. *
  14137. * parse an XML in-memory document and build a tree.
  14138. *
  14139. * Returns the resulting document tree
  14140. */
  14141. xmlDocPtr
  14142. xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
  14143. {
  14144. xmlParserCtxtPtr ctxt;
  14145. if (cur == NULL)
  14146. return (NULL);
  14147. xmlInitParser();
  14148. ctxt = xmlCreateDocParserCtxt(cur);
  14149. if (ctxt == NULL)
  14150. return (NULL);
  14151. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14152. }
  14153. /**
  14154. * xmlReadFile:
  14155. * @filename: a file or URL
  14156. * @encoding: the document encoding, or NULL
  14157. * @options: a combination of xmlParserOption
  14158. *
  14159. * parse an XML file from the filesystem or the network.
  14160. *
  14161. * Returns the resulting document tree
  14162. */
  14163. xmlDocPtr
  14164. xmlReadFile(const char *filename, const char *encoding, int options)
  14165. {
  14166. xmlParserCtxtPtr ctxt;
  14167. xmlInitParser();
  14168. ctxt = xmlCreateURLParserCtxt(filename, options);
  14169. if (ctxt == NULL)
  14170. return (NULL);
  14171. return (xmlDoRead(ctxt, NULL, encoding, options, 0));
  14172. }
  14173. /**
  14174. * xmlReadMemory:
  14175. * @buffer: a pointer to a char array
  14176. * @size: the size of the array
  14177. * @URL: the base URL to use for the document
  14178. * @encoding: the document encoding, or NULL
  14179. * @options: a combination of xmlParserOption
  14180. *
  14181. * parse an XML in-memory document and build a tree.
  14182. *
  14183. * Returns the resulting document tree
  14184. */
  14185. xmlDocPtr
  14186. xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
  14187. {
  14188. xmlParserCtxtPtr ctxt;
  14189. xmlInitParser();
  14190. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  14191. if (ctxt == NULL)
  14192. return (NULL);
  14193. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14194. }
  14195. /**
  14196. * xmlReadFd:
  14197. * @fd: an open file descriptor
  14198. * @URL: the base URL to use for the document
  14199. * @encoding: the document encoding, or NULL
  14200. * @options: a combination of xmlParserOption
  14201. *
  14202. * parse an XML from a file descriptor and build a tree.
  14203. * NOTE that the file descriptor will not be closed when the
  14204. * reader is closed or reset.
  14205. *
  14206. * Returns the resulting document tree
  14207. */
  14208. xmlDocPtr
  14209. xmlReadFd(int fd, const char *URL, const char *encoding, int options)
  14210. {
  14211. xmlParserCtxtPtr ctxt;
  14212. xmlParserInputBufferPtr input;
  14213. xmlParserInputPtr stream;
  14214. if (fd < 0)
  14215. return (NULL);
  14216. xmlInitParser();
  14217. input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
  14218. if (input == NULL)
  14219. return (NULL);
  14220. input->closecallback = NULL;
  14221. ctxt = xmlNewParserCtxt();
  14222. if (ctxt == NULL) {
  14223. xmlFreeParserInputBuffer(input);
  14224. return (NULL);
  14225. }
  14226. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14227. if (stream == NULL) {
  14228. xmlFreeParserInputBuffer(input);
  14229. xmlFreeParserCtxt(ctxt);
  14230. return (NULL);
  14231. }
  14232. inputPush(ctxt, stream);
  14233. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14234. }
  14235. /**
  14236. * xmlReadIO:
  14237. * @ioread: an I/O read function
  14238. * @ioclose: an I/O close function
  14239. * @ioctx: an I/O handler
  14240. * @URL: the base URL to use for the document
  14241. * @encoding: the document encoding, or NULL
  14242. * @options: a combination of xmlParserOption
  14243. *
  14244. * parse an XML document from I/O functions and source and build a tree.
  14245. *
  14246. * Returns the resulting document tree
  14247. */
  14248. xmlDocPtr
  14249. xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
  14250. void *ioctx, const char *URL, const char *encoding, int options)
  14251. {
  14252. xmlParserCtxtPtr ctxt;
  14253. xmlParserInputBufferPtr input;
  14254. xmlParserInputPtr stream;
  14255. if (ioread == NULL)
  14256. return (NULL);
  14257. xmlInitParser();
  14258. input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
  14259. XML_CHAR_ENCODING_NONE);
  14260. if (input == NULL) {
  14261. if (ioclose != NULL)
  14262. ioclose(ioctx);
  14263. return (NULL);
  14264. }
  14265. ctxt = xmlNewParserCtxt();
  14266. if (ctxt == NULL) {
  14267. xmlFreeParserInputBuffer(input);
  14268. return (NULL);
  14269. }
  14270. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14271. if (stream == NULL) {
  14272. xmlFreeParserInputBuffer(input);
  14273. xmlFreeParserCtxt(ctxt);
  14274. return (NULL);
  14275. }
  14276. inputPush(ctxt, stream);
  14277. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14278. }
  14279. /**
  14280. * xmlCtxtReadDoc:
  14281. * @ctxt: an XML parser context
  14282. * @cur: a pointer to a zero terminated string
  14283. * @URL: the base URL to use for the document
  14284. * @encoding: the document encoding, or NULL
  14285. * @options: a combination of xmlParserOption
  14286. *
  14287. * parse an XML in-memory document and build a tree.
  14288. * This reuses the existing @ctxt parser context
  14289. *
  14290. * Returns the resulting document tree
  14291. */
  14292. xmlDocPtr
  14293. xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
  14294. const char *URL, const char *encoding, int options)
  14295. {
  14296. xmlParserInputPtr stream;
  14297. if (cur == NULL)
  14298. return (NULL);
  14299. if (ctxt == NULL)
  14300. return (NULL);
  14301. xmlInitParser();
  14302. xmlCtxtReset(ctxt);
  14303. stream = xmlNewStringInputStream(ctxt, cur);
  14304. if (stream == NULL) {
  14305. return (NULL);
  14306. }
  14307. inputPush(ctxt, stream);
  14308. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14309. }
  14310. /**
  14311. * xmlCtxtReadFile:
  14312. * @ctxt: an XML parser context
  14313. * @filename: a file or URL
  14314. * @encoding: the document encoding, or NULL
  14315. * @options: a combination of xmlParserOption
  14316. *
  14317. * parse an XML file from the filesystem or the network.
  14318. * This reuses the existing @ctxt parser context
  14319. *
  14320. * Returns the resulting document tree
  14321. */
  14322. xmlDocPtr
  14323. xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
  14324. const char *encoding, int options)
  14325. {
  14326. xmlParserInputPtr stream;
  14327. if (filename == NULL)
  14328. return (NULL);
  14329. if (ctxt == NULL)
  14330. return (NULL);
  14331. xmlInitParser();
  14332. xmlCtxtReset(ctxt);
  14333. stream = xmlLoadExternalEntity(filename, NULL, ctxt);
  14334. if (stream == NULL) {
  14335. return (NULL);
  14336. }
  14337. inputPush(ctxt, stream);
  14338. return (xmlDoRead(ctxt, NULL, encoding, options, 1));
  14339. }
  14340. /**
  14341. * xmlCtxtReadMemory:
  14342. * @ctxt: an XML parser context
  14343. * @buffer: a pointer to a char array
  14344. * @size: the size of the array
  14345. * @URL: the base URL to use for the document
  14346. * @encoding: the document encoding, or NULL
  14347. * @options: a combination of xmlParserOption
  14348. *
  14349. * parse an XML in-memory document and build a tree.
  14350. * This reuses the existing @ctxt parser context
  14351. *
  14352. * Returns the resulting document tree
  14353. */
  14354. xmlDocPtr
  14355. xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
  14356. const char *URL, const char *encoding, int options)
  14357. {
  14358. xmlParserInputBufferPtr input;
  14359. xmlParserInputPtr stream;
  14360. if (ctxt == NULL)
  14361. return (NULL);
  14362. if (buffer == NULL)
  14363. return (NULL);
  14364. xmlInitParser();
  14365. xmlCtxtReset(ctxt);
  14366. input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
  14367. if (input == NULL) {
  14368. return(NULL);
  14369. }
  14370. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14371. if (stream == NULL) {
  14372. xmlFreeParserInputBuffer(input);
  14373. return(NULL);
  14374. }
  14375. inputPush(ctxt, stream);
  14376. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14377. }
  14378. /**
  14379. * xmlCtxtReadFd:
  14380. * @ctxt: an XML parser context
  14381. * @fd: an open file descriptor
  14382. * @URL: the base URL to use for the document
  14383. * @encoding: the document encoding, or NULL
  14384. * @options: a combination of xmlParserOption
  14385. *
  14386. * parse an XML from a file descriptor and build a tree.
  14387. * This reuses the existing @ctxt parser context
  14388. * NOTE that the file descriptor will not be closed when the
  14389. * reader is closed or reset.
  14390. *
  14391. * Returns the resulting document tree
  14392. */
  14393. xmlDocPtr
  14394. xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
  14395. const char *URL, const char *encoding, int options)
  14396. {
  14397. xmlParserInputBufferPtr input;
  14398. xmlParserInputPtr stream;
  14399. if (fd < 0)
  14400. return (NULL);
  14401. if (ctxt == NULL)
  14402. return (NULL);
  14403. xmlInitParser();
  14404. xmlCtxtReset(ctxt);
  14405. input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
  14406. if (input == NULL)
  14407. return (NULL);
  14408. input->closecallback = NULL;
  14409. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14410. if (stream == NULL) {
  14411. xmlFreeParserInputBuffer(input);
  14412. return (NULL);
  14413. }
  14414. inputPush(ctxt, stream);
  14415. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14416. }
  14417. /**
  14418. * xmlCtxtReadIO:
  14419. * @ctxt: an XML parser context
  14420. * @ioread: an I/O read function
  14421. * @ioclose: an I/O close function
  14422. * @ioctx: an I/O handler
  14423. * @URL: the base URL to use for the document
  14424. * @encoding: the document encoding, or NULL
  14425. * @options: a combination of xmlParserOption
  14426. *
  14427. * parse an XML document from I/O functions and source and build a tree.
  14428. * This reuses the existing @ctxt parser context
  14429. *
  14430. * Returns the resulting document tree
  14431. */
  14432. xmlDocPtr
  14433. xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
  14434. xmlInputCloseCallback ioclose, void *ioctx,
  14435. const char *URL,
  14436. const char *encoding, int options)
  14437. {
  14438. xmlParserInputBufferPtr input;
  14439. xmlParserInputPtr stream;
  14440. if (ioread == NULL)
  14441. return (NULL);
  14442. if (ctxt == NULL)
  14443. return (NULL);
  14444. xmlInitParser();
  14445. xmlCtxtReset(ctxt);
  14446. input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
  14447. XML_CHAR_ENCODING_NONE);
  14448. if (input == NULL) {
  14449. if (ioclose != NULL)
  14450. ioclose(ioctx);
  14451. return (NULL);
  14452. }
  14453. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14454. if (stream == NULL) {
  14455. xmlFreeParserInputBuffer(input);
  14456. return (NULL);
  14457. }
  14458. inputPush(ctxt, stream);
  14459. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14460. }
  14461. #define bottom_parser
  14462. #include "elfgcchack.h"