Parser.php 156 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965
  1. <?php
  2. /**
  3. * @defgroup Parser Parser
  4. *
  5. * @file
  6. * @ingroup Parser
  7. * File for Parser and related classes
  8. */
  9. /**
  10. * PHP Parser - Processes wiki markup (which uses a more user-friendly
  11. * syntax, such as "[[link]]" for making links), and provides a one-way
  12. * transformation of that wiki markup into XHTML output / markup
  13. * (which in turn the browser understands, and can display).
  14. *
  15. * <pre>
  16. * There are five main entry points into the Parser class:
  17. * parse()
  18. * produces HTML output
  19. * preSaveTransform().
  20. * produces altered wiki markup.
  21. * preprocess()
  22. * removes HTML comments and expands templates
  23. * cleanSig()
  24. * Cleans a signature before saving it to preferences
  25. * extractSections()
  26. * Extracts sections from an article for section editing
  27. *
  28. * Globals used:
  29. * objects: $wgLang, $wgContLang
  30. *
  31. * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  32. *
  33. * settings:
  34. * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  35. * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  36. * $wgLocaltimezone, $wgAllowSpecialInclusion*,
  37. * $wgMaxArticleSize*
  38. *
  39. * * only within ParserOptions
  40. * </pre>
  41. *
  42. * @ingroup Parser
  43. */
  44. class Parser
  45. {
  46. /**
  47. * Update this version number when the ParserOutput format
  48. * changes in an incompatible way, so the parser cache
  49. * can automatically discard old data.
  50. */
  51. const VERSION = '1.6.4';
  52. # Flags for Parser::setFunctionHook
  53. # Also available as global constants from Defines.php
  54. const SFH_NO_HASH = 1;
  55. const SFH_OBJECT_ARGS = 2;
  56. # Constants needed for external link processing
  57. # Everything except bracket, space, or control characters
  58. const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
  59. const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
  60. \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
  61. // State constants for the definition list colon extraction
  62. const COLON_STATE_TEXT = 0;
  63. const COLON_STATE_TAG = 1;
  64. const COLON_STATE_TAGSTART = 2;
  65. const COLON_STATE_CLOSETAG = 3;
  66. const COLON_STATE_TAGSLASH = 4;
  67. const COLON_STATE_COMMENT = 5;
  68. const COLON_STATE_COMMENTDASH = 6;
  69. const COLON_STATE_COMMENTDASHDASH = 7;
  70. // Flags for preprocessToDom
  71. const PTD_FOR_INCLUSION = 1;
  72. // Allowed values for $this->mOutputType
  73. // Parameter to startExternalParse().
  74. const OT_HTML = 1;
  75. const OT_WIKI = 2;
  76. const OT_PREPROCESS = 3;
  77. const OT_MSG = 3;
  78. // Marker Suffix needs to be accessible statically.
  79. const MARKER_SUFFIX = "-QINU\x7f";
  80. /**#@+
  81. * @private
  82. */
  83. # Persistent:
  84. var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
  85. $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor,
  86. $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, $mVarCache, $mConf;
  87. # Cleared with clearState():
  88. var $mOutput, $mAutonumber, $mDTopen, $mStripState;
  89. var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  90. var $mLinkHolders, $mLinkID;
  91. var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
  92. var $mTplExpandCache; // empty-frame expansion cache
  93. var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
  94. var $mExpensiveFunctionCount; // number of expensive parser function calls
  95. var $mFileCache;
  96. # Temporary
  97. # These are variables reset at least once per parse regardless of $clearState
  98. var $mOptions, // ParserOptions object
  99. $mTitle, // Title context, used for self-link rendering and similar things
  100. $mOutputType, // Output type, one of the OT_xxx constants
  101. $ot, // Shortcut alias, see setOutputType()
  102. $mRevisionId, // ID to display in {{REVISIONID}} tags
  103. $mRevisionTimestamp, // The timestamp of the specified revision ID
  104. $mRevIdForTs; // The revision ID which was used to fetch the timestamp
  105. /**#@-*/
  /**
   * Constructor.
   *
   * Stores the configuration array, resets the hook registries and the
   * strip list, builds the bracketed external-link regex from the
   * configured URL protocols, and chooses the preprocessor class.
   *
   * @param $conf Array: parser configuration; the only key read here is
   *   'preprocessorClass', which overrides automatic preprocessor selection
   * @public
   */
  function __construct( $conf = array() ) {
      $this->mConf = $conf;

      # Hook registries start out empty
      $this->mTagHooks = array();
      $this->mTransparentTagHooks = array();
      $this->mFunctionHooks = array();
      $this->mFunctionSynonyms = array( 0 => array(), 1 => array() );

      $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' );

      $this->mUrlProtocols = wfUrlProtocols();
      $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
          '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
      $this->mVarCache = array();

      # Pick the preprocessor: an explicit setting wins; otherwise use the
      # DOM implementation only when the core 'dom' extension is loaded and
      # the obsolete 'domxml' extension is not.
      if ( isset( $conf['preprocessorClass'] ) ) {
          $preprocessorClass = $conf['preprocessorClass'];
      } else {
          $preprocessorClass = 'Preprocessor_Hash';
          if ( extension_loaded( 'domxml' ) ) {
              // PECL extension that conflicts with the core DOM extension (bug 13770)
              wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
          } elseif ( extension_loaded( 'dom' ) ) {
              $preprocessorClass = 'Preprocessor_DOM';
          }
      }
      $this->mPreprocessorClass = $preprocessorClass;

      $this->mMarkerIndex = 0;
      # firstCallInit() performs the remaining setup lazily
      $this->mFirstCall = true;
  }
  /**
   * Break circular references to keep memory usage down: explicitly
   * destroys the link holder array (if one is set) and then unsets
   * every property of this object.
   */
  function __destruct() {
      if ( isset( $this->mLinkHolders ) ) {
          $this->mLinkHolders->__destruct();
      }
      # Work from a snapshot of the property names and drop each one
      $propertyNames = array_keys( get_object_vars( $this ) );
      foreach ( $propertyNames as $propertyName ) {
          unset( $this->$propertyName );
      }
  }
  /**
   * Do various kinds of initialisation on the first call of the parser.
   *
   * Registers the 'pre' tag hook and the core parser functions,
   * initialises the variables, and runs the 'ParserFirstCallInit' hook.
   * Guarded by $this->mFirstCall, so every later call returns immediately.
   */
  function firstCallInit() {
      if ( !$this->mFirstCall ) {
          return;
      }
      $this->mFirstCall = false;

      wfProfileIn( __METHOD__ );

      $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
      CoreParserFunctions::register( $this );
      $this->initialiseVariables();

      # Hook handlers receive the parser by reference. From PHP 7.1 on,
      # "&$this" no longer produces a reference, so pass a local alias.
      $parser = $this;
      wfRunHooks( 'ParserFirstCallInit', array( &$parser ) );

      wfProfileOut( __METHOD__ );
  }
  /**
   * Clear Parser state.
   *
   * Runs the first-call initialisation if it is still pending, then resets
   * every per-parse member (output object, counters, strip state, link
   * holders, template caches, include-size accounting, ...), generates a
   * fresh unique marker prefix and finally runs the 'ParserClearState' hook.
   *
   * @private
   */
  function clearState() {
      wfProfileIn( __METHOD__ );
      if ( $this->mFirstCall ) {
          $this->firstCallInit();
      }
      $this->mOutput = new ParserOutput;
      $this->mAutonumber = 0;
      $this->mLastSection = '';
      $this->mDTopen = false;
      $this->mIncludeCount = array();
      $this->mStripState = new StripState;
      $this->mArgStack = false;
      $this->mInPre = false;
      $this->mLinkHolders = new LinkHolderArray( $this );
      $this->mLinkID = 0;
      $this->mRevisionTimestamp = $this->mRevisionId = null;

      /**
       * Prefix for temporary replacement strings for the multipass parser.
       * It starts with \x7f (it used to be \x07; changed to allow XML
       * double-parsing -- TS). Having a control character at the front
       * also gives us a little extra robustness since it shouldn't match
       * when butted up against identifier-like string constructs.
       *
       * Must not consist of all title characters, or else it will change
       * the behaviour of <nowiki> in a link.
       */
      $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();

      # Clear these on every parse, bug 4549
      $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();

      $this->mShowToc = true;
      $this->mForceTocPosition = false;
      $this->mIncludeSizes = array(
          'post-expand' => 0,
          'arg' => 0,
      );
      $this->mPPNodeCount = 0;
      $this->mDefaultSort = false;
      $this->mHeadings = array();
      $this->mDoubleUnderscores = array();
      $this->mExpensiveFunctionCount = 0;
      $this->mFileCache = array();

      # Fix cloning: a preprocessor that still points at another parser
      # instance must not be reused by this one.
      if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
          $this->mPreprocessor = null;
      }

      # Hook handlers receive the parser by reference. From PHP 7.1 on,
      # "&$this" no longer produces a reference, so pass a local alias.
      $parser = $this;
      wfRunHooks( 'ParserClearState', array( &$parser ) );
      wfProfileOut( __METHOD__ );
  }
  /**
   * Set the output type and refresh the $this->ot shortcut flags.
   *
   * $this->ot holds one boolean per mode ('html', 'wiki', 'pre'), each
   * computed by loose comparison against the matching OT_* constant.
   * OT_MSG has the same value as OT_PREPROCESS, so 'pre' is also true
   * for OT_MSG.
   *
   * @param $ot Int: one of the OT_xxx constants
   */
  function setOutputType( $ot ) {
      $isHtml = ( $ot == self::OT_HTML );
      $isWiki = ( $ot == self::OT_WIKI );
      $isPreprocess = ( $ot == self::OT_PREPROCESS );

      $this->mOutputType = $ot;
      // Shortcut alias
      $this->ot = array(
          'html' => $isHtml,
          'wiki' => $isWiki,
          'pre' => $isPreprocess,
      );
  }
  /**
   * Set the context title.
   *
   * A falsy value or a FakeTitle is replaced by the title 'NO TITLE'.
   * If the title carries a fragment, a clone with the fragment removed
   * is stored, so the object passed in is left unmodified.
   *
   * @param $t Title object (or a falsy value / FakeTitle)
   */
  function setTitle( $t ) {
      if ( !$t || $t instanceof FakeTitle ) {
          $t = Title::newFromText( 'NO TITLE' );
      }

      if ( strval( $t->getFragment() ) === '' ) {
          # No fragment: the title can be used as it is
          $this->mTitle = $t;
          return;
      }

      # Strip the fragment to avoid various odd effects
      $this->mTitle = clone $t;
      $this->mTitle->setFragment( '' );
  }
  /**
   * Accessor for mUniqPrefix.
   *
   * @fixme Returning an empty string for an unset prefix is probably
   * *horribly wrong*: LanguageConverter seems to want $wgParser's
   * uniqPrefix, but on a parser cache hit the parser may never have been
   * initialised at all. Throwing an MWException
   * ( "Accessing uninitialized mUniqPrefix" ) was considered instead.
   *
   * @return String: the unique marker prefix, or '' if it is not set yet
   * @public
   */
  function uniqPrefix() {
      return isset( $this->mUniqPrefix ) ? $this->mUniqPrefix : '';
  }
  258. /**
  259. * Convert wikitext to HTML
  260. * Do not call this function recursively.
  261. *
  262. * @param $text String: text we want to parse
  263. * @param $title A title object
  264. * @param $options ParserOptions
  265. * @param $linestart boolean
  266. * @param $clearState boolean
  267. * @param $revid Int: number to pass in {{REVISIONID}}
  268. * @return ParserOutput a ParserOutput
  269. */
  270. public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
  271. /**
  272. * First pass--just handle <nowiki> sections, pass the rest off
  273. * to internalParse() which does all the real work.
  274. */
  275. global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
  276. $fname = __METHOD__.'-' . wfGetCaller();
  277. wfProfileIn( __METHOD__ );
  278. wfProfileIn( $fname );
  279. if ( $clearState ) {
  280. $this->clearState();
  281. }
  282. $this->mOptions = $options;
  283. $this->setTitle( $title );
  284. $oldRevisionId = $this->mRevisionId;
  285. $oldRevisionTimestamp = $this->mRevisionTimestamp;
  286. if( $revid !== null ) {
  287. $this->mRevisionId = $revid;
  288. $this->mRevisionTimestamp = null;
  289. }
  290. $this->setOutputType( self::OT_HTML );
  291. wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
  292. # No more strip!
  293. wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
  294. $text = $this->internalParse( $text );
  295. $text = $this->mStripState->unstripGeneral( $text );
  296. # Clean up special characters, only run once, next-to-last before doBlockLevels
  297. $fixtags = array(
  298. # french spaces, last one Guillemet-left
  299. # only if there is something before the space
  300. '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2',
  301. # french spaces, Guillemet-right
  302. '/(\\302\\253) /' => '\\1&nbsp;',
  303. '/&nbsp;(!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874.
  304. );
  305. $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
  306. $text = $this->doBlockLevels( $text, $linestart );
  307. $this->replaceLinkHolders( $text );
  308. # the position of the parserConvert() call should not be changed. it
  309. # assumes that the links are all replaced and the only thing left
  310. # is the <nowiki> mark.
  311. # Side-effects: this calls $this->mOutput->setTitleText()
  312. $text = $wgContLang->parserConvert( $text, $this );
  313. $text = $this->mStripState->unstripNoWiki( $text );
  314. wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
  315. //!JF Move to its own function
  316. $uniq_prefix = $this->mUniqPrefix;
  317. $matches = array();
  318. $elements = array_keys( $this->mTransparentTagHooks );
  319. $text = self::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
  320. foreach( $matches as $marker => $data ) {
  321. list( $element, $content, $params, $tag ) = $data;
  322. $tagName = strtolower( $element );
  323. if( isset( $this->mTransparentTagHooks[$tagName] ) ) {
  324. $output = call_user_func_array( $this->mTransparentTagHooks[$tagName],
  325. array( $content, $params, $this ) );
  326. } else {
  327. $output = $tag;
  328. }
  329. $this->mStripState->general->setPair( $marker, $output );
  330. }
  331. $text = $this->mStripState->unstripGeneral( $text );
  332. $text = Sanitizer::normalizeCharReferences( $text );
  333. if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
  334. $text = MWTidy::tidy( $text );
  335. } else {
  336. # attempt to sanitize at least some nesting problems
  337. # (bug #2702 and quite a few others)
  338. $tidyregs = array(
  339. # ''Something [http://www.cool.com cool''] -->
  340. # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
  341. '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
  342. '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
  343. # fix up an anchor inside another anchor, only
  344. # at least for a single single nested link (bug 3695)
  345. '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
  346. '\\1\\2</a>\\3</a>\\1\\4</a>',
  347. # fix div inside inline elements- doBlockLevels won't wrap a line which
  348. # contains a div, so fix it up here; replace
  349. # div with escaped text
  350. '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
  351. '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
  352. # remove empty italic or bold tag pairs, some
  353. # introduced by rules above
  354. '/<([bi])><\/\\1>/' => '',
  355. );
  356. $text = preg_replace(
  357. array_keys( $tidyregs ),
  358. array_values( $tidyregs ),
  359. $text );
  360. }
  361. global $wgExpensiveParserFunctionLimit;
  362. if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) {
  363. $this->limitationWarn( 'expensive-parserfunction', $this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit );
  364. }
  365. wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
  366. # Information on include size limits, for the benefit of users who try to skirt them
  367. if ( $this->mOptions->getEnableLimitReport() ) {
  368. global $wgExpensiveParserFunctionLimit;
  369. $max = $this->mOptions->getMaxIncludeSize();
  370. $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
  371. $limitReport =
  372. "NewPP limit report\n" .
  373. "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
  374. "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
  375. "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
  376. $PFreport;
  377. wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
  378. $text .= "\n<!-- \n$limitReport-->\n";
  379. }
  380. $this->mOutput->setText( $text );
  381. $this->mRevisionId = $oldRevisionId;
  382. $this->mRevisionTimestamp = $oldRevisionTimestamp;
  383. wfProfileOut( $fname );
  384. wfProfileOut( __METHOD__ );
  385. return $this->mOutput;
  386. }
  387. /**
  388. * Recursive parser entry point that can be called from an extension tag
  389. * hook.
  390. */
  391. function recursiveTagParse( $text ) {
  392. wfProfileIn( __METHOD__ );
  393. wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
  394. wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
  395. $text = $this->internalParse( $text );
  396. wfProfileOut( __METHOD__ );
  397. return $text;
  398. }
  399. /**
  400. * Expand templates and variables in the text, producing valid, static wikitext.
  401. * Also removes comments.
  402. */
  403. function preprocess( $text, $title, $options, $revid = null ) {
  404. wfProfileIn( __METHOD__ );
  405. $this->clearState();
  406. $this->setOutputType( self::OT_PREPROCESS );
  407. $this->mOptions = $options;
  408. $this->setTitle( $title );
  409. if( $revid !== null ) {
  410. $this->mRevisionId = $revid;
  411. }
  412. wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
  413. wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
  414. $text = $this->replaceVariables( $text );
  415. $text = $this->mStripState->unstripBoth( $text );
  416. wfProfileOut( __METHOD__ );
  417. return $text;
  418. }
  419. /**
  420. * Get a random string
  421. *
  422. * @private
  423. * @static
  424. */
  425. function getRandomString() {
  426. return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
  427. }
  428. function &getTitle() { return $this->mTitle; }
  429. function getOptions() { return $this->mOptions; }
  430. function getRevisionId() { return $this->mRevisionId; }
  431. function getOutput() { return $this->mOutput; }
  432. function nextLinkID() { return $this->mLinkID++; }
  433. function getFunctionLang() {
  434. global $wgLang, $wgContLang;
  435. $target = $this->mOptions->getTargetLanguage();
  436. if ( $target !== null ) {
  437. return $target;
  438. } else {
  439. return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
  440. }
  441. }
  442. /**
  443. * Get a preprocessor object
  444. */
  445. function getPreprocessor() {
  446. if ( !isset( $this->mPreprocessor ) ) {
  447. $class = $this->mPreprocessorClass;
  448. $this->mPreprocessor = new $class( $this );
  449. }
  450. return $this->mPreprocessor;
  451. }
  452. /**
  453. * Replaces all occurrences of HTML-style comments and the given tags
  454. * in the text with a random marker and returns the next text. The output
  455. * parameter $matches will be an associative array filled with data in
  456. * the form:
  457. * 'UNIQ-xxxxx' => array(
  458. * 'element',
  459. * 'tag content',
  460. * array( 'param' => 'x' ),
  461. * '<element param="x">tag content</element>' ) )
  462. *
  463. * @param $elements list of element names. Comments are always extracted.
  464. * @param $text Source text string.
  465. * @param $uniq_prefix
  466. *
  467. * @public
  468. * @static
  469. */
  470. function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
  471. static $n = 1;
  472. $stripped = '';
  473. $matches = array();
  474. $taglist = implode( '|', $elements );
  475. $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
  476. while ( '' != $text ) {
  477. $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
  478. $stripped .= $p[0];
  479. if( count( $p ) < 5 ) {
  480. break;
  481. }
  482. if( count( $p ) > 5 ) {
  483. // comment
  484. $element = $p[4];
  485. $attributes = '';
  486. $close = '';
  487. $inside = $p[5];
  488. } else {
  489. // tag
  490. $element = $p[1];
  491. $attributes = $p[2];
  492. $close = $p[3];
  493. $inside = $p[4];
  494. }
  495. $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . self::MARKER_SUFFIX;
  496. $stripped .= $marker;
  497. if ( $close === '/>' ) {
  498. // Empty element tag, <tag />
  499. $content = null;
  500. $text = $inside;
  501. $tail = null;
  502. } else {
  503. if( $element === '!--' ) {
  504. $end = '/(-->)/';
  505. } else {
  506. $end = "/(<\\/$element\\s*>)/i";
  507. }
  508. $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
  509. $content = $q[0];
  510. if( count( $q ) < 3 ) {
  511. # No end tag -- let it run out to the end of the text.
  512. $tail = '';
  513. $text = '';
  514. } else {
  515. $tail = $q[1];
  516. $text = $q[2];
  517. }
  518. }
  519. $matches[$marker] = array( $element,
  520. $content,
  521. Sanitizer::decodeTagAttributes( $attributes ),
  522. "<$element$attributes$close$content$tail" );
  523. }
  524. return $stripped;
  525. }
  526. /**
  527. * Get a list of strippable XML-like elements
  528. */
  529. function getStripList() {
  530. global $wgRawHtml;
  531. $elements = $this->mStripList;
  532. if( $wgRawHtml ) {
  533. $elements[] = 'html';
  534. }
  535. if( $this->mOptions->getUseTeX() ) {
  536. $elements[] = 'math';
  537. }
  538. return $elements;
  539. }
  540. /**
  541. * @deprecated use replaceVariables
  542. */
  543. function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) {
  544. return $text;
  545. }
  546. /**
  547. * Restores pre, math, and other extensions removed by strip()
  548. *
  549. * always call unstripNoWiki() after this one
  550. * @private
  551. * @deprecated use $this->mStripState->unstrip()
  552. */
  553. function unstrip( $text, $state ) {
  554. return $state->unstripGeneral( $text );
  555. }
  556. /**
  557. * Always call this after unstrip() to preserve the order
  558. *
  559. * @private
  560. * @deprecated use $this->mStripState->unstrip()
  561. */
  562. function unstripNoWiki( $text, $state ) {
  563. return $state->unstripNoWiki( $text );
  564. }
  565. /**
  566. * @deprecated use $this->mStripState->unstripBoth()
  567. */
  568. function unstripForHTML( $text ) {
  569. return $this->mStripState->unstripBoth( $text );
  570. }
  571. /**
  572. * Add an item to the strip state
  573. * Returns the unique tag which must be inserted into the stripped text
  574. * The tag will be replaced with the original text in unstrip()
  575. *
  576. * @private
  577. */
  578. function insertStripItem( $text ) {
  579. $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
  580. $this->mMarkerIndex++;
  581. $this->mStripState->general->setPair( $rnd, $text );
  582. return $rnd;
  583. }
  584. /**
  585. * Interface with html tidy
  586. * @deprecated Use MWTidy::tidy()
  587. */
  588. public static function tidy( $text ) {
  589. wfDeprecated( __METHOD__ );
  590. return MWTidy::tidy( $text );
  591. }
  592. /**
  593. * parse the wiki syntax used to render tables
  594. *
  595. * @private
  596. */
  597. function doTableStuff ( $text ) {
  598. wfProfileIn( __METHOD__ );
  599. $lines = StringUtils::explode( "\n", $text );
  600. $out = '';
  601. $td_history = array (); // Is currently a td tag open?
  602. $last_tag_history = array (); // Save history of last lag activated (td, th or caption)
  603. $tr_history = array (); // Is currently a tr tag open?
  604. $tr_attributes = array (); // history of tr attributes
  605. $has_opened_tr = array(); // Did this table open a <tr> element?
  606. $indent_level = 0; // indent level of the table
  607. foreach ( $lines as $outLine ) {
  608. $line = trim( $outLine );
  609. if( $line == '' ) { // empty line, go to next line
  610. $out .= $outLine."\n";
  611. continue;
  612. }
  613. $first_character = $line[0];
  614. $matches = array();
  615. if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
  616. // First check if we are starting a new table
  617. $indent_level = strlen( $matches[1] );
  618. $attributes = $this->mStripState->unstripBoth( $matches[2] );
  619. $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );
  620. $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
  621. array_push ( $td_history , false );
  622. array_push ( $last_tag_history , '' );
  623. array_push ( $tr_history , false );
  624. array_push ( $tr_attributes , '' );
  625. array_push ( $has_opened_tr , false );
  626. } else if ( count ( $td_history ) == 0 ) {
  627. // Don't do any of the following
  628. $out .= $outLine."\n";
  629. continue;
  630. } else if ( substr ( $line , 0 , 2 ) === '|}' ) {
  631. // We are ending a table
  632. $line = '</table>' . substr ( $line , 2 );
  633. $last_tag = array_pop ( $last_tag_history );
  634. if ( !array_pop ( $has_opened_tr ) ) {
  635. $line = "<tr><td></td></tr>{$line}";
  636. }
  637. if ( array_pop ( $tr_history ) ) {
  638. $line = "</tr>{$line}";
  639. }
  640. if ( array_pop ( $td_history ) ) {
  641. $line = "</{$last_tag}>{$line}";
  642. }
  643. array_pop ( $tr_attributes );
  644. $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
  645. } else if ( substr ( $line , 0 , 2 ) === '|-' ) {
  646. // Now we have a table row
  647. $line = preg_replace( '#^\|-+#', '', $line );
  648. // Whats after the tag is now only attributes
  649. $attributes = $this->mStripState->unstripBoth( $line );
  650. $attributes = Sanitizer::fixTagAttributes ( $attributes , 'tr' );
  651. array_pop ( $tr_attributes );
  652. array_push ( $tr_attributes , $attributes );
  653. $line = '';
  654. $last_tag = array_pop ( $last_tag_history );
  655. array_pop ( $has_opened_tr );
  656. array_push ( $has_opened_tr , true );
  657. if ( array_pop ( $tr_history ) ) {
  658. $line = '</tr>';
  659. }
  660. if ( array_pop ( $td_history ) ) {
  661. $line = "</{$last_tag}>{$line}";
  662. }
  663. $outLine = $line;
  664. array_push ( $tr_history , false );
  665. array_push ( $td_history , false );
  666. array_push ( $last_tag_history , '' );
  667. }
  668. else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 ) === '|+' ) {
  669. // This might be cell elements, td, th or captions
  670. if ( substr ( $line , 0 , 2 ) === '|+' ) {
  671. $first_character = '+';
  672. $line = substr ( $line , 1 );
  673. }
  674. $line = substr ( $line , 1 );
  675. if ( $first_character === '!' ) {
  676. $line = str_replace ( '!!' , '||' , $line );
  677. }
  678. // Split up multiple cells on the same line.
  679. // FIXME : This can result in improper nesting of tags processed
  680. // by earlier parser steps, but should avoid splitting up eg
  681. // attribute values containing literal "||".
  682. $cells = StringUtils::explodeMarkup( '||' , $line );
  683. $outLine = '';
  684. // Loop through each table cell
  685. foreach ( $cells as $cell )
  686. {
  687. $previous = '';
  688. if ( $first_character !== '+' )
  689. {
  690. $tr_after = array_pop ( $tr_attributes );
  691. if ( !array_pop ( $tr_history ) ) {
  692. $previous = "<tr{$tr_after}>\n";
  693. }
  694. array_push ( $tr_history , true );
  695. array_push ( $tr_attributes , '' );
  696. array_pop ( $has_opened_tr );
  697. array_push ( $has_opened_tr , true );
  698. }
  699. $last_tag = array_pop ( $last_tag_history );
  700. if ( array_pop ( $td_history ) ) {
  701. $previous = "</{$last_tag}>{$previous}";
  702. }
  703. if ( $first_character === '|' ) {
  704. $last_tag = 'td';
  705. } else if ( $first_character === '!' ) {
  706. $last_tag = 'th';
  707. } else if ( $first_character === '+' ) {
  708. $last_tag = 'caption';
  709. } else {
  710. $last_tag = '';
  711. }
  712. array_push ( $last_tag_history , $last_tag );
  713. // A cell could contain both parameters and data
  714. $cell_data = explode ( '|' , $cell , 2 );
  715. // Bug 553: Note that a '|' inside an invalid link should not
  716. // be mistaken as delimiting cell parameters
  717. if ( strpos( $cell_data[0], '[[' ) !== false ) {
  718. $cell = "{$previous}<{$last_tag}>{$cell}";
  719. } else if ( count ( $cell_data ) == 1 )
  720. $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
  721. else {
  722. $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
  723. $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
  724. $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
  725. }
  726. $outLine .= $cell;
  727. array_push ( $td_history , true );
  728. }
  729. }
  730. $out .= $outLine . "\n";
  731. }
  732. // Closing open td, tr && table
  733. while ( count ( $td_history ) > 0 )
  734. {
  735. if ( array_pop ( $td_history ) ) {
  736. $out .= "</td>\n";
  737. }
  738. if ( array_pop ( $tr_history ) ) {
  739. $out .= "</tr>\n";
  740. }
  741. if ( !array_pop ( $has_opened_tr ) ) {
  742. $out .= "<tr><td></td></tr>\n" ;
  743. }
  744. $out .= "</table>\n";
  745. }
  746. // Remove trailing line-ending (b/c)
  747. if ( substr( $out, -1 ) === "\n" ) {
  748. $out = substr( $out, 0, -1 );
  749. }
  750. // special case: don't return empty table
  751. if( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
  752. $out = '';
  753. }
  754. wfProfileOut( __METHOD__ );
  755. return $out;
  756. }
  757. /**
  758. * Helper function for parse() that transforms wiki markup into
  759. * HTML. Only called for $mOutputType == self::OT_HTML.
  760. *
  761. * @private
  762. */
  763. function internalParse( $text ) {
  764. $isMain = true;
  765. wfProfileIn( __METHOD__ );
  766. # Hook to suspend the parser in this state
  767. if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
  768. wfProfileOut( __METHOD__ );
  769. return $text ;
  770. }
  771. $text = $this->replaceVariables( $text );
  772. $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
  773. wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );
  774. // Tables need to come after variable replacement for things to work
  775. // properly; putting them before other transformations should keep
  776. // exciting things like link expansions from showing up in surprising
  777. // places.
  778. $text = $this->doTableStuff( $text );
  779. $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
  780. $text = $this->doDoubleUnderscore( $text );
  781. $text = $this->doHeadings( $text );
  782. if( $this->mOptions->getUseDynamicDates() ) {
  783. $df = DateFormatter::getInstance();
  784. $text = $df->reformat( $this->mOptions->getDateFormat(), $text );
  785. }
  786. $text = $this->doAllQuotes( $text );
  787. $text = $this->replaceInternalLinks( $text );
  788. $text = $this->replaceExternalLinks( $text );
  789. # replaceInternalLinks may sometimes leave behind
  790. # absolute URLs, which have to be masked to hide them from replaceExternalLinks
  791. $text = str_replace($this->mUniqPrefix.'NOPARSE', '', $text);
  792. $text = $this->doMagicLinks( $text );
  793. $text = $this->formatHeadings( $text, $isMain );
  794. wfProfileOut( __METHOD__ );
  795. return $text;
  796. }
  797. /**
  798. * Replace special strings like "ISBN xxx" and "RFC xxx" with
  799. * magic external links.
  800. *
  801. * DML
  802. * @private
  803. */
  804. function doMagicLinks( $text ) {
  805. wfProfileIn( __METHOD__ );
  806. $prots = $this->mUrlProtocols;
  807. $urlChar = self::EXT_LINK_URL_CLASS;
  808. $text = preg_replace_callback(
  809. '!(?: # Start cases
  810. (<a.*?</a>) | # m[1]: Skip link text
  811. (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
  812. (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
  813. (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
  814. ISBN\s+(\b # m[5]: ISBN, capture number
  815. (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
  816. (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
  817. [0-9Xx] # check digit
  818. \b)
  819. )!x', array( &$this, 'magicLinkCallback' ), $text );
  820. wfProfileOut( __METHOD__ );
  821. return $text;
  822. }
  823. function magicLinkCallback( $m ) {
  824. if ( isset( $m[1] ) && $m[1] !== '' ) {
  825. # Skip anchor
  826. return $m[0];
  827. } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
  828. # Skip HTML element
  829. return $m[0];
  830. } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
  831. # Free external link
  832. return $this->makeFreeExternalLink( $m[0] );
  833. } elseif ( isset( $m[4] ) && $m[4] !== '' ) {
  834. # RFC or PMID
  835. if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
  836. $keyword = 'RFC';
  837. $urlmsg = 'rfcurl';
  838. $id = $m[4];
  839. } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
  840. $keyword = 'PMID';
  841. $urlmsg = 'pubmedurl';
  842. $id = $m[4];
  843. } else {
  844. throw new MWException( __METHOD__.': unrecognised match type "' .
  845. substr($m[0], 0, 20 ) . '"' );
  846. }
  847. $url = wfMsg( $urlmsg, $id);
  848. $sk = $this->mOptions->getSkin();
  849. $la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
  850. return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
  851. } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
  852. # ISBN
  853. $isbn = $m[5];
  854. $num = strtr( $isbn, array(
  855. '-' => '',
  856. ' ' => '',
  857. 'x' => 'X',
  858. ));
  859. $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
  860. return'<a href="' .
  861. $titleObj->escapeLocalUrl() .
  862. "\" class=\"internal\">ISBN $isbn</a>";
  863. } else {
  864. return $m[0];
  865. }
  866. }
  867. /**
  868. * Make a free external link, given a user-supplied URL
  869. * @return HTML
  870. * @private
  871. */
  872. function makeFreeExternalLink( $url ) {
  873. global $wgContLang;
  874. wfProfileIn( __METHOD__ );
  875. $sk = $this->mOptions->getSkin();
  876. $trail = '';
  877. # The characters '<' and '>' (which were escaped by
  878. # removeHTMLtags()) should not be included in
  879. # URLs, per RFC 2396.
  880. $m2 = array();
  881. if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
  882. $trail = substr($url, $m2[0][1]) . $trail;
  883. $url = substr($url, 0, $m2[0][1]);
  884. }
  885. # Move trailing punctuation to $trail
  886. $sep = ',;\.:!?';
  887. # If there is no left bracket, then consider right brackets fair game too
  888. if ( strpos( $url, '(' ) === false ) {
  889. $sep .= ')';
  890. }
  891. $numSepChars = strspn( strrev( $url ), $sep );
  892. if ( $numSepChars ) {
  893. $trail = substr( $url, -$numSepChars ) . $trail;
  894. $url = substr( $url, 0, -$numSepChars );
  895. }
  896. $url = Sanitizer::cleanUrl( $url );
  897. # Is this an external image?
  898. $text = $this->maybeMakeExternalImage( $url );
  899. if ( $text === false ) {
  900. # Not an image, make a link
  901. $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free',
  902. $this->getExternalLinkAttribs( $url ) );
  903. # Register it in the output object...
  904. # Replace unnecessary URL escape codes with their equivalent characters
  905. $pasteurized = self::replaceUnusualEscapes( $url );
  906. $this->mOutput->addExternalLink( $pasteurized );
  907. }
  908. wfProfileOut( __METHOD__ );
  909. return $text . $trail;
  910. }
  911. /**
  912. * Parse headers and return html
  913. *
  914. * @private
  915. */
  916. function doHeadings( $text ) {
  917. wfProfileIn( __METHOD__ );
  918. for ( $i = 6; $i >= 1; --$i ) {
  919. $h = str_repeat( '=', $i );
  920. $text = preg_replace( "/^$h(.+)$h\\s*$/m",
  921. "<h$i>\\1</h$i>", $text );
  922. }
  923. wfProfileOut( __METHOD__ );
  924. return $text;
  925. }
  926. /**
  927. * Replace single quotes with HTML markup
  928. * @private
  929. * @return string the altered text
  930. */
  931. function doAllQuotes( $text ) {
  932. wfProfileIn( __METHOD__ );
  933. $outtext = '';
  934. $lines = StringUtils::explode( "\n", $text );
  935. foreach ( $lines as $line ) {
  936. $outtext .= $this->doQuotes( $line ) . "\n";
  937. }
  938. $outtext = substr($outtext, 0,-1);
  939. wfProfileOut( __METHOD__ );
  940. return $outtext;
  941. }
  942. /**
  943. * Helper function for doAllQuotes()
  944. */
  945. public function doQuotes( $text ) {
  946. $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
  947. if ( count( $arr ) == 1 )
  948. return $text;
  949. else
  950. {
  951. # First, do some preliminary work. This may shift some apostrophes from
  952. # being mark-up to being text. It also counts the number of occurrences
  953. # of bold and italics mark-ups.
  954. $i = 0;
  955. $numbold = 0;
  956. $numitalics = 0;
  957. foreach ( $arr as $r )
  958. {
  959. if ( ( $i % 2 ) == 1 )
  960. {
  961. # If there are ever four apostrophes, assume the first is supposed to
  962. # be text, and the remaining three constitute mark-up for bold text.
  963. if ( strlen( $arr[$i] ) == 4 )
  964. {
  965. $arr[$i-1] .= "'";
  966. $arr[$i] = "'''";
  967. }
  968. # If there are more than 5 apostrophes in a row, assume they're all
  969. # text except for the last 5.
  970. else if ( strlen( $arr[$i] ) > 5 )
  971. {
  972. $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
  973. $arr[$i] = "'''''";
  974. }
  975. # Count the number of occurrences of bold and italics mark-ups.
  976. # We are not counting sequences of five apostrophes.
  977. if ( strlen( $arr[$i] ) == 2 ) { $numitalics++; }
  978. else if ( strlen( $arr[$i] ) == 3 ) { $numbold++; }
  979. else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
  980. }
  981. $i++;
  982. }
  983. # If there is an odd number of both bold and italics, it is likely
  984. # that one of the bold ones was meant to be an apostrophe followed
  985. # by italics. Which one we cannot know for certain, but it is more
  986. # likely to be one that has a single-letter word before it.
  987. if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
  988. {
  989. $i = 0;
  990. $firstsingleletterword = -1;
  991. $firstmultiletterword = -1;
  992. $firstspace = -1;
  993. foreach ( $arr as $r )
  994. {
  995. if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
  996. {
  997. $x1 = substr ($arr[$i-1], -1);
  998. $x2 = substr ($arr[$i-1], -2, 1);
  999. if ($x1 === ' ') {
  1000. if ($firstspace == -1) $firstspace = $i;
  1001. } else if ($x2 === ' ') {
  1002. if ($firstsingleletterword == -1) $firstsingleletterword = $i;
  1003. } else {
  1004. if ($firstmultiletterword == -1) $firstmultiletterword = $i;
  1005. }
  1006. }
  1007. $i++;
  1008. }
  1009. # If there is a single-letter word, use it!
  1010. if ($firstsingleletterword > -1)
  1011. {
  1012. $arr [ $firstsingleletterword ] = "''";
  1013. $arr [ $firstsingleletterword-1 ] .= "'";
  1014. }
  1015. # If not, but there's a multi-letter word, use that one.
  1016. else if ($firstmultiletterword > -1)
  1017. {
  1018. $arr [ $firstmultiletterword ] = "''";
  1019. $arr [ $firstmultiletterword-1 ] .= "'";
  1020. }
  1021. # ... otherwise use the first one that has neither.
  1022. # (notice that it is possible for all three to be -1 if, for example,
  1023. # there is only one pentuple-apostrophe in the line)
  1024. else if ($firstspace > -1)
  1025. {
  1026. $arr [ $firstspace ] = "''";
  1027. $arr [ $firstspace-1 ] .= "'";
  1028. }
  1029. }
  1030. # Now let's actually convert our apostrophic mush to HTML!
  1031. $output = '';
  1032. $buffer = '';
  1033. $state = '';
  1034. $i = 0;
  1035. foreach ($arr as $r)
  1036. {
  1037. if (($i % 2) == 0)
  1038. {
  1039. if ($state === 'both')
  1040. $buffer .= $r;
  1041. else
  1042. $output .= $r;
  1043. }
  1044. else
  1045. {
  1046. if (strlen ($r) == 2)
  1047. {
  1048. if ($state === 'i')
  1049. { $output .= '</i>'; $state = ''; }
  1050. else if ($state === 'bi')
  1051. { $output .= '</i>'; $state = 'b'; }
  1052. else if ($state === 'ib')
  1053. { $output .= '</b></i><b>'; $state = 'b'; }
  1054. else if ($state === 'both')
  1055. { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
  1056. else # $state can be 'b' or ''
  1057. { $output .= '<i>'; $state .= 'i'; }
  1058. }
  1059. else if (strlen ($r) == 3)
  1060. {
  1061. if ($state === 'b')
  1062. { $output .= '</b>'; $state = ''; }
  1063. else if ($state === 'bi')
  1064. { $output .= '</i></b><i>'; $state = 'i'; }
  1065. else if ($state === 'ib')
  1066. { $output .= '</b>'; $state = 'i'; }
  1067. else if ($state === 'both')
  1068. { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
  1069. else # $state can be 'i' or ''
  1070. { $output .= '<b>'; $state .= 'b'; }
  1071. }
  1072. else if (strlen ($r) == 5)
  1073. {
  1074. if ($state === 'b')
  1075. { $output .= '</b><i>'; $state = 'i'; }
  1076. else if ($state === 'i')
  1077. { $output .= '</i><b>'; $state = 'b'; }
  1078. else if ($state === 'bi')
  1079. { $output .= '</i></b>'; $state = ''; }
  1080. else if ($state === 'ib')
  1081. { $output .= '</b></i>'; $state = ''; }
  1082. else if ($state === 'both')
  1083. { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
  1084. else # ($state == '')
  1085. { $buffer = ''; $state = 'both'; }
  1086. }
  1087. }
  1088. $i++;
  1089. }
  1090. # Now close all remaining tags. Notice that the order is important.
  1091. if ($state === 'b' || $state === 'ib')
  1092. $output .= '</b>';
  1093. if ($state === 'i' || $state === 'bi' || $state === 'ib')
  1094. $output .= '</i>';
  1095. if ($state === 'bi')
  1096. $output .= '</b>';
  1097. # There might be lonely ''''', so make sure we have a buffer
  1098. if ($state === 'both' && $buffer)
  1099. $output .= '<b><i>'.$buffer.'</i></b>';
  1100. return $output;
  1101. }
  1102. }
  1103. /**
  1104. * Replace external links (REL)
  1105. *
  1106. * Note: this is all very hackish and the order of execution matters a lot.
  1107. * Make sure to run maintenance/parserTests.php if you change this code.
  1108. *
  1109. * @private
  1110. */
  1111. function replaceExternalLinks( $text ) {
  1112. global $wgContLang;
  1113. wfProfileIn( __METHOD__ );
  1114. $sk = $this->mOptions->getSkin();
  1115. $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
  1116. $s = array_shift( $bits );
  1117. $i = 0;
  1118. while ( $i<count( $bits ) ) {
  1119. $url = $bits[$i++];
  1120. $protocol = $bits[$i++];
  1121. $text = $bits[$i++];
  1122. $trail = $bits[$i++];
  1123. # The characters '<' and '>' (which were escaped by
  1124. # removeHTMLtags()) should not be included in
  1125. # URLs, per RFC 2396.
  1126. $m2 = array();
  1127. if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
  1128. $text = substr($url, $m2[0][1]) . ' ' . $text;
  1129. $url = substr($url, 0, $m2[0][1]);
  1130. }
  1131. # If the link text is an image URL, replace it with an <img> tag
  1132. # This happened by accident in the original parser, but some people used it extensively
  1133. $img = $this->maybeMakeExternalImage( $text );
  1134. if ( $img !== false ) {
  1135. $text = $img;
  1136. }
  1137. $dtrail = '';
  1138. # Set linktype for CSS - if URL==text, link is essentially free
  1139. $linktype = ($text === $url) ? 'free' : 'text';
  1140. # No link text, e.g. [http://domain.tld/some.link]
  1141. if ( $text == '' ) {
  1142. # Autonumber if allowed. See bug #5918
  1143. if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) {
  1144. $langObj = $this->getFunctionLang();
  1145. $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
  1146. $linktype = 'autonumber';
  1147. } else {
  1148. # Otherwise just use the URL
  1149. $text = htmlspecialchars( $url );
  1150. $linktype = 'free';
  1151. }
  1152. } else {
  1153. # Have link text, e.g. [http://domain.tld/some.link text]s
  1154. # Check for trail
  1155. list( $dtrail, $trail ) = Linker::splitTrail( $trail );
  1156. }
  1157. $text = $wgContLang->markNoConversion($text);
  1158. $url = Sanitizer::cleanUrl( $url );
  1159. # Use the encoded URL
  1160. # This means that users can paste URLs directly into the text
  1161. # Funny characters like &ouml; aren't valid in URLs anyway
  1162. # This was changed in August 2004
  1163. $s .= $sk->makeExternalLink( $url, $text, false, $linktype,
  1164. $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;
  1165. # Register link in the output object.
  1166. # Replace unnecessary URL escape codes with the referenced character
  1167. # This prevents spammers from hiding links from the filters
  1168. $pasteurized = self::replaceUnusualEscapes( $url );
  1169. $this->mOutput->addExternalLink( $pasteurized );
  1170. }
  1171. wfProfileOut( __METHOD__ );
  1172. return $s;
  1173. }
  1174. /**
  1175. * Get an associative array of additional HTML attributes appropriate for a
  1176. * particular external link. This currently may include rel => nofollow
  1177. * (depending on configuration, namespace, and the URL's domain) and/or a
  1178. * target attribute (depending on configuration).
  1179. *
  1180. * @param string $url Optional URL, to extract the domain from for rel =>
  1181. * nofollow if appropriate
  1182. * @return array Associative array of HTML attributes
  1183. */
  function getExternalLinkAttribs( $url = false ) {
      global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

      $attribs = array();

      # rel="nofollow" applies unless disabled globally or for this page's namespace
      $ns = $this->mTitle->getNamespace();
      if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) ) {
          $attribs['rel'] = 'nofollow';

          # Only parse the URL when there is an exception list to check it against
          if ( $wgNoFollowDomainExceptions ) {
              $bits = wfParseUrl( $url );
              if ( is_array( $bits ) && isset( $bits['host'] ) ) {
                  # Host names are case-insensitive
                  $host = strtolower( $bits['host'] );
                  foreach ( $wgNoFollowDomainExceptions as $domain ) {
                      # Tolerate entries written with a leading dot (".example.com")
                      $domain = strtolower( ltrim( $domain, '.' ) );
                      if ( $domain === '' ) {
                          continue;
                      }
                      # Match the domain itself or a real subdomain of it. A plain
                      # suffix comparison would also exempt "evilexample.com" when
                      # the exception is "example.com".
                      $dotted = '.' . $domain;
                      if ( $host === $domain
                          || substr( $host, -strlen( $dotted ) ) === $dotted
                      ) {
                          unset( $attribs['rel'] );
                          break;
                      }
                  }
              }
          }
      }

      # Optional target attribute, taken from the parser options
      $target = $this->mOptions->getExternalLinkTarget();
      if ( $target ) {
          $attribs['target'] = $target;
      }

      return $attribs;
  }
  1209. /**
  1210. * Replace unusual URL escape codes with their equivalent characters
  1211. * @param string $url URL whose percent-escapes should be normalised
  1212. * @return string
  1213. * @static
  1214. * @todo This can merge genuinely required bits in the path or query string,
  1215. * breaking legit URLs. A proper fix would treat the various parts of
  1216. * the URL differently; as a workaround, just use the output for
  1217. * statistical records, not for actual linking/output.
  1218. */
  static function replaceUnusualEscapes( $url ) {
      # Every %XX sequence is handed to the callback, which decides
      # whether it is decoded or left as it is.
      $escapePattern = '/%[0-9A-Fa-f]{2}/';
      $handler = array( __CLASS__, 'replaceUnusualEscapesCallback' );

      return preg_replace_callback( $escapePattern, $handler, $url );
  }
  1223. /**
  1224. * Callback function used in replaceUnusualEscapes().
  1225. * Replaces unusual URL escape codes with their equivalent character
  1226. * @static
  1227. * @private
  1228. */
  private static function replaceUnusualEscapesCallback( $matches ) {
      $escaped = $matches[0];
      $decoded = urldecode( $escaped );
      $code = ord( $decoded );

      # Control characters, the space, DEL and non-ASCII bytes stay escaped
      if ( $code <= 32 || $code >= 127 ) {
          return $escaped;
      }

      # So do the characters RFC 1738 lists as unsafe or reserved
      if ( strpos( '<>"#{}|\^~[]`;/?', $decoded ) !== false ) {
          return $escaped;
      }

      # Anything else did not need escaping in the first place
      return $decoded;
  }
  1241. /**
  1242. * make an image if it's allowed, either through the global
  1243. * option, through the exception, or through the on-wiki whitelist
  1244. * @private
  1245. */
  function maybeMakeExternalImage( $url ) {
      $skin = $this->mOptions->getSkin();
      $allowedPrefixes = $this->mOptions->getAllowExternalImagesFrom();
      $hasPrefixes = !empty( $allowedPrefixes );
      $html = false;

      # The allowed-prefix option is either one string or a list of strings;
      # in both cases the URL has to start with the configured value.
      $prefixMatched = false;
      if ( $hasPrefixes ) {
          if ( is_array( $allowedPrefixes ) ) {
              foreach ( $allowedPrefixes as $candidate ) {
                  if ( strpos( $url, $candidate ) === 0 ) {
                      $prefixMatched = true;
                      break;
                  }
              }
          } else {
              $prefixMatched = ( strpos( $url, $allowedPrefixes ) === 0 );
          }
      }

      # Allowed globally or through a prefix exception
      $permitted = $this->mOptions->getAllowExternalImages()
          || ( $hasPrefixes && $prefixMatched );
      if ( $permitted && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
          # Image found
          $html = $skin->makeExternalImage( $url );
      }

      # Otherwise fall back to the on-wiki whitelist, if it is enabled
      if ( !$html && $this->mOptions->getEnableImageWhitelist()
          && preg_match( self::EXT_IMAGE_REGEX, $url )
      ) {
          $entries = explode( "\n", wfMsgForContent( 'external_image_whitelist' ) );
          foreach ( $entries as $pattern ) {
              # Blank lines and lines starting with '#' are not patterns
              if ( $pattern === '' || strpos( $pattern, '#' ) === 0 ) {
                  continue;
              }
              # Escape the delimiter and match case-insensitively
              $regex = '/' . str_replace( '/', '\\/', $pattern ) . '/i';
              if ( preg_match( $regex, $url ) ) {
                  # Image matches a whitelist entry
                  $html = $skin->makeExternalImage( $url );
                  break;
              }
          }
      }

      return $html;
  }
  1288. /**
  1289. * Process [[ ]] wikilinks
  1290. * @return processed text
  1291. *
  1292. * @private
  1293. */
  function replaceInternalLinks( $s ) {
      # replaceInternalLinks2() rewrites $s by reference and hands back the
      # holders it created; those are folded into the parser-wide collection.
      $newHolders = $this->replaceInternalLinks2( $s );
      $this->mLinkHolders->merge( $newHolders );

      return $s;
  }
  1298. /**
  1299. * Process [[ ]] wikilinks (RIL)
  1300. * @return LinkHolderArray
  1301. *
  1302. * @private
  1303. */
  function replaceInternalLinks2( &$s ) {
      global $wgContLang;

      # Overview: the text is split on '[[' and $s is rebuilt in place, one
      # chunk at a time. Chunks that are not valid links are re-emitted
      # verbatim; ordinary page links become placeholders collected in the
      # LinkHolderArray that is returned to the caller.
      wfProfileIn( __METHOD__ );

      wfProfileIn( __METHOD__.'-setup' );
      static $tc = false, $e1, $e1_img;
      # the % is needed to support urlencoded titles as well
      if ( !$tc ) {
          $tc = Title::legalChars() . '#%';
          # Match a link having the form [[namespace:link|alternate]]trail
          $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
          # Match cases where there is no "]]", which might still be images
          $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
      }

      $sk = $this->mOptions->getSkin();
      $holders = new LinkHolderArray( $this );

      # split the entire text string on occurrences of [[
      $a = StringUtils::explode( '[[', ' ' . $s );
      # get the first element (all text up to first [[), and remove the space we added
      $s = $a->current();
      $a->next();
      $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
      $s = substr( $s, 1 );

      $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
      $e2 = null;
      if ( $useLinkPrefixExtension ) {
          # Match the end of a line for a word that's not followed by whitespace,
          # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
          $e2 = wfMsgForContent( 'linkprefix' );
      }

      if ( is_null( $this->mTitle ) ) {
          wfProfileOut( __METHOD__.'-setup' );
          wfProfileOut( __METHOD__ );
          throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
      }
      $nottalk = !$this->mTitle->isTalkPage();

      if ( $useLinkPrefixExtension ) {
          $m = array();
          if ( preg_match( $e2, $s, $m ) ) {
              $first_prefix = $m[2];
          } else {
              $first_prefix = false;
          }
      } else {
          $prefix = '';
      }

      if ( $wgContLang->hasVariants() ) {
          $selflink = $wgContLang->convertLinkToAllVariants( $this->mTitle->getPrefixedText() );
      } else {
          $selflink = array( $this->mTitle->getPrefixedText() );
      }
      $useSubpages = $this->areSubpagesAllowed();
      wfProfileOut( __METHOD__.'-setup' );

      # Loop for each link
      for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) {
          # Check for excessive memory usage
          if ( $holders->isBig() ) {
              # Too big
              # Do the existence check, replace the link holders and clear the array
              $holders->replace( $s );
              $holders->clear();
          }

          if ( $useLinkPrefixExtension ) {
              wfProfileIn( __METHOD__.'-prefixhandling' );
              if ( preg_match( $e2, $s, $m ) ) {
                  $prefix = $m[2];
                  $s = $m[1];
              } else {
                  $prefix = '';
              }
              # first link
              if ( $first_prefix ) {
                  $prefix = $first_prefix;
                  $first_prefix = false;
              }
              wfProfileOut( __METHOD__.'-prefixhandling' );
          }

          $might_be_img = false;

          wfProfileIn( __METHOD__."-e1" );
          if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
              $text = $m[2];
              # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
              # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
              # the real problem is with the $e1 regex
              # See bug 1300.
              #
              # Still some problems for cases where the ] is meant to be outside punctuation,
              # and no image is in sight. See bug 2095.
              #
              if ( $text !== '' &&
                  substr( $m[3], 0, 1 ) === ']' &&
                  strpos( $text, '[' ) !== false
              ) {
                  $text .= ']'; # so that replaceExternalLinks($text) works later
                  $m[3] = substr( $m[3], 1 );
              }
              # fix up urlencoded title texts
              if ( strpos( $m[1], '%' ) !== false ) {
                  # rawurldecode() rather than urldecode(): the latter also turns a
                  # literal '+' into a space, which corrupts titles such as "C++ 100%".
                  # Should anchors '#' also be rejected?
                  $m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
              }
              $trail = $m[3];
          } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption
              $might_be_img = true;
              $text = $m[2];
              if ( strpos( $m[1], '%' ) !== false ) {
                  # Same '+' caveat as above
                  $m[1] = rawurldecode( $m[1] );
              }
              $trail = "";
          } else { # Invalid form; output directly
              $s .= $prefix . '[[' . $line;
              wfProfileOut( __METHOD__."-e1" );
              continue;
          }
          wfProfileOut( __METHOD__."-e1" );
          wfProfileIn( __METHOD__."-misc" );

          # Don't allow internal links to pages containing
          # PROTO: where PROTO is a valid URL protocol; these
          # should be external links.
          if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) {
              $s .= $prefix . '[[' . $line;
              wfProfileOut( __METHOD__."-misc" );
              continue;
          }

          # Make subpage if necessary
          if ( $useSubpages ) {
              $link = $this->maybeDoSubpageLink( $m[1], $text );
          } else {
              $link = $m[1];
          }

          # A leading ':' forces an ordinary link (no image/category/language handling)
          $noforce = ( substr( $m[1], 0, 1 ) !== ':' );
          if ( !$noforce ) {
              # Strip off leading ':'
              $link = substr( $link, 1 );
          }

          wfProfileOut( __METHOD__."-misc" );
          wfProfileIn( __METHOD__."-title" );
          $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) );
          if ( $nt === null ) {
              $s .= $prefix . '[[' . $line;
              wfProfileOut( __METHOD__."-title" );
              continue;
          }

          $ns = $nt->getNamespace();
          $iw = $nt->getInterWiki();
          wfProfileOut( __METHOD__."-title" );

          if ( $might_be_img ) { # if this is actually an invalid link
              wfProfileIn( __METHOD__."-might_be_img" );
              if ( $ns == NS_FILE && $noforce ) { # but might be an image
                  $found = false;
                  while ( true ) {
                      # look at the next 'line' to see if we can close it there
                      $a->next();
                      $next_line = $a->current();
                      if ( $next_line === false || $next_line === null ) {
                          break;
                      }
                      $m = explode( ']]', $next_line, 3 );
                      if ( count( $m ) == 3 ) {
                          # the first ]] closes the inner link, the second the image
                          $found = true;
                          $text .= "[[{$m[0]}]]{$m[1]}";
                          $trail = $m[2];
                          break;
                      } elseif ( count( $m ) == 2 ) {
                          # if there's exactly one ]] that's fine, we'll keep looking
                          $text .= "[[{$m[0]}]]{$m[1]}";
                      } else {
                          # if $next_line is invalid too, we need look no further
                          $text .= '[[' . $next_line;
                          break;
                      }
                  }
                  if ( !$found ) {
                      # we couldn't find the end of this imageLink, so output it raw
                      # but don't ignore what might be perfectly normal links in the text we've examined
                      $holders->merge( $this->replaceInternalLinks2( $text ) );
                      $s .= "{$prefix}[[$link|$text";
                      # note: no $trail, because without an end, there *is* no trail
                      wfProfileOut( __METHOD__."-might_be_img" );
                      continue;
                  }
              } else { # it's not an image, so output it raw
                  $s .= "{$prefix}[[$link|$text";
                  # note: no $trail, because without an end, there *is* no trail
                  wfProfileOut( __METHOD__."-might_be_img" );
                  continue;
              }
              wfProfileOut( __METHOD__."-might_be_img" );
          }

          $wasblank = ( '' == $text );
          if ( $wasblank ) {
              $text = $link;
          }

          # Link not escaped by : , create the various objects
          if ( $noforce ) {

              # Interwikis
              wfProfileIn( __METHOD__."-interwiki" );
              if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
                  $this->mOutput->addLanguageLink( $nt->getFullText() );
                  $s = rtrim( $s . $prefix );
                  $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
                  wfProfileOut( __METHOD__."-interwiki" );
                  continue;
              }
              wfProfileOut( __METHOD__."-interwiki" );

              if ( $ns == NS_FILE ) {
                  wfProfileIn( __METHOD__."-image" );
                  if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
                      # recursively parse links inside the image caption
                      # actually, this will parse them in any other parameters, too,
                      # but it might be hard to fix that, and it doesn't matter ATM
                      $text = $this->replaceExternalLinks( $text );
                      $holders->merge( $this->replaceInternalLinks2( $text ) );

                      # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
                      $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail;
                  }
                  $this->mOutput->addImage( $nt->getDBkey() );
                  wfProfileOut( __METHOD__."-image" );
                  continue;
              }

              if ( $ns == NS_CATEGORY ) {
                  wfProfileIn( __METHOD__."-category" );
                  $s = rtrim( $s . "\n" ); # bug 87

                  if ( $wasblank ) {
                      $sortkey = $this->getDefaultSort();
                  } else {
                      $sortkey = $text;
                  }
                  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
                  $sortkey = str_replace( "\n", '', $sortkey );
                  $sortkey = $wgContLang->convertCategoryKey( $sortkey );
                  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

                  /**
                   * Strip the whitespace Category links produce, see bug 87
                   * @todo We might want to use trim($tmp, "\n") here.
                   */
                  $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;

                  wfProfileOut( __METHOD__."-category" );
                  continue;
              }
          }

          # Self-link checking
          if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
              if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
                  $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
                  continue;
              }
          }

          # NS_MEDIA is a pseudo-namespace for linking directly to a file
          # FIXME: Should do batch file existence checks, see comment below
          if ( $ns == NS_MEDIA ) {
              wfProfileIn( __METHOD__."-media" );
              # Give extensions a chance to select the file revision for us
              $skip = $time = false;
              wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) );
              if ( $skip ) {
                  $link = $sk->link( $nt );
              } else {
                  $link = $sk->makeMediaLinkObj( $nt, $text, $time );
              }
              # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
              $s .= $prefix . $this->armorLinks( $link ) . $trail;
              $this->mOutput->addImage( $nt->getDBkey() );
              wfProfileOut( __METHOD__."-media" );
              continue;
          }

          wfProfileIn( __METHOD__."-always_known" );
          # Some titles, such as valid special pages or files in foreign repos, should
          # be shown as bluelinks even though they're not included in the page table
          #
          # FIXME: isAlwaysKnown() can be expensive for file links; we should really do
          # batch file existence checks for NS_FILE and NS_MEDIA
          if ( $iw == '' && $nt->isAlwaysKnown() ) {
              $this->mOutput->addLink( $nt );
              $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
          } else {
              # Links will be added to the output link list after checking
              $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
          }
          wfProfileOut( __METHOD__."-always_known" );
      }
      wfProfileOut( __METHOD__ );
      return $holders;
  }
  1587. /**
  1588. * Make a link placeholder. The text returned can be later resolved to a real link with
  1589. * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
  1590. * parsing of interwiki links, and secondly to allow all existence checks and
  1591. * article length checks (for stub links) to be bundled into a single query.
  1592. *
  1593. * @deprecated
  1594. */
  function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
      # Thin wrapper kept for old callers: the parser-wide holder array does the work
      $holderArray = $this->mLinkHolders;

      return $holderArray->makeHolder( $nt, $text, $query, $trail, $prefix );
  }
  1598. /**
  1599. * Render a forced-blue link inline; protect against double expansion of
  1600. * URLs if we're in a mode that prepends full URL prefixes to internal links.
  1601. * Since this little disaster has to split off the trail text to avoid
  1602. * breaking URLs in the following text without breaking trails on the
  1603. * wiki links, it's been made into a horrible function.
  1604. *
  1605. * @param Title $nt
  1606. * @param string $text
  1607. * @param string $query
  1608. * @param string $trail
  1609. * @param string $prefix
  1610. * @return string HTML-wikitext mix oh yuck
  1611. */
  function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
      # Split the trail: the first piece goes inside the link, the rest follows it
      $pieces = Linker::splitTrail( $trail );
      $inside = $pieces[0];
      $remainder = $pieces[1];

      $skin = $this->mOptions->getSkin();
      $html = $skin->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix );

      # Only the link itself is armoured; the remaining trail is appended as is
      return $this->armorLinks( $html ) . $remainder;
  }
  1618. /**
  1619. * Insert a NOPARSE hacky thing into any inline links in a chunk that's
  1620. * going to go through further parsing steps before inline URL expansion.
  1621. *
  1622. * Not needed quite as much as it used to be since free links are a bit
  1623. * more sensible these days. But bracketed links are still an issue.
  1624. *
  1625. * @param string more-or-less HTML
  1626. * @return string less-or-more HTML with NOPARSE bits
  1627. */
  function armorLinks( $text ) {
      # Prefix every URL protocol found at a word boundary with the NOPARSE marker
      $protocolPattern = '/\b(' . wfUrlProtocols() . ')/';
      $replacement = $this->mUniqPrefix . 'NOPARSE$1';

      return preg_replace( $protocolPattern, $replacement, $text );
  }
  1632. /**
  1633. * Return true if subpage links should be expanded on this page.
  1634. * @return bool
  1635. */
  function areSubpagesAllowed() {
      # Whether subpages exist is decided per namespace
      $namespace = $this->mTitle->getNamespace();

      return MWNamespace::hasSubpages( $namespace );
  }
  1640. /**
  1641. * Handle link to subpage if necessary
  1642. * @param string $target the source of the link
  1643. * @param string &$text the link text, modified as necessary
  1644. * @return string the full name of the link
  1645. * @private
  1646. */
  function maybeDoSubpageLink($target, &$text) {
      # Valid link forms:
      # Foobar -- normal
      # :Foobar -- override special treatment of prefix (images, language links)
      # /Foobar -- convert to CurrentPage/Foobar
      # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
      # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
      # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage

      wfProfileIn( __METHOD__ );
      $ret = $target; # default return value is no change

      # Some namespaces don't allow subpages,
      # so only perform processing if subpages are allowed
      if ( $this->areSubpagesAllowed() ) {
          # Set any #fragment aside; it is re-attached to the result
          $hash = strpos( $target, '#' );
          if ( $hash !== false ) {
              $suffix = substr( $target, $hash );
              $target = substr( $target, 0, $hash );
          } else {
              $suffix = '';
          }
          # bug 7425
          $target = trim( $target );
          # Look at the first character.
          # Square-bracket offset: the old $target{0} form is a parse error on PHP 8.
          if ( $target != '' && $target[0] === '/' ) {
              # / at end means we don't want the slash to be shown
              $m = array();
              $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m );
              if ( $trailingSlashes ) {
                  # Drop the leading slash and the whole run of trailing slashes
                  $noslash = $target = substr( $target, 1, -strlen( $m[0][0] ) );
              } else {
                  $noslash = substr( $target, 1 );
              }

              $ret = $this->mTitle->getPrefixedText() . '/' . trim( $noslash ) . $suffix;
              if ( '' === $text ) {
                  $text = $target . $suffix;
              } # this might be changed for ugliness reasons
          } else {
              # check for .. subpage backlinks
              $dotdotcount = 0;
              $nodotdot = $target;
              while ( strncmp( $nodotdot, "../", 3 ) == 0 ) {
                  ++$dotdotcount;
                  $nodotdot = substr( $nodotdot, 3 );
              }
              if ( $dotdotcount > 0 ) {
                  $exploded = explode( '/', $this->mTitle->getPrefixedText() );
                  if ( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page
                      $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) );
                      # / at the end means don't show full path
                      if ( substr( $nodotdot, -1, 1 ) === '/' ) {
                          $nodotdot = substr( $nodotdot, 0, -1 );
                          if ( '' === $text ) {
                              $text = $nodotdot . $suffix;
                          }
                      }
                      $nodotdot = trim( $nodotdot );
                      if ( $nodotdot != '' ) {
                          $ret .= '/' . $nodotdot;
                      }
                      $ret .= $suffix;
                  }
              }
          }
      }

      wfProfileOut( __METHOD__ );
      return $ret;
  }
  1714. /**#@+
  1715. * Used by doBlockLevels()
  1716. * @private
  1717. */
  /* private */ function closeParagraph() {
      # Remember which section (if any) is open, then reset the paragraph state
      $openSection = $this->mLastSection;
      $this->mInPre = false;
      $this->mLastSection = '';

      # Emit a closing tag only when a section was actually open
      return ( '' != $openSection ) ? '</' . $openSection . ">\n" : '';
  }
  1727. # getCommon() returns the length of the longest common prefix
  1728. # of both arguments, i.e. the number of leading characters they share.
  1729. #
  /* private */ function getCommon( $st1, $st2 ) {
      # Only positions present in both strings can match
      $shorter = min( strlen( $st1 ), strlen( $st2 ) );

      # Stop at the first position where the strings differ. Square-bracket
      # offsets are used because the {} offset syntax is a parse error on PHP 8.
      for ( $i = 0; $i < $shorter; ++$i ) {
          if ( $st1[$i] !== $st2[$i] ) {
              break;
          }
      }

      # $i is the number of leading characters the two strings have in common
      return $i;
  }
  1739. # These next three functions open, continue, and close the list
  1740. # element appropriate to the prefix character passed into them.
  1741. #
  /* private */ function openList( $char ) {
      # Opening markup for each list prefix character
      $openers = array(
          '*' => '<ul><li>',
          '#' => '<ol><li>',
          ':' => '<dl><dd>',
          ';' => '<dl><dt>',
      );

      # Any open paragraph is closed first, whatever the character is
      $html = $this->closeParagraph();

      if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
          # Unknown prefix: only the error marker is returned
          return '<!-- ERR 1 -->';
      }

      if ( $char === ';' ) {
          # A definition term is now open
          $this->mDTopen = true;
      }

      return $html . $openers[$char];
  }
  /* private */ function nextItem( $char ) {
      # Bulleted and numbered lists just start a new item
      if ( $char === '*' || $char === '#' ) {
          return '</li><li>';
      }

      # Anything other than a definition-list character is an error
      if ( $char !== ':' && $char !== ';' ) {
          return '<!-- ERR 2 -->';
      }

      # Close whichever definition-list element is currently open...
      $closing = $this->mDTopen ? '</dt>' : '</dd>';

      # ...and open the next one, recording whether it is a term
      $this->mDTopen = ( $char === ';' );
      $opening = $this->mDTopen ? '<dt>' : '<dd>';

      return $closing . $opening;
  }
  /* private */ function closeList( $char ) {
      if ( $char === '*' ) {
          $html = '</li></ul>';
      } elseif ( $char === '#' ) {
          $html = '</li></ol>';
      } elseif ( $char !== ':' && $char !== ';' ) {
          # Unknown prefix character
          return '<!-- ERR 3 -->';
      } elseif ( $this->mDTopen ) {
          # A term was still open: close it and clear the flag
          $this->mDTopen = false;
          $html = '</dt></dl>';
      } else {
          $html = '</dd></dl>';
      }

      return $html . "\n";
  }
  1783. /**#@-*/
  1784. /**
  1785. * Make lists from lines starting with ':', '*', '#', etc. (DBL)
  1786. *
  1787. * @private
  1788. * @return string the lists rendered as HTML
  1789. */
  function doBlockLevels( $text, $linestart ) {
      wfProfileIn( __METHOD__ );

      # Parsing through the text line by line. The main thing
      # happening here is handling of block-level elements p, pre,
      # and making lists from lines starting with * # : etc.
      #
      $textLines = StringUtils::explode( "\n", $text );

      $lastPrefix = $output = $lastPrefix2 = '';
      $this->mDTopen = $inBlockElem = false;
      $prefixLength = 0;
      # Either false, or paragraph markup held back until the next line is seen
      $paragraphStack = false;

      foreach ( $textLines as $oLine ) {
          # Fix up $linestart
          if ( !$linestart ) {
              # The text did not begin at a line start: pass the first line through
              $output .= $oLine;
              $linestart = true;
              continue;
          }

          $lastPrefixLength = strlen( $lastPrefix );
          $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
          $preOpenMatch = preg_match( '/<pre/i', $oLine );
          if ( !$this->mInPre ) {
              # Multiple prefixes may abut each other for nested lists.
              $prefixLength = strspn( $oLine, '*#:;' );
              $prefix = substr( $oLine, 0, $prefixLength );
              if ( substr( $prefix, -1 ) === ':' && substr( $prefix, 0, 1 ) === ';' ) {
                  # ";...:" -- leave the final ':' in the line text instead of the prefix
                  --$prefixLength;
                  $prefix = substr( $oLine, 0, $prefixLength );
              }

              # ';' and ':' are treated as the same list level when comparing prefixes
              $prefix2 = str_replace( ';', ':', $prefix );
              $t = substr( $oLine, $prefixLength );
              $this->mInPre = (bool)$preOpenMatch;
          } else {
              # Don't interpret any other prefixes in preformatted text
              $prefixLength = 0;
              $prefix = $prefix2 = '';
              $t = $oLine;
          }

          # List generation
          if ( $prefixLength > 0 && $lastPrefix2 === $prefix2 ) {
              # Same as the last item, so no need to deal with nesting or opening stuff
              $output .= $this->nextItem( substr( $prefix, -1 ) );
              $paragraphStack = false;

              if ( substr( $prefix, -1 ) === ';' ) {
                  # The one nasty exception: definition lists work like this:
                  # ; title : definition text
                  # So we check for : in the remainder text to split up the
                  # title and definition, without b0rking links.
                  $output .= $this->splitDefinitionTerm( $t );
              }
          } elseif ( $prefixLength > 0 || $lastPrefixLength > 0 ) {
              # Either open or close a level...
              $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
              $paragraphStack = false;

              # Close the levels of the previous line that this line does not share
              while ( $commonPrefixLength < $lastPrefixLength ) {
                  $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                  --$lastPrefixLength;
              }
              if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                  $output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
              }
              # Open the levels that are new on this line
              while ( $prefixLength > $commonPrefixLength ) {
                  $char = substr( $prefix, $commonPrefixLength, 1 );
                  $output .= $this->openList( $char );

                  if ( ';' === $char ) {
                      # "; title : definition" on the line that opens the list
                      $output .= $this->splitDefinitionTerm( $t );
                  }
                  ++$commonPrefixLength;
              }
              $lastPrefix = $prefix;
              $lastPrefix2 = $prefix2;
          }
          if ( 0 === $prefixLength ) {
              wfProfileIn( __METHOD__."-paragraph" );
              # No prefix (not in list)--go to paragraph mode
              // XXX: use a stack for nestable elements like span, table and div
              $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
              $closematch = preg_match(
                  '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                  '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
              if ( $openmatch or $closematch ) {
                  $paragraphStack = false;
                  # TODO bug 5718: paragraph closed
                  $output .= $this->closeParagraph();
                  if ( $preOpenMatch and !$preCloseMatch ) {
                      $this->mInPre = true;
                  }
                  if ( $closematch ) {
                      $inBlockElem = false;
                  } else {
                      $inBlockElem = true;
                  }
              } else if ( !$inBlockElem && !$this->mInPre ) {
                  if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
                      // pre
                      if ( $this->mLastSection !== 'pre' ) {
                          $paragraphStack = false;
                          $output .= $this->closeParagraph().'<pre>';
                          $this->mLastSection = 'pre';
                      }
                      $t = substr( $t, 1 );
                  } else {
                      // paragraph
                      if ( '' == trim($t) ) {
                          if ( $paragraphStack ) {
                              $output .= $paragraphStack.'<br />';
                              $paragraphStack = false;
                              $this->mLastSection = 'p';
                          } else {
                              if ( $this->mLastSection !== 'p' ) {
                                  $output .= $this->closeParagraph();
                                  $this->mLastSection = '';
                                  $paragraphStack = '<p>';
                              } else {
                                  $paragraphStack = '</p><p>';
                              }
                          }
                      } else {
                          if ( $paragraphStack ) {
                              $output .= $paragraphStack;
                              $paragraphStack = false;
                              $this->mLastSection = 'p';
                          } else if ( $this->mLastSection !== 'p' ) {
                              $output .= $this->closeParagraph().'<p>';
                              $this->mLastSection = 'p';
                          }
                      }
                  }
              }
              wfProfileOut( __METHOD__."-paragraph" );
          }
          // somewhere above we forget to get out of pre block (bug 785)
          if ( $preCloseMatch && $this->mInPre ) {
              $this->mInPre = false;
          }
          # While paragraph markup is being held back, the line itself is not emitted
          if ( $paragraphStack === false ) {
              $output .= $t."\n";
          }
      }
      # Close whatever list levels the last line left open
      while ( $prefixLength ) {
          $output .= $this->closeList( $prefix2[$prefixLength-1] );
          --$prefixLength;
      }
      if ( '' != $this->mLastSection ) {
          $output .= '</' . $this->mLastSection . '>';
          $this->mLastSection = '';
      }

      wfProfileOut( __METHOD__ );
      return $output;
  }

  /**
   * Helper for doBlockLevels(): handle the "; term : definition" one-line form.
   *
   * If findColonNoLinks() finds a splitting colon in $t, $t is reduced to the
   * text after it and the term plus the markup that switches from the term to
   * the definition is returned. Otherwise $t is left alone.
   *
   * @param string &$t Remainder of the line after the list prefix
   * @return string Markup to append to the output, '' if there was no split
   * @private
   */
  private function splitDefinitionTerm( &$t ) {
      $term = $definition = '';
      if ( $this->findColonNoLinks( $t, $term, $definition ) === false ) {
          return '';
      }
      $t = $definition;
      return $term . $this->nextItem( ':' );
  }
  1950. /**
  1951. * Split up a string on ':', ignoring any occurrences inside tags
  1952. * to prevent illegal overlapping.
  1953. * @param string $str the string to split
  1954. * @param string &$before set to everything before the ':'
  1955. * @param string &$after set to everything after the ':'
  1956. * @return int|false the position of the ':', or false if none found
  1957. */
  function findColonNoLinks($str, &$before, &$after) {
      // Finds the first ':' in $str that is neither inside a tag/comment nor
      // nested between an opening and a closing tag, and splits $str there.
      // On success $before and $after receive the text on either side of the
      // colon and its byte offset is returned. On failure false is returned
      // and $before/$after are not assigned.
      wfProfileIn( __METHOD__ );

      $pos = strpos( $str, ':' );
      if ( $pos === false ) {
          // Nothing to find!
          wfProfileOut( __METHOD__ );
          return false;
      }

      $lt = strpos( $str, '<' );
      if ( $lt === false || $lt > $pos ) {
          // Easy; no tag nesting to worry about
          $before = substr( $str, 0, $pos );
          $after = substr( $str, $pos + 1 );
          wfProfileOut( __METHOD__ );
          return $pos;
      }

      // Ugly state machine to walk through avoiding tags.
      $state = self::COLON_STATE_TEXT;
      $stack = 0; // nesting depth: opened tags that have not been closed yet
      $len = strlen( $str );
      for ( $i = 0; $i < $len; $i++ ) {
          // Bracket offsets: the $str{$i} form is deprecated in PHP 7.4 and gone in 8.0
          $c = $str[$i];
          switch ( $state ) {
              case self::COLON_STATE_TEXT:
                  switch ( $c ) {
                      case "<":
                          // Could be either a <start> tag or an </end> tag
                          $state = self::COLON_STATE_TAGSTART;
                          break;
                      case ":":
                          if ( $stack == 0 ) {
                              // We found it!
                              $before = substr( $str, 0, $i );
                              $after = substr( $str, $i + 1 );
                              wfProfileOut( __METHOD__ );
                              return $i;
                          }
                          // Embedded in a tag; don't break it.
                          break;
                      default:
                          // Skip ahead looking for something interesting
                          $colon = strpos( $str, ':', $i );
                          if ( $colon === false ) {
                              // Nothing else interesting
                              wfProfileOut( __METHOD__ );
                              return false;
                          }
                          $lt = strpos( $str, '<', $i );
                          if ( $stack === 0 && ( $lt === false || $colon < $lt ) ) {
                              // We found it! Report the colon's own offset,
                              // matching what $before/$after were split on.
                              $before = substr( $str, 0, $colon );
                              $after = substr( $str, $colon + 1 );
                              wfProfileOut( __METHOD__ );
                              return $colon;
                          }
                          if ( $lt === false ) {
                              // Nothing else interesting to find; abort!
                              // We're nested, but there's no close tags left. Abort!
                              break 2;
                          }
                          // Skip ahead to next tag start; the loop increment
                          // then moves on to the character after the '<'.
                          $i = $lt;
                          $state = self::COLON_STATE_TAGSTART;
                  }
                  break;
              case self::COLON_STATE_TAG:
                  // In a <tag>
                  switch ( $c ) {
                      case ">":
                          $stack++;
                          $state = self::COLON_STATE_TEXT;
                          break;
                      case "/":
                          // Slash may be followed by >?
                          $state = self::COLON_STATE_TAGSLASH;
                          break;
                      default:
                          // ignore
                  }
                  break;
              case self::COLON_STATE_TAGSTART:
                  switch ( $c ) {
                      case "/":
                          $state = self::COLON_STATE_CLOSETAG;
                          break;
                      case "!":
                          $state = self::COLON_STATE_COMMENT;
                          break;
                      case ">":
                          // Illegal early close? This shouldn't happen D:
                          $state = self::COLON_STATE_TEXT;
                          break;
                      default:
                          $state = self::COLON_STATE_TAG;
                  }
                  break;
              case self::COLON_STATE_CLOSETAG:
                  // In a </tag>
                  if ( $c === ">" ) {
                      $stack--;
                      if ( $stack < 0 ) {
                          wfDebug( __METHOD__.": Invalid input; too many close tags\n" );
                          wfProfileOut( __METHOD__ );
                          return false;
                      }
                      $state = self::COLON_STATE_TEXT;
                  }
                  break;
              case self::COLON_STATE_TAGSLASH:
                  if ( $c === ">" ) {
                      // Yes, a self-closed tag <blah/>; it does not change the depth
                      $state = self::COLON_STATE_TEXT;
                  } else {
                      // Probably we're jumping the gun, and this is an attribute
                      $state = self::COLON_STATE_TAG;
                  }
                  break;
              case self::COLON_STATE_COMMENT:
                  if ( $c === "-" ) {
                      $state = self::COLON_STATE_COMMENTDASH;
                  }
                  break;
              case self::COLON_STATE_COMMENTDASH:
                  if ( $c === "-" ) {
                      $state = self::COLON_STATE_COMMENTDASHDASH;
                  } else {
                      $state = self::COLON_STATE_COMMENT;
                  }
                  break;
              case self::COLON_STATE_COMMENTDASHDASH:
                  if ( $c === ">" ) {
                      $state = self::COLON_STATE_TEXT;
                  } else {
                      $state = self::COLON_STATE_COMMENT;
                  }
                  break;
              default:
                  throw new MWException( "State machine error in " . __METHOD__ );
          }
      }

      if ( $stack > 0 ) {
          wfDebug( __METHOD__.": Invalid input; not enough close tags (stack $stack, state $state)\n" );
      }
      // Every exit, including the unbalanced-tag one, must close the profiling section
      wfProfileOut( __METHOD__ );
      return false;
  }
  /**
   * Return value of a magic variable (like PAGENAME)
   *
   * Values that are written to $this->mVarCache are served from there on
   * later calls, unless the 'ParserGetVariableValueVarCache' hook returns
   * false.
   *
   * @param string $index Magic variable ID, e.g. 'currentmonth' or 'pagename'
   * @return mixed The value of the variable; for IDs not handled here,
   *  whatever the 'ParserGetVariableValueSwitch' hook put into its result
   *  argument, or null
   * @private
   */
  function getVariableValue( $index ) {
      global $wgContLang, $wgSitename, $wgServer, $wgServerName, $wgScriptPath;
      global $wgLocaltimezone;

      // Hooks get the parser by reference; pass a local alias rather than
      // a reference to $this itself.
      $parser = $this;

      /**
       * Some of these require message or data lookups and can be
       * expensive to check many times.
       */
      if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$parser, &$this->mVarCache ) ) ) {
          if ( isset( $this->mVarCache[$index] ) ) {
              return $this->mVarCache[$index];
          }
      }

      $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
      wfRunHooks( 'ParserGetVariableValueTs', array( &$parser, &$ts ) );

      # Use the time zone
      if ( isset( $wgLocaltimezone ) ) {
          $oldtz = getenv( 'TZ' );
          putenv( 'TZ='.$wgLocaltimezone );
      }

      wfSuppressWarnings(); // E_STRICT system time bitching
      $localTimestamp = date( 'YmdHis', $ts );
      $localMonth = date( 'm', $ts );
      $localMonthNumber = date( 'n', $ts ); // month without leading zero, used for name lookups
      $localDay = date( 'j', $ts );
      $localDay2 = date( 'd', $ts );
      $localDayOfWeek = date( 'w', $ts );
      $localWeek = date( 'W', $ts );
      $localYear = date( 'Y', $ts );
      $localHour = date( 'H', $ts );
      if ( isset( $wgLocaltimezone ) ) {
          // getenv() returns false when TZ was not set at all. Restoring with
          // 'TZ=' would leave an empty TZ behind, so unset the variable instead.
          putenv( $oldtz === false ? 'TZ' : 'TZ='.$oldtz );
      }
      wfRestoreWarnings();

      switch ( $index ) {
          case 'currentmonth':
              return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) );
          case 'currentmonthname':
              return $this->mVarCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) );
          case 'currentmonthnamegen':
              return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) );
          case 'currentmonthabbrev':
              return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) );
          case 'currentday':
              return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) );
          case 'currentday2':
              return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) );
          case 'localmonth':
              return $this->mVarCache[$index] = $wgContLang->formatNum( $localMonth );
          case 'localmonthname':
              return $this->mVarCache[$index] = $wgContLang->getMonthName( $localMonthNumber );
          case 'localmonthnamegen':
              return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( $localMonthNumber );
          case 'localmonthabbrev':
              return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthNumber );
          case 'localday':
              return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay );
          case 'localday2':
              return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay2 );
          case 'pagename':
              return wfEscapeWikiText( $this->mTitle->getText() );
          case 'pagenamee':
              return $this->mTitle->getPartialURL();
          case 'fullpagename':
              return wfEscapeWikiText( $this->mTitle->getPrefixedText() );
          case 'fullpagenamee':
              return $this->mTitle->getPrefixedURL();
          case 'subpagename':
              return wfEscapeWikiText( $this->mTitle->getSubpageText() );
          case 'subpagenamee':
              return $this->mTitle->getSubpageUrlForm();
          case 'basepagename':
              return wfEscapeWikiText( $this->mTitle->getBaseText() );
          case 'basepagenamee':
              return wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
          case 'talkpagename':
              if ( $this->mTitle->canTalk() ) {
                  $talkPage = $this->mTitle->getTalkPage();
                  return wfEscapeWikiText( $talkPage->getPrefixedText() );
              } else {
                  return '';
              }
          case 'talkpagenamee':
              if ( $this->mTitle->canTalk() ) {
                  $talkPage = $this->mTitle->getTalkPage();
                  return $talkPage->getPrefixedUrl();
              } else {
                  return '';
              }
          case 'subjectpagename':
              $subjPage = $this->mTitle->getSubjectPage();
              return wfEscapeWikiText( $subjPage->getPrefixedText() );
          case 'subjectpagenamee':
              $subjPage = $this->mTitle->getSubjectPage();
              return $subjPage->getPrefixedUrl();
          case 'revisionid':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
              return $this->mRevisionId;
          case 'revisionday':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned. This is for null edits.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
              return intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
          case 'revisionday2':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned. This is for null edits.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
              return substr( $this->getRevisionTimestamp(), 6, 2 );
          case 'revisionmonth':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned. This is for null edits.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
              return intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
          case 'revisionyear':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned. This is for null edits.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
              return substr( $this->getRevisionTimestamp(), 0, 4 );
          case 'revisiontimestamp':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned. This is for null edits.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
              return $this->getRevisionTimestamp();
          case 'revisionuser':
              // Let the edit saving system know we should parse the page
              // *after* a revision ID has been assigned. This is for null edits.
              $this->mOutput->setFlag( 'vary-revision' );
              wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
              return $this->getRevisionUser();
          case 'namespace':
              return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) );
          case 'namespacee':
              return wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
          case 'talkspace':
              return $this->mTitle->canTalk() ? str_replace('_',' ',$this->mTitle->getTalkNsText()) : '';
          case 'talkspacee':
              return $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
          case 'subjectspace':
              return $this->mTitle->getSubjectNsText();
          case 'subjectspacee':
              return( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
          case 'currentdayname':
              return $this->mVarCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 );
          case 'currentyear':
              return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true );
          case 'currenttime':
              return $this->mVarCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false );
          case 'currenthour':
              return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true );
          case 'currentweek':
              // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
              // int to remove the padding
              return $this->mVarCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) );
          case 'currentdow':
              return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) );
          case 'localdayname':
              return $this->mVarCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 );
          case 'localyear':
              return $this->mVarCache[$index] = $wgContLang->formatNum( $localYear, true );
          case 'localtime':
              return $this->mVarCache[$index] = $wgContLang->time( $localTimestamp, false, false );
          case 'localhour':
              return $this->mVarCache[$index] = $wgContLang->formatNum( $localHour, true );
          case 'localweek':
              // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
              // int to remove the padding
              return $this->mVarCache[$index] = $wgContLang->formatNum( (int)$localWeek );
          case 'localdow':
              return $this->mVarCache[$index] = $wgContLang->formatNum( $localDayOfWeek );
          case 'numberofarticles':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::articles() );
          case 'numberoffiles':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::images() );
          case 'numberofusers':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::users() );
          case 'numberofactiveusers':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::activeUsers() );
          case 'numberofpages':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() );
          case 'numberofadmins':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::numberingroup('sysop') );
          case 'numberofedits':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::edits() );
          case 'numberofviews':
              return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::views() );
          case 'currenttimestamp':
              return $this->mVarCache[$index] = wfTimestamp( TS_MW, $ts );
          case 'localtimestamp':
              return $this->mVarCache[$index] = $localTimestamp;
          case 'currentversion':
              return $this->mVarCache[$index] = SpecialVersion::getVersion();
          case 'sitename':
              return $wgSitename;
          case 'server':
              return $wgServer;
          case 'servername':
              return $wgServerName;
          case 'scriptpath':
              return $wgScriptPath;
          case 'directionmark':
              return $wgContLang->getDirMark();
          case 'contentlanguage':
              global $wgContLanguageCode;
              return $wgContLanguageCode;
          default:
              // Unknown here: let extensions supply the value through $ret
              $ret = null;
              if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$parser, &$this->mVarCache, &$index, &$ret ) ) ) {
                  return $ret;
              }
              return null;
      }
  }
  /**
   * Set up $this->mVariables: a MagicWordArray built from the IDs of all
   * magic variables (like CURRENTMONTHNAME).
   *
   * @private
   */
  function initialiseVariables() {
      wfProfileIn( __METHOD__ );
      $this->mVariables = new MagicWordArray( MagicWord::getVariableIDs() );
      wfProfileOut( __METHOD__ );
  }
  /**
   * Run the preprocessor over some wikitext and hand back the resulting
   * document tree. This is the ghost of replace_variables().
   *
   * The generated DOM tree must depend only on the input text and the flags.
   * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a
   * regression of bug 4899.
   *
   * Any flag added to the $flags parameter here, or any other parameter
   * liable to cause a change in the DOM tree for a given text, must be passed
   * through the section identifier in the section edit link and thus back to
   * extractSections().
   *
   * The output of this function is currently only cached in process memory,
   * but a persistent cache may be implemented at a later date which takes
   * further advantage of these strict dependency requirements.
   *
   * @param string $text The text to parse
   * @param int $flags Bitwise combination of:
   *  self::PTD_FOR_INCLUSION  Handle <noinclude>/<includeonly> as if the text
   *                           is being included. Default is to assume a
   *                           direct page view.
   * @return mixed The tree produced by the preprocessor's preprocessToObj()
   * @private
   */
  function preprocessToDom ( $text, $flags = 0 ) {
      return $this->getPreprocessor()->preprocessToObj( $text, $flags );
  }
  /**
   * Split a string into leading whitespace, trimmed contents and trailing
   * whitespace. "Whitespace" means whatever ltrim()/rtrim() strip by default.
   *
   * @param string $s
   * @return array Three strings: array( leading whitespace, string contents,
   *  trailing whitespace ). Parts that do not occur are empty strings.
   */
  public static function splitWhitespace( $s ) {
      $ltrimmed = ltrim( $s );
      $leadLen = strlen( $s ) - strlen( $ltrimmed );
      // Guard the zero-length case: before PHP 8.0, substr( '', 0, 0 ) yields
      // false rather than '', which would leak a boolean into the result.
      $w1 = $leadLen > 0 ? substr( $s, 0, $leadLen ) : '';

      $trimmed = rtrim( $ltrimmed );
      $trailLen = strlen( $ltrimmed ) - strlen( $trimmed );
      $w2 = $trailLen > 0 ? substr( $ltrimmed, -$trailLen ) : '';

      return array( $w1, $trimmed, $w2 );
  }
  /**
   * Replace magic variables, templates, and template arguments
   * with the appropriate text. Templates are substituted recursively,
   * taking care to avoid infinite loops.
   *
   * Note that the substitution depends on value of $mOutputType:
   *  self::OT_WIKI: only {{subst:}} templates
   *  self::OT_PREPROCESS: templates but not extension tags
   *  self::OT_HTML: all templates and extension tags
   *
   * Empty text, and text longer than the configured maximum include size,
   * is returned unchanged.
   *
   * @param string $text The text to transform
   * @param PPFrame $frame Object describing the arguments passed to the template.
   *  Arguments may also be provided as an associative array, as was the usual
   *  case before MW1.12. Providing arguments this way may be useful for
   *  extensions wishing to perform variable replacement explicitly.
   * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion
   * @return string The transformed text
   * @private
   */
  function replaceVariables( $text, $frame = false, $argsOnly = false ) {
      # Is there any text? Also, Prevent too big inclusions!
      $size = strlen( $text );
      if ( $size < 1 || $size > $this->mOptions->getMaxIncludeSize() ) {
          return $text;
      }
      wfProfileIn( __METHOD__ );

      if ( !( $frame instanceof PPFrame ) ) {
          if ( $frame === false ) {
              # No frame given at all: start from a fresh one
              $frame = $this->getPreprocessor()->newFrame();
          } else {
              # Legacy call style: plain argument data instead of a frame object
              wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" );
              $frame = $this->getPreprocessor()->newCustomFrame($frame);
          }
      }

      $expanded = $frame->expand(
          $this->preprocessToDom( $text ),
          $argsOnly ? PPFrame::NO_TEMPLATES : 0
      );

      wfProfileOut( __METHOD__ );
      return $expanded;
  }
  /**
   * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
   *
   * Arguments without '=' are numbered from 1 in order of appearance.
   * Arguments of the form "name=value" are split at the first '=' and stored
   * under the trimmed name with the trimmed value; a later argument with the
   * same key overwrites an earlier one.
   *
   * @param array $args List of raw argument strings
   * @return array Map of argument name (or position) to value
   */
  static function createAssocArgs( $args ) {
      $assocArgs = array();
      $index = 1;
      foreach ( $args as $arg ) {
          $eqpos = strpos( $arg, '=' );
          if ( $eqpos === false ) {
              $assocArgs[$index++] = $arg;
              continue;
          }
          // trim() always returns a string, so the former "=== false" checks
          // on these two values could never fire and have been dropped.
          $name = trim( substr( $arg, 0, $eqpos ) );
          $value = trim( substr( $arg, $eqpos + 1 ) );
          $assocArgs[$name] = $value;
      }
      return $assocArgs;
  }
  /**
   * Warn the user when a parser limitation is reached: adds a warning to the
   * parser output and, if the category message yields a valid title, adds the
   * page to the matching tracking category.
   *
   * NOTE(review): earlier documentation said the user is warned at most once
   * per limitation type. Nothing in this method de-duplicates, so that
   * presumably relies on the output object's addWarning() - confirm.
   *
   * @param string $limitationType, should be one of:
   *  'expensive-parserfunction' (corresponding messages: 'expensive-parserfunction-warning', 'expensive-parserfunction-category')
   *  'post-expand-template-argument' (corresponding messages: 'post-expand-template-argument-warning', 'post-expand-template-argument-category')
   *  'post-expand-template-inclusion' (corresponding messages: 'post-expand-template-inclusion-warning', 'post-expand-template-inclusion-category')
   * @param int $current When an explicit limit has been exceeded, the value reached (optional)
   * @param int $max When an explicit limit has been exceeded, the limit itself (optional)
   */
  function limitationWarn( $limitationType, $current=null, $max=null) {
      // Passing $current and $max is harmless for messages that do not use them
      $this->mOutput->addWarning(
          wfMsgExt( $limitationType . '-warning', array( 'parsemag', 'escape' ), $current, $max )
      );

      $categoryName = wfMsgForContent( $limitationType . '-category' );
      $category = Title::makeTitleSafe( NS_CATEGORY, $categoryName );
      if ( !$category ) {
          // No usable category title; the warning alone has to do
          return;
      }
      $this->mOutput->addCategory( $category->getDBkey(), $this->getDefaultSort() );
  }
  /**
   * Expand one double-brace construct: a {{subst:}} leftover, a magic
   * variable, a parser function call or a template transclusion, replacing
   * any variables or templates within it recursively.
   *
   * @param array $piece The parts of the template
   *  $piece['title']: the title, i.e. the part before the |
   *  $piece['parts']: the parameter array
   *  $piece['lineStart']: whether the brace was at the start of a line
   * @param PPFrame $frame The current frame, contains template arguments
   * @return array Either array( 'object' => ... ), when nothing could be
   *  substituted or the result still has to be expanded in the current frame,
   *  or array( 'text' => ... ) holding the finished replacement
   * @private
   */
  function braceSubstitution( $piece, $frame ) {
      global $wgContLang, $wgNonincludableNamespaces;
      wfProfileIn( __METHOD__ );
      wfProfileIn( __METHOD__.'-setup' );

      # NOTE: the array returned by a parser function is extract()ed into this
      # scope further down, so the names of the locals below ($found, $nowiki,
      # $isHTML, $noparse, ...) are part of that return contract. Do not
      # rename them.

      # Flags
      $found = false;             # $text has been filled
      $nowiki = false;            # wiki markup in $text should be escaped
      $isHTML = false;            # $text is HTML, armour it against wikitext transformation
      $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered
      $isChildObj = false;        # $text is a DOM node needing expansion in a child frame
      $isLocalObj = false;        # $text is a DOM node needing expansion in the current frame

      # Title object, where $text came from
      $title = null;

      # $part1 is the bit before the first |, and must contain only title characters.
      # Various prefixes will be stripped from it later.
      $titleWithSpaces = $frame->expand( $piece['title'] );
      $part1 = trim( $titleWithSpaces );
      $titleText = false;

      # Original title text preserved for various purposes
      $originalTitle = $part1;

      # $args is a list of argument nodes, starting from index 0, not including $part1
      # NOTE(review): if $piece['parts'] is null this becomes a plain array and the
      # ->getLength() calls below would fail; presumably the preprocessor always
      # supplies a node list here - confirm.
      $args = (null == $piece['parts']) ? array() : $piece['parts'];
      wfProfileOut( __METHOD__.'-setup' );

      # SUBST
      wfProfileIn( __METHOD__.'-modifiers' );
      if ( !$found ) {
          $mwSubst = MagicWord::get( 'subst' );
          if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) {
              # One of two possibilities is true:
              # 1) Found SUBST but not in the PST phase
              # 2) Didn't find SUBST and in the PST phase
              # In either case, return without further processing
              $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
              $isLocalObj = true;
              $found = true;
          }
      }

      # Variables
      if ( !$found && $args->getLength() == 0 ) {
          $id = $this->mVariables->matchStartToEnd( $part1 );
          if ( $id !== false ) {
              $text = $this->getVariableValue( $id );
              if ( MagicWord::getCacheTTL($id)>-1 ) {
                  $this->mOutput->mContainsOldMagic = true;
              }
              $found = true;
          }
      }

      # MSG, MSGNW and RAW
      if ( !$found ) {
          # Check for MSGNW:
          $mwMsgnw = MagicWord::get( 'msgnw' );
          if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
              $nowiki = true;
          } else {
              # Remove obsolete MSG:
              $mwMsg = MagicWord::get( 'msg' );
              $mwMsg->matchStartAndRemove( $part1 );
          }

          # Check for RAW:
          $mwRaw = MagicWord::get( 'raw' );
          if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
              $forceRawInterwiki = true;
          }
      }
      wfProfileOut( __METHOD__.'-modifiers' );

      # Parser functions
      if ( !$found ) {
          wfProfileIn( __METHOD__ . '-pfunc' );

          $colonPos = strpos( $part1, ':' );
          if ( $colonPos !== false ) {
              # Case sensitive functions
              $function = substr( $part1, 0, $colonPos );
              if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
                  $function = $this->mFunctionSynonyms[1][$function];
              } else {
                  # Case insensitive functions
                  $function = strtolower( $function );
                  if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
                      $function = $this->mFunctionSynonyms[0][$function];
                  } else {
                      $function = false;
                  }
              }
              if ( $function ) {
                  list( $callback, $flags ) = $this->mFunctionHooks[$function];
                  # Callbacks take the parser by reference; pass a local alias
                  # rather than a reference to $this itself.
                  $parser = $this;
                  $initialArgs = array( &$parser );
                  $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
                  if ( $flags & SFH_OBJECT_ARGS ) {
                      # Add a frame parameter, and pass the arguments as an array
                      $allArgs = $initialArgs;
                      $allArgs[] = $frame;
                      for ( $i = 0; $i < $args->getLength(); $i++ ) {
                          $funcArgs[] = $args->item( $i );
                      }
                      $allArgs[] = $funcArgs;
                  } else {
                      # Convert arguments to plain text
                      for ( $i = 0; $i < $args->getLength(); $i++ ) {
                          $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) );
                      }
                      $allArgs = array_merge( $initialArgs, $funcArgs );
                  }

                  # Workaround for PHP bug 35229 and similar
                  if ( !is_callable( $callback ) ) {
                      throw new MWException( "Tag hook for $function is not callable\n" );
                  }
                  $result = call_user_func_array( $callback, $allArgs );
                  $found = true;
                  $noparse = true;
                  $preprocessFlags = 0;
                  # Defined default, so an array result without element 0 does
                  # not leave $text unset for the code below
                  $text = '';

                  if ( is_array( $result ) ) {
                      if ( isset( $result[0] ) ) {
                          $text = $result[0];
                          unset( $result[0] );
                      }

                      // Extract flags into the local scope
                      // This allows callers to set flags such as nowiki, found, etc.
                      extract( $result );
                  } else {
                      $text = $result;
                  }
                  if ( !$noparse ) {
                      $text = $this->preprocessToDom( $text, $preprocessFlags );
                      $isChildObj = true;
                  }
              }
          }
          wfProfileOut( __METHOD__ . '-pfunc' );
      }

      # Finish mangling title and then check for loops.
      # Set $title to a Title object and $titleText to the PDBK
      if ( !$found ) {
          $ns = NS_TEMPLATE;
          # Split the title into page and subpage
          $subpage = '';
          $part1 = $this->maybeDoSubpageLink( $part1, $subpage );
          if ($subpage !== '') {
              $ns = $this->mTitle->getNamespace();
          }
          $title = Title::newFromText( $part1, $ns );
          if ( $title ) {
              $titleText = $title->getPrefixedText();
              # Check for language variants if the template is not found
              if($wgContLang->hasVariants() && $title->getArticleID() == 0){
                  $wgContLang->findVariantLink( $part1, $title, true );
              }
              # Do recursion depth check
              $limit = $this->mOptions->getMaxTemplateDepth();
              if ( $frame->depth >= $limit ) {
                  $found = true;
                  $text = '<span class="error">' . wfMsgForContent( 'parser-template-recursion-depth-warning', $limit ) . '</span>';
              }
          }
      }

      # Load from database
      if ( !$found && $title ) {
          wfProfileIn( __METHOD__ . '-loadtpl' );
          if ( !$title->isExternal() ) {
              if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) {
                  $text = SpecialPage::capturePath( $title );
                  if ( is_string( $text ) ) {
                      $found = true;
                      $isHTML = true;
                      $this->disableCache();
                  }
              } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
                  $found = false; //access denied
                  # Newline-terminated like the other debug messages in this method
                  wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() . "\n" );
              } else {
                  list( $text, $title ) = $this->getTemplateDom( $title );
                  if ( $text !== false ) {
                      $found = true;
                      $isChildObj = true;
                  }
              }

              # If the title is valid but undisplayable, make a link to it
              if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
                  $text = "[[:$titleText]]";
                  $found = true;
              }
          } elseif ( $title->isTrans() ) {
              // Interwiki transclusion
              if ( $this->ot['html'] && !$forceRawInterwiki ) {
                  $text = $this->interwikiTransclude( $title, 'render' );
                  $isHTML = true;
              } else {
                  $text = $this->interwikiTransclude( $title, 'raw' );
                  // Preprocess it like a template
                  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
                  $isChildObj = true;
              }
              $found = true;
          }

          # Do infinite loop check
          # This has to be done after redirect resolution to avoid infinite loops via redirects
          if ( !$frame->loopCheck( $title ) ) {
              $found = true;
              $text = '<span class="error">' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . '</span>';
              wfDebug( __METHOD__.": template loop broken at '$titleText'\n" );
          }
          wfProfileOut( __METHOD__ . '-loadtpl' );
      }

      # If we haven't found text to substitute by now, we're done
      # Recover the source wikitext and return it
      if ( !$found ) {
          $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
          wfProfileOut( __METHOD__ );
          return array( 'object' => $text );
      }

      # Expand DOM-style return values in a child frame
      if ( $isChildObj ) {
          # Clean up argument array
          $newFrame = $frame->newChild( $args, $title );

          if ( $nowiki ) {
              $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
          } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
              # Expansion is eligible for the empty-frame cache
              if ( isset( $this->mTplExpandCache[$titleText] ) ) {
                  $text = $this->mTplExpandCache[$titleText];
              } else {
                  $text = $newFrame->expand( $text );
                  $this->mTplExpandCache[$titleText] = $text;
              }
          } else {
              # Uncached expansion
              $text = $newFrame->expand( $text );
          }
      }
      if ( $isLocalObj && $nowiki ) {
          $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
          $isLocalObj = false;
      }

      if ( $isHTML ) {
          # Replace raw HTML by a placeholder
          # Add a blank line preceding, to prevent it from mucking up
          # immediately preceding headings
          $text = "\n\n" . $this->insertStripItem( $text );
      } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
          # Escape nowiki-style return values
          $text = wfEscapeWikiText( $text );
      } elseif ( is_string( $text ) && !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{
          # Bug 529: if the template begins with a table or block-level
          # element, it should be treated as beginning a new line.
          # This behaviour is somewhat controversial.
          $text = "\n" . $text;
      }

      if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
          # Error, oversize inclusion
          $text = "[[$originalTitle]]" .
              $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' );
          $this->limitationWarn( 'post-expand-template-inclusion' );
      }

      # 'object' tells the caller the value still needs expansion in the
      # current frame; 'text' is the finished replacement.
      if ( $isLocalObj ) {
          $ret = array( 'object' => $text );
      } else {
          $ret = array( 'text' => $text );
      }

      wfProfileOut( __METHOD__ );
      return $ret;
  }
  /**
   * Get the semi-parsed DOM representation of a template with a given title,
   * and its redirect destination title. Cached.
   *
   * Two per-parser caches are used: mTplRedirCache maps a requested title to
   * the title that was actually fetched, and mTplDomCache holds the DOM (or
   * false for a failed fetch) per prefixed DB key.
   *
   * @param Title $title The template to load
   * @return array array( DOM or false when no text could be fetched, final Title )
   */
  function getTemplateDom( $title ) {
      $cacheTitle = $title;
      $titleText = $title->getPrefixedDBkey();

      // Follow a previously recorded redirect without going to the database
      if ( isset( $this->mTplRedirCache[$titleText] ) ) {
          list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
          $title = Title::makeTitle( $ns, $dbk );
          $titleText = $title->getPrefixedDBkey();
      }
      // isset() is also true for a cached false, so known misses are served from here too
      if ( isset( $this->mTplDomCache[$titleText] ) ) {
          return array( $this->mTplDomCache[$titleText], $title );
      }

      // Cache miss, go to the database
      list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

      if ( $text === false ) {
          $this->mTplDomCache[$titleText] = false;
          return array( false, $title );
      }

      $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
      $this->mTplDomCache[ $titleText ] = $dom;

      // Remember where the requested title ended up, if it was redirected
      if ( !$title->equals( $cacheTitle ) ) {
          $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
              array( $title->getNamespace(), $title->getDBkey() );
      }

      return array( $dom, $title );
  }
  /**
   * Fetch the unparsed text of a template and register a reference to it.
   *
   * The lookup itself is delegated to the template callback configured in the
   * parser options; every dependency the callback reports is recorded on the
   * parser output.
   *
   * @param Title $title
   * @return array ( text, final title ); the final title falls back to
   *   $title when the callback does not supply one
   */
  function fetchTemplateAndTitle( $title ) {
      $fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );
      $text = $fetched['text'];

      $resolvedTitle = $title;
      if ( isset( $fetched['finalTitle'] ) ) {
          $resolvedTitle = $fetched['finalTitle'];
      }

      if ( isset( $fetched['deps'] ) ) {
          foreach ( $fetched['deps'] as $dependency ) {
              $this->mOutput->addTemplate(
                  $dependency['title'],
                  $dependency['page_id'],
                  $dependency['rev_id']
              );
          }
      }

      return array( $text, $resolvedTitle );
  }
  /**
   * Fetch the unparsed text of a template, discarding the final title.
   * Thin wrapper around fetchTemplateAndTitle().
   *
   * @param Title $title
   * @return mixed First element of the fetchTemplateAndTitle() result
   */
  function fetchTemplate( $title ) {
      list( $text ) = $this->fetchTemplateAndTitle( $title );
      return $text;
  }
  /**
   * Static function to get a template
   * Can be overridden via ParserOptions::setTemplateCallback().
   *
   * Loads the text for $title and, if that text is a redirect, makes one
   * further attempt with the redirect target. Every title looked at is
   * reported as a dependency.
   *
   * @param Title $title Title to fetch
   * @param mixed $parser Passed through unchanged to the
   *   BeforeParserFetchTemplateAndtitle hook; defaults to false
   * @return array Associative array with keys:
   *   text        the fetched text, or false if none was found
   *   finalTitle  the title whose text was last loaded successfully
   *   deps        list of arrays with keys title, page_id and rev_id
   */
  static function statelessFetchTemplate( $title, $parser=false ) {
      $text = $skip = false;
      $finalTitle = $title;
      $deps = array();

      // Loop to fetch the article, with up to 1 redirect
      for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
          # Give extensions a chance to select the revision instead
          $id = false; // Assume current
          wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );

          if( $skip ) {
              # A hook asked to skip this template: still record the
              # dependency, but without a revision
              $text = false;
              $deps[] = array(
                  'title' => $title,
                  'page_id' => $title->getArticleID(),
                  'rev_id' => null );
              break;
          }
          $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
          # 0 stands for "no revision found"
          $rev_id = $rev ? $rev->getId() : 0;
          // If there is no current revision, there is no page
          if( $id === false && !$rev ) {
              $linkCache = LinkCache::singleton();
              $linkCache->addBadLinkObj( $title );
          }

          $deps[] = array(
              'title' => $title,
              'page_id' => $title->getArticleID(),
              'rev_id' => $rev_id );

          if( $rev ) {
              $text = $rev->getText();
          } elseif( $title->getNamespace() == NS_MEDIAWIKI ) {
              # No revision, but titles in the MediaWiki namespace can fall
              # back to the corresponding interface message
              global $wgContLang;
              $message = $wgContLang->lcfirst( $title->getText() );
              $text = wfMsgForContentNoTrans( $message );
              if( wfEmptyMsg( $message, $text ) ) {
                  $text = false;
                  break;
              }
          } else {
              # Nothing to load; on the second pass $text and $finalTitle
              # still describe the redirect page itself
              break;
          }
          if ( $text === false ) {
              break;
          }
          // Redirect?
          $finalTitle = $title;
          # The loop condition ends the loop unless this yields an object
          $title = Title::newFromRedirect( $text );
      }
      return array(
          'text' => $text,
          'finalTitle' => $finalTitle,
          'deps' => $deps );
  }
  /**
   * Transclude an interwiki link.
   *
   * @param Title $title Interwiki title to transclude
   * @param string $action Value for the "action" query parameter of the URL
   * @return string Fetched text, or an error message when scary transclusion
   *   is disabled or the URL is longer than 255 bytes
   */
  function interwikiTransclude( $title, $action ) {
      global $wgEnableScaryTranscluding;

      if ( !$wgEnableScaryTranscluding ) {
          return wfMsg('scarytranscludedisabled');
      }

      $url = $title->getFullUrl( "action=$action" );

      return strlen( $url ) > 255
          ? wfMsg('scarytranscludetoolong')
          : $this->fetchScaryTemplateMaybeFromCache( $url );
  }
  /**
   * Return the content behind $url, using the transcache table as a cache.
   *
   * A cached row is used while it is younger than $wgTranscludeCacheExpiry
   * seconds; otherwise the URL is fetched over HTTP and the row is replaced.
   *
   * @param string $url
   * @return string Cached or freshly fetched text, or an error message when
   *   the HTTP fetch returns nothing
   */
  function fetchScaryTemplateMaybeFromCache($url) {
      global $wgTranscludeCacheExpiry;

      $row = wfGetDB( DB_SLAVE )->selectRow(
          'transcache',
          array('tc_time', 'tc_contents'),
          array('tc_url' => $url)
      );
      if ( $row && $row->tc_time && time() < $row->tc_time + $wgTranscludeCacheExpiry ) {
          return $row->tc_contents;
      }

      # Missing or stale cache entry: fetch again
      $fetched = Http::get( $url );
      if ( !$fetched ) {
          return wfMsg('scarytranscludefailed', $url);
      }

      wfGetDB( DB_MASTER )->replace(
          'transcache',
          array('tc_url'),
          array(
              'tc_url' => $url,
              'tc_time' => time(),
              'tc_contents' => $fetched
          )
      );
      return $fetched;
  }
  /**
   * Triple brace replacement -- used for template arguments
   *
   * Resolution order: the argument value from the frame; otherwise the
   * supplied default (first part), when the output type allows it; otherwise
   * the literal {{{...}}} text is rebuilt.
   *
   * @param array $piece Has 'title' (argument name node) and 'parts'
   * @param PPFrame $frame
   * @return array Either array( 'object' => ... ) or array( 'text' => ... ).
   *   When an object is returned, any size-limit warning appended to the text
   *   is not part of the result.
   * @private
   */
  function argSubstitution( $piece, $frame ) {
      wfProfileIn( __METHOD__ );

      $defaults = $piece['parts'];
      $rawName = $frame->expand( $piece['title'] );
      $text = $frame->getArgument( trim( $rawName ) );
      $object = false;
      $error = false;

      if ( $text === false && $defaults->getLength() > 0 ) {
          if ( $this->ot['html']
              || $this->ot['pre']
              || ( $this->ot['wiki'] && $frame->isTemplate() )
          ) {
              # No match in frame, use the supplied default
              $object = $defaults->item( 0 )->getChildren();
          }
      }

      if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
          $error = '<!-- WARNING: argument omitted, expansion size too large -->';
          $this->limitationWarn( 'post-expand-template-argument' );
      }

      if ( $text === false && $object === false ) {
          # No match anywhere
          $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
      }
      if ( $error !== false ) {
          $text .= $error;
      }

      $ret = ( $object !== false )
          ? array( 'object' => $object )
          : array( 'text' => $text );

      wfProfileOut( __METHOD__ );
      return $ret;
  }
  /**
   * Return the text to be used for a given extension tag.
   * This is the ghost of strip().
   *
   * The rendered (or, outside HTML output, reconstructed) tag is stored in
   * the strip state under a freshly generated marker, and the marker is
   * returned in its place.
   *
   * @param array $params Associative array of parameters:
   *     name       PPNode for the tag name
   *     attr       PPNode for unparsed text where tag attributes are thought to be
   *     attributes Optional associative array of parsed attributes
   *     inner      Contents of extension element
   *     close      PPNode for the close tag text, or null; only read when
   *                'inner' is set and the output type is not HTML
   * @param PPFrame $frame
   * @return string The strip marker standing in for the tag
   */
  function extensionSubstitution( $params, $frame ) {
      global $wgRawHtml, $wgContLang;

      $name = $frame->expand( $params['name'] );
      $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
      $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

      # Each tag gets its own marker, numbered by a running index
      $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;

      if ( $this->ot['html'] ) {
          # HTML output: actually render the tag
          $name = strtolower( $name );

          $attributes = Sanitizer::decodeTagAttributes( $attrText );
          if ( isset( $params['attributes'] ) ) {
              # Array union: attributes decoded from the text take precedence
              $attributes = $attributes + $params['attributes'];
          }
          switch ( $name ) {
              case 'html':
                  if( $wgRawHtml ) {
                      $output = $content;
                      break;
                  } else {
                      throw new MWException( '<html> extension tag encountered unexpectedly' );
                  }
              case 'nowiki':
                  # Replace the braces of -{ and }- sequences with character references
                  $content = strtr($content, array('-{' => '-&#123;', '}-' => '&#125;-'));
                  $output = Xml::escapeTagsOnly( $content );
                  break;
              case 'math':
                  $output = $wgContLang->armourMath(
                      MathRenderer::renderMath( $content, $attributes ) );
                  break;
              case 'gallery':
                  $output = $this->renderImageGallery( $content, $attributes );
                  break;
              default:
                  if( isset( $this->mTagHooks[$name] ) ) {
                      # Workaround for PHP bug 35229 and similar
                      if ( !is_callable( $this->mTagHooks[$name] ) ) {
                          throw new MWException( "Tag hook for $name is not callable\n" );
                      }
                      $output = call_user_func_array( $this->mTagHooks[$name],
                          array( $content, $attributes, &$this ) );
                  } else {
                      $output = '<span class="error">Invalid tag extension name: ' .
                          htmlspecialchars( $name ) . '</span>';
                  }
          }
      } else {
          # Other output types: rebuild the tag source instead of rendering it
          if ( is_null( $attrText ) ) {
              $attrText = '';
          }
          if ( isset( $params['attributes'] ) ) {
              foreach ( $params['attributes'] as $attrName => $attrValue ) {
                  $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
                      htmlspecialchars( $attrValue ) . '"';
              }
          }
          if ( $content === null ) {
              # No inner content: emit a self-closing tag
              $output = "<$name$attrText/>";
          } else {
              $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
              $output = "<$name$attrText>$content$close";
          }
      }

      # NOTE(review): $name is only lowercased in the HTML branch above, so in
      # other output types this comparison is case-sensitive - confirm intended
      if ( $name === 'html' || $name === 'nowiki' ) {
          $this->mStripState->nowiki->setPair( $marker, $output );
      } else {
          $this->mStripState->general->setPair( $marker, $output );
      }
      return $marker;
  }
  /**
   * Increment an include size counter
   *
   * The counter is left untouched when the increment would exceed the limit.
   *
   * @param string $type The type of expansion
   * @param integer $size The size of the text
   * @return boolean False if this inclusion would take it over the maximum, true otherwise
   */
  function incrementIncludeSize( $type, $size ) {
      $newTotal = $this->mIncludeSizes[$type] + $size;
      if ( $newTotal <= $this->mOptions->getMaxIncludeSize( $type ) ) {
          $this->mIncludeSizes[$type] = $newTotal;
          return true;
      }
      return false;
  }
  /**
   * Increment the expensive function count
   *
   * The counter is incremented on every call, including calls made after
   * the limit has been passed.
   *
   * @return boolean False if the limit has been exceeded
   */
  function incrementExpensiveFunctionCount() {
      global $wgExpensiveParserFunctionLimit;

      $this->mExpensiveFunctionCount++;
      return $this->mExpensiveFunctionCount <= $wgExpensiveParserFunctionLimit;
  }
  /**
   * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
   * Fills $this->mDoubleUnderscores, returns the modified text
   *
   * @param string $text
   * @return string Text with the magic words removed; the first __TOC__ is
   *   replaced by a <!--MWTOC--> placeholder
   */
  function doDoubleUnderscore( $text ) {
      wfProfileIn( __METHOD__ );

      // __TOC__ is handled first because its position has to be recorded
      $tocWord = MagicWord::get( 'toc' );
      if ( $tocWord->match( $text ) ) {
          $this->mShowToc = true;
          $this->mForceTocPosition = true;

          // The first occurrence becomes the placeholder that is later
          // filled in with the TOC; all further occurrences are dropped.
          $text = $tocWord->replace( '<!--MWTOC-->', $text, 1 );
          $text = $tocWord->replace( '', $text );
      }

      // Match and remove every other double-underscore word
      $this->mDoubleUnderscores = MagicWord::getDoubleUnderscoreArray()->matchAndRemove( $text );

      if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
          $this->mOutput->mNoGallery = true;
      }

      // An explicit __TOC__ wins over __NOTOC__
      if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
          $this->mShowToc = false;
      }

      // __HIDDENCAT__ only has an effect on category pages
      if ( isset( $this->mDoubleUnderscores['hiddencat'] )
          && $this->mTitle->getNamespace() == NS_CATEGORY
      ) {
          $this->mOutput->setProperty( 'hiddencat', 'y' );

          $containerCategory = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( 'hidden-category-category' ) );
          if ( !$containerCategory ) {
              wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid title!\n" );
          } else {
              $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() );
          }
      }

      # (bug 8068) Allow control over whether robots index a page.
      #
      # FIXME (bug 14899): the policy is picked by checking for __NOINDEX__
      # first and __INDEX__ second, regardless of their order on the page.
      # This is not desirable, the last one on the page should win.
      if ( isset( $this->mDoubleUnderscores['noindex'] ) ) {
          $this->mOutput->setIndexPolicy( 'noindex' );
      } elseif ( isset( $this->mDoubleUnderscores['index'] ) ) {
          $this->mOutput->setIndexPolicy( 'index' );
      }

      wfProfileOut( __METHOD__ );
      return $text;
  }
  /**
   * This function accomplishes several tasks:
   * 1) Auto-number headings if that option is enabled
   * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
   * 3) Add a Table of contents on the top for users who have enabled the option
   * 4) Auto-anchor headings
   *
   * It loops through all headlines, collects the necessary data, then splits up the
   * string and re-inserts the newly formatted headlines.
   *
   * The raw TOC data is also stored on the parser output via setSections().
   *
   * @param string $text HTML containing <h1>..<h6> elements
   * @param boolean $isMain When true (and no __TOC__ placeholder is in use),
   *   the TOC is inserted after the first block of text, before the first heading
   * @return string The text with formatted headlines and, if enabled, the TOC
   * @private
   */
  function formatHeadings( $text, $isMain=true ) {
      global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds;

      $doNumberHeadings = $this->mOptions->getNumberHeadings();
      $showEditLink = $this->mOptions->getEditSection();

      // Do not call quickUserCan unless necessary
      if( $showEditLink && !$this->mTitle->quickUserCan( 'edit' ) ) {
          $showEditLink = 0;
      }

      # Inhibit editsection links if requested in the page
      if ( isset( $this->mDoubleUnderscores['noeditsection'] ) || $this->mOptions->getIsPrintable() ) {
          $showEditLink = 0;
      }

      # Get all headlines for numbering them and adding funky stuff like [edit]
      # links - this is for later, but we need the number of headlines right now
      $matches = array();
      $numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?'.'>)(?P<header>.*?)<\/H[1-6] *>/i', $text, $matches );

      # if there are fewer than 4 headlines in the article, do not show TOC
      # unless it's been explicitly enabled.
      $enoughToc = $this->mShowToc &&
          (($numMatches >= 4) || $this->mForceTocPosition);

      # Allow user to stipulate that a page should have a "new section"
      # link added via __NEWSECTIONLINK__
      if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
          $this->mOutput->setNewSection( true );
      }

      # Allow user to remove the "new section"
      # link via __NONEWSECTIONLINK__
      if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
          $this->mOutput->hideNewSection( true );
      }

      # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
      # override above conditions and always show TOC above first header
      if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
          $this->mShowToc = true;
          $enoughToc = true;
      }

      # We need this to perform operations on the HTML
      $sk = $this->mOptions->getSkin();

      # headline counter
      $headlineCount = 0;
      $numVisible = 0;

      # Ugh .. the TOC should have neat indentation levels which can be
      # passed to the skin functions. These are determined here
      $toc = '';
      $full = '';
      $head = array();
      $sublevelCount = array();
      $levelCount = array();
      $toclevel = 0;
      $level = 0;
      $prevlevel = 0;
      $prevtoclevel = 0;
      # Number of times each (lowercased) anchor has been seen so far
      $refers = array();
      $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX;
      $baseTitleText = $this->mTitle->getPrefixedDBkey();
      $tocraw = array();

      foreach( $matches[3] as $headline ) {
          $isTemplate = false;
          $titleText = false;
          $sectionIndex = false;
          $numbering = '';
          $markerMatches = array();
          # A leading heading marker identifies the title and section index
          # the heading came from; strip it once it has been read
          if (preg_match("/^$markerRegex/", $headline, $markerMatches)) {
              $serial = $markerMatches[1];
              list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
              $isTemplate = ($titleText != $baseTitleText);
              $headline = preg_replace("/^$markerRegex/", "", $headline);
          }

          if( $toclevel ) {
              $prevlevel = $level;
              $prevtoclevel = $toclevel;
          }
          $level = $matches[1][$headlineCount];

          if( $doNumberHeadings || $enoughToc ) {

              if ( $level > $prevlevel ) {
                  # Increase TOC level
                  $toclevel++;
                  $sublevelCount[$toclevel] = 0;
                  if( $toclevel<$wgMaxTocLevel ) {
                      $prevtoclevel = $toclevel;
                      $toc .= $sk->tocIndent();
                      $numVisible++;
                  }
              }
              elseif ( $level < $prevlevel && $toclevel > 1 ) {
                  # Decrease TOC level, find level to jump to

                  if ( $toclevel == 2 && $level <= $levelCount[1] ) {
                      # Can only go down to level 1
                      $toclevel = 1;
                  } else {
                      for ($i = $toclevel; $i > 0; $i--) {
                          if ( $levelCount[$i] == $level ) {
                              # Found last matching level
                              $toclevel = $i;
                              break;
                          }
                          elseif ( $levelCount[$i] < $level ) {
                              # Found first matching level below current level
                              $toclevel = $i + 1;
                              break;
                          }
                      }
                  }
                  if( $toclevel<$wgMaxTocLevel ) {
                      if($prevtoclevel < $wgMaxTocLevel) {
                          # Unindent only if the previous toc level was shown :p
                          $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel );
                          $prevtoclevel = $toclevel;
                      } else {
                          $toc .= $sk->tocLineEnd();
                      }
                  }
              }
              else {
                  # No change in level, end TOC line
                  if( $toclevel<$wgMaxTocLevel ) {
                      $toc .= $sk->tocLineEnd();
                  }
              }

              $levelCount[$toclevel] = $level;

              # count number of headlines for each level
              @$sublevelCount[$toclevel]++;
              $dot = 0;
              for( $i = 1; $i <= $toclevel; $i++ ) {
                  if( !empty( $sublevelCount[$i] ) ) {
                      if( $dot ) {
                          $numbering .= '.';
                      }
                      $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
                      $dot = 1;
                  }
              }
          }

          # The safe header is a version of the header text safe to use for links
          # Avoid insertion of weird stuff like <math> by expanding the relevant sections
          $safeHeadline = $this->mStripState->unstripBoth( $headline );

          # Remove link placeholders by the link text.
          #     <!--LINK number-->
          # turns into
          #     link text with suffix
          $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );

          # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
          $tocline = preg_replace(
              array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
              array( '',                          '<$1>'),
              $safeHeadline
          );
          $tocline = trim( $tocline );

          # For the anchor, strip out HTML-y stuff period
          $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
          $safeHeadline = trim( $safeHeadline );

          # Save headline for section edit hint before it's escaped
          $headlineHint = $safeHeadline;

          if ( $wgEnforceHtmlIds ) {
              $legacyHeadline = false;
              $safeHeadline = Sanitizer::escapeId( $safeHeadline,
                  'noninitial' );
          } else {
              # For reverse compatibility, provide an id that's
              # HTML4-compatible, like we used to.
              #
              # It may be worth noting, academically, that it's possible for
              # the legacy anchor to conflict with a non-legacy headline
              # anchor on the page.  In this case likely the "correct" thing
              # would be to either drop the legacy anchors or make sure
              # they're numbered first.  However, this would require people
              # to type in section names like "abc_.D7.93.D7.90.D7.A4"
              # manually, so let's not bother worrying about it.
              $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
                  'noninitial' );
              $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );

              if ( $legacyHeadline == $safeHeadline ) {
                  # No reason to have both (in fact, we can't)
                  $legacyHeadline = false;
              } elseif ( $legacyHeadline != Sanitizer::escapeId(
              $legacyHeadline, 'xml' ) ) {
                  # The legacy id is invalid XML.  We used to allow this, but
                  # there's no reason to do so anymore.  Backward
                  # compatibility will fail slightly in this case, but it's
                  # no big deal.
                  $legacyHeadline = false;
              }
          }

          # HTML names must be case-insensitively unique (bug 10721).  FIXME:
          # Does this apply to Unicode characters?  Because we aren't
          # handling those here.
          $arrayKey = strtolower( $safeHeadline );
          if ( $legacyHeadline === false ) {
              $legacyArrayKey = false;
          } else {
              $legacyArrayKey = strtolower( $legacyHeadline );
          }

          # count how many in assoc. array so we can track dupes in anchors
          if ( isset( $refers[$arrayKey] ) ) {
              $refers[$arrayKey]++;
          } else {
              $refers[$arrayKey] = 1;
          }
          # Only count a legacy anchor that actually exists: a false key would
          # be cast to 0 and inflate the counter of a heading anchored "0"
          if ( $legacyArrayKey !== false ) {
              if ( isset( $refers[$legacyArrayKey] ) ) {
                  $refers[$legacyArrayKey]++;
              } else {
                  $refers[$legacyArrayKey] = 1;
              }
          }

          # Don't number the heading if it is the only one (looks silly)
          if( $doNumberHeadings && count( $matches[3] ) > 1) {
              # the two are different if the line contains a link
              $headline=$numbering . ' ' . $headline;
          }

          # Create the anchor for linking from the TOC to the section
          $anchor = $safeHeadline;
          $legacyAnchor = $legacyHeadline;
          if ( $refers[$arrayKey] > 1 ) {
              $anchor .= '_' . $refers[$arrayKey];
          }
          if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
              $legacyAnchor .= '_' . $refers[$legacyArrayKey];
          }
          if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
              $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel);
              $tocraw[] = array( 'toclevel' => $toclevel, 'level' => $level, 'line' => $tocline, 'number' => $numbering );
          }
          # give headline the correct <h#> tag
          if( $showEditLink && $sectionIndex !== false ) {
              if( $isTemplate ) {
                  # Put a T flag in the section identifier, to indicate to extractSections()
                  # that sections inside <includeonly> should be counted.
                  $editlink = $sk->doEditSectionLink(Title::newFromText( $titleText ), "T-$sectionIndex");
              } else {
                  $editlink = $sk->doEditSectionLink($this->mTitle, $sectionIndex, $headlineHint);
              }
          } else {
              $editlink = '';
          }
          $head[$headlineCount] = $sk->makeHeadline( $level,
              $matches['attrib'][$headlineCount], $anchor, $headline,
              $editlink, $legacyAnchor );

          $headlineCount++;
      }

      $this->mOutput->setSections( $tocraw );

      # Never ever show TOC if no headers
      if( $numVisible < 1 ) {
          $enoughToc = false;
      }

      if( $enoughToc ) {
          if( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
              $toc .= $sk->tocUnindent( $prevtoclevel - 1 );
          }
          $toc = $sk->tocList( $toc );
      }

      # split up and insert constructed headlines
      # NOTE(review): unlike the matching pattern above, this pattern does not
      # allow spaces before the ">" of the closing tag - confirm that such
      # headings cannot reach this point
      $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
      $i = 0;

      foreach( $blocks as $block ) {
          if( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) {
              # This is the [edit] link that appears for the top block of text when
              # section editing is enabled

              # Disabled because it broke block formatting
              # For example, a bullet point in the top line
              # $full .= $sk->editSectionLink(0);
          }
          $full .= $block;
          if( $enoughToc && !$i && $isMain && !$this->mForceTocPosition ) {
              # Top anchor now in skin
              $full = $full.$toc;
          }

          if( !empty( $head[$i] ) ) {
              $full .= $head[$i];
          }
          $i++;
      }
      if( $this->mForceTocPosition ) {
          return str_replace( '<!--MWTOC-->', $toc, $full );
      } else {
          return $full;
      }
  }
  /**
   * Transform wiki markup when saving a page by doing \r\n -> \n
   * conversion, substituting signatures, {{subst:}} templates, etc.
   *
   * @param string $text the text to transform
   * @param Title $title the Title object for the current article
   * @param User $user the User object describing the current user
   * @param ParserOptions $options parsing options
   * @param bool $clearState whether to clear the parser state first
   * @return string the altered wiki markup
   * @public
   */
  function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) {
      # Configure the parser for wikitext output
      $this->mOptions = $options;
      $this->setTitle( $title );
      $this->setOutputType( self::OT_WIKI );
      if ( $clearState ) {
          $this->clearState();
      }

      # Normalise line endings before the actual transformation
      $text = str_replace( "\r\n", "\n", $text );

      return $this->mStripState->unstripBoth( $this->pstPass2( $text, $user ) );
  }
  /**
   * Pre-save transform helper function
   *
   * Expands {{subst:}} constructs, replaces ~~~ / ~~~~ / ~~~~~ with the
   * signature and/or timestamp, applies the "pipe trick" to links with an
   * empty side, and trims trailing whitespace.
   *
   * @param string $text Wikitext to transform
   * @param User $user User whose signature is inserted
   * @return string The transformed wikitext
   * @private
   */
  function pstPass2( $text, $user ) {
      global $wgContLang, $wgLocaltimezone;

      /* Note: This is the timestamp saved as hardcoded wikitext to
       * the database, we use $wgContLang here in order to give
       * everyone the same signature and use the default one rather
       * than the one selected in each user's preferences.
       *
       * (see also bug 12815)
       */
      $ts = $this->mOptions->getTimestamp();
      $tz = wfMsgForContent( 'timezone-utc' );
      if ( isset( $wgLocaltimezone ) ) {
          $unixts = wfTimestamp( TS_UNIX, $ts );
          $oldtz = getenv( 'TZ' );
          putenv( 'TZ='.$wgLocaltimezone );
          $ts = date( 'YmdHis', $unixts );
          $tz = date( 'T', $unixts );  # might vary on DST changeover!

          /* Allow translation of timezones trough wiki. date() can return
           * whatever crap the system uses, localised or not, so we cannot
           * ship premade translations.
           */
          $key = 'timezone-' . strtolower( trim( $tz ) );
          $value = wfMsgForContent( $key );
          if ( !wfEmptyMsg( $key, $value ) ) $tz = $value;

          # Restore the previous environment. getenv() returns false when TZ
          # was not set; in that case unset it again rather than leaving an
          # empty TZ behind.
          if ( $oldtz === false ) {
              putenv( 'TZ' );
          } else {
              putenv( 'TZ='.$oldtz );
          }
      }

      $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tz)";

      # Variable replacement
      # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
      $text = $this->replaceVariables( $text );

      # Signatures
      $sigText = $this->getUserSig( $user );
      $text = strtr( $text, array(
          '~~~~~' => $d,
          '~~~~' => "$sigText $d",
          '~~~' => $sigText
      ) );

      # Context links: [[|name]] and [[name (context)|]]
      #
      global $wgLegalTitleChars;
      $tc = "[$wgLegalTitleChars]";
      $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

      $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/";           # [[ns:page (context)|]]
      # Fullwidth parentheses U+FF08 / U+FF09, written as UTF-8 byte escapes so
      # the pattern survives re-encoding of this file. With plain unescaped
      # "(" and ")" the third group would match any text and [[Foo|]] would be
      # rewritten to [[Foo|F]].
      $p4 = "/\[\[(:?$nc+:|:|)($tc+?)(\xEF\xBC\x88$tc+\xEF\xBC\x89)\\|]]/"; # [[ns:page<fullwidth (>context<fullwidth )>|]]
      $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]]
      $p2 = "/\[\[\\|($tc+)]]/";                                     # [[|page]]

      # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
      $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
      $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
      $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

      # For [[|page]], borrow the namespace and context from the current title
      $t = $this->mTitle->getText();
      $m = array();
      if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
          $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
      } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && '' != "$m[1]$m[2]" ) {
          $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
      } else {
          # if there's no context, don't bother duplicating the title
          $text = preg_replace( $p2, '[[\\1]]', $text );
      }

      # Trim trailing whitespace
      $text = rtrim( $text );

      return $text;
  }
  /**
   * Fetch the user's signature text, if any, and normalize to
   * validated, ready-to-insert wikitext.
   *
   * A "fancy" signature that validates is cleaned and returned as is. In
   * every other case the nickname (or the user name, when the nickname is
   * empty, too long or invalid) is escaped and wrapped by the
   * signature / signature-anon message.
   *
   * @param User $user
   * @return string
   * @private
   */
  function getUserSig( &$user ) {
      global $wgMaxSigChars;

      $username = $user->getName();
      $nickname = $user->getOption( 'nickname' );
      if ( $nickname === '' ) {
          $nickname = $username;
      }

      if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
          $nickname = $username;
          wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
      } elseif ( $user->getBoolOption( 'fancysig' ) !== false ) {
          # Sig. might contain markup; validate this
          if ( $this->validateSig( $nickname ) !== false ) {
              # Validated; clean up (if needed) and return it
              return $this->cleanSig( $nickname, true );
          }
          # Failed to validate; fall back to the default
          $nickname = $username;
          wfDebug( __METHOD__.": $username has bad XML tags in signature.\n" );
      }

      // Make sure nickname doesnt get a sig in a sig
      $nickname = $this->cleanSigInSig( $nickname );

      # If we're still here, make it a link to the user page
      $userText = wfEscapeWikiText( $username );
      $nickText = wfEscapeWikiText( $nickname );
      $msgKey = $user->isAnon() ? 'signature-anon' : 'signature';

      return wfMsgExt( $msgKey, array( 'content', 'parsemag' ), $userText, $nickText );
  }
  /**
   * Check that the user's signature contains no bad XML
   *
   * @param string $text
   * @return mixed The unchanged text if it is a well-formed XML fragment,
   *   or false if invalid.
   */
  function validateSig( $text ) {
      if ( Xml::isWellFormedXmlFragment( $text ) ) {
          return $text;
      }
      return false;
  }
  /**
   * Clean up signature text
   *
   * 1) Strip ~~~, ~~~~ and ~~~~~ out of signatures @see cleanSigInSig
   * 2) Substitute all transclusions
   *
   * When not called from within a parse ($parsing false), the parser state is
   * reset first: fresh state, $wgTitle as title, default ParserOptions and
   * the preprocess output type.
   *
   * @param string $text
   * @param $parsing Whether we're cleaning (preferences save) or parsing
   * @return string Signature text
   */
  function cleanSig( $text, $parsing = false ) {
      if ( !$parsing ) {
          global $wgTitle;
          $this->clearState();
          $this->setTitle( $wgTitle );
          $this->mOptions = new ParserOptions;
          # This must be a method call; assigning to a property named
          # setOutputType would leave the output type unchanged
          $this->setOutputType( self::OT_PREPROCESS );
      }

      # Option to disable this feature
      if ( !$this->mOptions->getCleanSignatures() ) {
          return $text;
      }

      # FIXME: regex doesn't respect extension tags or nowiki
      #  => Move this logic to braceSubstitution()
      # Turn every "{{" that is not already followed by the subst magic word
      # into "{{" plus that word, so all transclusions get substituted
      $substWord = MagicWord::get( 'subst' );
      $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
      $substText = '{{' . $substWord->getSynonym( 0 );

      $text = preg_replace( $substRegex, $substText, $text );
      $text = $this->cleanSigInSig( $text );
      $dom = $this->preprocessToDom( $text );
      $frame = $this->getPreprocessor()->newFrame();
      $text = $frame->expand( $dom );

      if ( !$parsing ) {
          $text = $this->mStripState->unstripBoth( $text );
      }

      return $text;
  }
  /**
   * Strip ~~~, ~~~~ and ~~~~~ out of signatures
   *
   * Runs of three to five tildes are removed; the match is greedy, so a run
   * of six tildes loses five and keeps one.
   *
   * @param string $text
   * @return string Signature text with /~{3,5}/ removed
   */
  function cleanSigInSig( $text ) {
      return preg_replace( '/~{3,5}/', '', $text );
  }
  /**
   * Prepare the parser for use by an external caller
   *
   * Sets the title, options and output type that parse() would normally set,
   * so that an external function can call some class members with confidence.
   * The parser state is cleared afterwards unless $clearState is false.
   *
   * @public
   *
   * @param Title $title
   * @param ParserOptions $options
   * @param $outputType One of the OT_* output type constants
   * @param bool $clearState Whether to call clearState()
   */
  function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
      $this->setTitle( $title );
      $this->mOptions = $options;
      $this->setOutputType( $outputType );

      if ( !$clearState ) {
          return;
      }
      $this->clearState();
  }
  /**
   * Wrapper for preprocess()
   *
   * Preprocesses $text against $wgTitle. A static flag guards against
   * infinite recursion: a nested call made while an outer call is still
   * running returns its text unchanged.
   *
   * @param string $text the text to preprocess
   * @param ParserOptions $options  options
   * @return string
   * @public
   */
  function transformMsg( $text, $options ) {
      global $wgTitle;
      static $inProgress = false;

      if ( !$inProgress ) {
          $inProgress = true;
          wfProfileIn(__METHOD__);

          $text = $this->preprocess( $text, $wgTitle, $options );

          $inProgress = false;
          wfProfileOut(__METHOD__);
      }

      return $text;
  }
  /**
   * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
   * The callback should have the following form:
   *    function myParserHook( $text, $params, &$parser ) { ... }
   *
   * Transform and return $text. Use $parser for any required context, e.g. use
   * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
   *
   * The tag name is lower-cased before it is stored, and it is appended to
   * the strip list if it is not already there.
   *
   * @public
   *
   * @param mixed $tag The tag to use, e.g. 'hook' for <hook>
   * @param mixed $callback The callback function (and object) to use for the tag
   *
   * @return The old value of the mTagHooks array associated with the hook
   */
  function setHook( $tag, $callback ) {
      $tag = strtolower( $tag );

      $previous = null;
      if ( isset( $this->mTagHooks[$tag] ) ) {
          $previous = $this->mTagHooks[$tag];
      }

      $this->mTagHooks[$tag] = $callback;
      if ( !in_array( $tag, $this->mStripList ) ) {
          $this->mStripList[] = $tag;
      }

      return $previous;
  }
  /**
   * Register a callback in the transparent tag hook table
   *
   * The tag name is lower-cased before it is stored. Unlike setHook(), the
   * strip list is not touched.
   *
   * @param mixed $tag The tag name
   * @param mixed $callback The callback to associate with the tag
   * @return The previous callback stored for this tag, or null if there was none
   */
  function setTransparentTagHook( $tag, $callback ) {
      $tag = strtolower( $tag );

      $previous = null;
      if ( isset( $this->mTransparentTagHooks[$tag] ) ) {
          $previous = $this->mTransparentTagHooks[$tag];
      }
      $this->mTransparentTagHooks[$tag] = $callback;

      return $previous;
  }
  /**
   * Remove all tag hooks
   *
   * Restores the strip list to the default strip list and empties the tag
   * hook table.
   */
  function clearTagHooks() {
      $this->mStripList = $this->mDefaultStripList;
      $this->mTagHooks = array();
  }
  /**
   * Create a function, e.g. {{sum:1|2|3}}
   * The callback function should have the form:
   *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
   *
   * Or with SFH_OBJECT_ARGS:
   *    function myParserFunction( $parser, $frame, $args ) { ... }
   *
   * The callback may either return the text result of the function, or an array with the text
   * in element 0, and a number of flags in the other elements. The names of the flags are
   * specified in the keys. Valid flags are:
   *   found                     The text returned is valid, stop processing the template. This
   *                             is on by default.
   *   nowiki                    Wiki markup in the return value should be escaped
   *   isHTML                    The returned text is HTML, armour it against wikitext transformation
   *
   * @public
   *
   * @param string $id The magic word ID
   * @param mixed $callback The callback function (and object) to use
   * @param integer $flags a combination of the following flags:
   *     SFH_NO_HASH   No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
   *
   *     SFH_OBJECT_ARGS   Pass the template arguments as PPNode objects instead of text. This
   *     allows for conditional expansion of the parse tree, allowing you to eliminate dead
   *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
   *     the arguments, and to control the way they are expanded.
   *
   *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
   *     arguments, for instance:
   *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
   *
   *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
   *     future versions. Please call $frame->expand() on it anyway so that your code keeps
   *     working if/when this is changed.
   *
   *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
   *     expansion.
   *
   *     Please read the documentation in includes/parser/Preprocessor.php for more information
   *     about the methods available in PPFrame and PPNode.
   *
   * @return The old callback function for this name, if any
   * @throws MWException if $id does not resolve to a magic word; nothing is
   *     registered in that case
   */
  function setFunctionHook( $id, $callback, $flags = 0 ) {
      # Validate the magic word before touching any state, so that a failed
      # registration does not leave a hook behind without its synonyms.
      $mw = MagicWord::get( $id );
      if ( !$mw ) {
          throw new MWException( __METHOD__.'() expecting a magic word identifier.' );
      }

      $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
      $this->mFunctionHooks[$id] = array( $callback, $flags );

      # Add to function cache
      $synonyms = $mw->getSynonyms();
      $sensitive = intval( $mw->isCaseSensitive() );

      foreach ( $synonyms as $syn ) {
          # Case
          if ( !$sensitive ) {
              $syn = strtolower( $syn );
          }
          # Add leading hash
          if ( !( $flags & SFH_NO_HASH ) ) {
              $syn = '#' . $syn;
          }
          # Remove trailing colon
          if ( substr( $syn, -1, 1 ) === ':' ) {
              $syn = substr( $syn, 0, -1 );
          }
          $this->mFunctionSynonyms[$sensitive][$syn] = $id;
      }

      return $oldVal;
  }
  /**
   * List the identifiers of all registered function hooks
   *
   * @return array Keys of the function hook table
   */
  function getFunctionHooks() {
      $hookIds = array_keys( $this->mFunctionHooks );
      return $hookIds;
  }
  /**
   * Replace <!--LINK--> link placeholders with actual links, in the buffer
   * Placeholders created in Skin::makeLinkObj()
   * Returns an array of link CSS classes, indexed by PDBK.
   *
   * The work is delegated to the link holder array; $options is accepted
   * but not used here.
   *
   * @param string $text Buffer, passed by reference to the link holder array
   * @param $options Unused
   */
  function replaceLinkHolders( &$text, $options = 0 ) {
      $result = $this->mLinkHolders->replace( $text );
      return $result;
  }
  /**
   * Replace <!--LINK--> link placeholders with plain text of links
   * (not HTML-formatted). Delegates to the link holder array.
   *
   * @param string $text
   * @return string
   */
  function replaceLinkHoldersText( $text ) {
      $plain = $this->mLinkHolders->replaceText( $text );
      return $plain;
  }
  /**
   * Tag hook handler for 'pre'.
   *
   * @param string $text Content of the tag
   * @param array $attribs Attributes given on the tag
   * @return string A <pre> element built from the validated attributes and
   *   the content passed through Xml::escapeTagsOnly()
   */
  function renderPreTag( $text, $attribs ) {
      // Backwards-compatibility hack: replace each <nowiki>...</nowiki>
      // pair by its inner content (case-insensitive match).
      $inner = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' );

      $safeAttribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
      $openTag = Xml::openElement( 'pre', $safeAttribs );

      return $openTag . Xml::escapeTagsOnly( $inner ) . '</pre>';
  }
  /**
   * Renders an image gallery from a text with one line per image.
   * text labels may be given by using |-style alternative text. E.g.
   *   Image:one.jpg|The number "1"
   *   Image:tree.jpg|A tree
   * given as text will return the HTML of a gallery with two images,
   * labeled 'The number "1"' and
   * 'A tree'.
   *
   * Recognised $params keys: 'caption', 'perrow', 'widths' and 'heights'.
   * The whole $params array is also validated as attributes for 'table'.
   *
   * @param string $text Gallery body, one entry per line
   * @param array $params Attributes given on the gallery tag
   * @return string Gallery HTML
   */
  function renderImageGallery( $text, $params ) {
      $ig = new ImageGallery();
      $ig->setContextTitle( $this->mTitle );
      $ig->setShowBytes( false );
      $ig->setShowFilename( false );
      $ig->setParser( $this );
      $ig->setHideBadImages();
      $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
      $ig->useSkin( $this->mOptions->getSkin() );
      $ig->mRevisionId = $this->mRevisionId;

      if ( isset( $params['caption'] ) ) {
          # Escape the caption first, then let internal links be replaced in it
          $caption = $params['caption'];
          $caption = htmlspecialchars( $caption );
          $caption = $this->replaceInternalLinks( $caption );
          $ig->setCaptionHtml( $caption );
      }
      if ( isset( $params['perrow'] ) ) {
          $ig->setPerRow( $params['perrow'] );
      }
      if ( isset( $params['widths'] ) ) {
          $ig->setWidths( $params['widths'] );
      }
      if ( isset( $params['heights'] ) ) {
          $ig->setHeights( $params['heights'] );
      }

      wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );

      $lines = StringUtils::explode( "\n", $text );
      foreach ( $lines as $line ) {
          # match lines like these:
          # Image:someimage.jpg|This is some image
          $matches = array();
          preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
          # Skip empty lines
          if ( count( $matches ) == 0 ) {
              continue;
          }

          # Only URL-decode when the title part itself contains an escape.
          # Looking at the whole line would let a '%' in the caption trigger
          # decoding of the title, which turns '+' in file names into spaces.
          if ( strpos( $matches[1], '%' ) !== false ) {
              $matches[1] = urldecode( $matches[1] );
          }

          $nt = Title::newFromText( $matches[1] );
          if ( is_null( $nt ) ) {
              # Bogus title. Ignore these so we don't bomb out later.
              continue;
          }

          $label = isset( $matches[3] ) ? $matches[3] : '';
          $html = $this->recursiveTagParse( trim( $label ) );

          $ig->add( $nt, $html );

          # Only add real images (bug #5586)
          if ( $nt->getNamespace() == NS_FILE ) {
              $this->mOutput->addImage( $nt->getDBkey() );
          }
      }
      return $ig->toHTML();
  }
  /**
   * Get the image parameter map and its magic word matcher for a handler
   *
   * Results are cached per handler class name (the empty string when there
   * is no handler). The map is keyed by magic word name; each value is
   * array( type, name ) where type is 'horizAlign', 'vertAlign', 'frame' or
   * 'handler'.
   *
   * @param $handler Media handler object, or a false value for none
   * @return array array( parameter map, MagicWordArray built from the map's keys )
   */
  function getImageParams( $handler ) {
      // Static lists, shared by all calls
      static $builtinNames = array(
          'horizAlign' => array( 'left', 'right', 'center', 'none' ),
          'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
              'bottom', 'text-bottom' ),
          'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
              'upright', 'border', 'link', 'alt' ),
      );
      static $builtinMap;

      $cacheKey = $handler ? get_class( $handler ) : '';

      if ( !isset( $this->mImageParams[$cacheKey] ) ) {
          // Build the map of built-in parameters on first use:
          // magic word "img_<name>" (dashes become underscores) => array( type, name )
          if ( !$builtinMap ) {
              $builtinMap = array();
              foreach ( $builtinNames as $type => $names ) {
                  foreach ( $names as $name ) {
                      $magicName = str_replace( '-', '_', "img_$name" );
                      $builtinMap[$magicName] = array( $type, $name );
                  }
              }
          }

          // Start from the built-in map and add the handler's own parameters
          $combined = $builtinMap;
          if ( $handler ) {
              foreach ( $handler->getParamMap() as $magic => $paramName ) {
                  $combined[$magic] = array( 'handler', $paramName );
              }
          }

          $this->mImageParams[$cacheKey] = $combined;
          $this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $combined ) );
      }

      return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
  }
  /**
   * Parse image options text and use it to make an image
   *
   * The options text is a "|"-separated list of the form "options|alt text".
   * Each piece is matched against the magic words from getImageParams(); the
   * last piece that does not validate as a parameter is used as the caption.
   *
   * Options are:
   *  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
   *  * left       no resizing, just left align. label is used for alt= only
   *  * right      same, but right aligned
   *  * none       same, but not aligned
   *  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
   *  * center     center the image
   *  * framed     Keep original image size, no magnify-button.
   *  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
   *  * upright    reduce width for upright images, rounded to full __0 px
   *  * border     draw a 1px border around the image
   *  * alt        Text for HTML alt attribute (defaults to empty)
   * vertical-align values (no % or length right now):
   *  * baseline
   *  * sub
   *  * super
   *  * top
   *  * text-top
   *  * middle
   *  * bottom
   *  * text-bottom
   *
   * @param Title $title
   * @param string $options
   * @param LinkHolderArray $holders
   * @return The skin's makeImageLink2() result, or a plain link to $title when
   *   the BeforeParserMakeImageLinkObj hook sets its skip flag
   */
  function makeImage( $title, $options, $holders = false ) {
      $optionParts = StringUtils::explode( "|", $options );
      $skin = $this->mOptions->getSkin();

      # Give extensions a chance to select the file revision for us
      $skip = false;
      $time = false;
      $descQuery = false;
      wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) );

      if ( $skip ) {
          return $skin->link( $title );
      }

      # Get the file, going through the per-parser file cache
      $dbKey = $title->getDBkey();
      if ( !isset( $this->mFileCache[$dbKey][$time] ) ) {
          $file = wfFindFile( $title, $time );
          # Throw the whole cache away once it holds more than 1000 names
          if ( count( $this->mFileCache ) > 1000 ) {
              $this->mFileCache = array();
          }
          $this->mFileCache[$dbKey][$time] = $file;
      } else {
          $file = $this->mFileCache[$dbKey][$time];
      }

      # Get parameter map
      $handler = $file ? $file->getHandler() : false;
      list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

      # Process the input parameters
      $caption = '';
      $params = array(
          'frame' => array(),
          'handler' => array(),
          'horizAlign' => array(),
          'vertAlign' => array(),
      );

      foreach ( $optionParts as $part ) {
          $part = trim( $part );
          list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
          $validated = false;

          if ( isset( $paramMap[$magicName] ) ) {
              list( $type, $paramName ) = $paramMap[$magicName];

              if ( $type === 'handler' && $paramName === 'width' ) {
                  // Special case; width and height come in one variable together
                  $dims = array();
                  # (bug 13500) In both cases (width/height and width only),
                  # permit trailing "px" for backward compatibility.
                  if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
                      $width = intval( $dims[1] );
                      $height = intval( $dims[2] );
                      if ( $handler->validateParam( 'width', $width ) ) {
                          $params[$type]['width'] = $width;
                          $validated = true;
                      }
                      if ( $handler->validateParam( 'height', $height ) ) {
                          $params[$type]['height'] = $height;
                          $validated = true;
                      }
                  } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
                      $width = intval( $value );
                      if ( $handler->validateParam( 'width', $width ) ) {
                          $params[$type]['width'] = $width;
                          $validated = true;
                      }
                  } // else no validation -- bug 13436
              } else {
                  if ( $type !== 'handler' ) {
                      # Validate internal parameters
                      if ( $paramName === 'manualthumb' || $paramName === 'alt' ) {
                          // @fixme - possibly check validity here for
                          // manualthumb? downstream behavior seems odd with
                          // missing manual thumbs.
                          $validated = true;
                          $value = $this->stripAltText( $value, $holders );
                      } elseif ( $paramName === 'link' ) {
                          $chars = self::EXT_LINK_URL_CLASS;
                          $prots = $this->mUrlProtocols;
                          if ( $value === '' ) {
                              # Empty link target: no link at all
                              $paramName = 'no-link';
                              $value = true;
                              $validated = true;
                          } elseif ( preg_match( "/^$prots/", $value ) ) {
                              # Starts with a URL protocol: accept only a complete URL
                              if ( preg_match( "/^($prots)$chars+$/", $value, $dims ) ) {
                                  $paramName = 'link-url';
                                  $this->mOutput->addExternalLink( $value );
                                  $validated = true;
                              }
                          } else {
                              # Otherwise try it as a page title
                              $linkTitle = Title::newFromText( $value );
                              if ( $linkTitle ) {
                                  $paramName = 'link-title';
                                  $value = $linkTitle;
                                  $this->mOutput->addLink( $linkTitle );
                                  $validated = true;
                              }
                          }
                      } else {
                          // Most other things appear to be empty or numeric...
                          $validated = ( $value === false || is_numeric( trim( $value ) ) );
                      }
                  } else {
                      # Validate handler parameter
                      $validated = $handler->validateParam( $paramName, $value );
                  }

                  if ( $validated ) {
                      $params[$type][$paramName] = $value;
                  }
              }
          }

          # Anything that did not validate as a parameter is the caption
          if ( !$validated ) {
              $caption = $part;
          }
      }

      # Process alignment parameters
      if ( $params['horizAlign'] ) {
          $params['frame']['align'] = key( $params['horizAlign'] );
      }
      if ( $params['vertAlign'] ) {
          $params['frame']['valign'] = key( $params['vertAlign'] );
      }

      $params['frame']['caption'] = $caption;
      $params['frame']['title'] = $this->stripAltText( $caption, $holders );

      # In the old days, [[Image:Foo|text...]] would set alt text.  Later it
      # came to also set the caption, ordinary text after the image -- which
      # makes no sense, because that just repeats the text multiple times in
      # screen readers.  It *also* came to set the title attribute.
      #
      # Now that we have an alt attribute, we should not set the alt text to
      # equal the caption: that's worse than useless, it just repeats the
      # text.  This is the framed/thumbnail case.  If there's no caption, we
      # use the unnamed parameter for alt text as well, just for the time
      # being, if the unnamed param is set and the alt param is not.
      #
      # For the future, we need to figure out if we want to tweak this more,
      # e.g., introducing a title= parameter for the title; ignoring the
      # unnamed parameter entirely for images without a caption; adding an
      # explicit caption= parameter and preserving the old magic unnamed
      # parameter for BC; ...
      $frameParams = $params['frame'];
      $isFramedKind = isset( $frameParams['framed'] )
          || isset( $frameParams['thumbnail'] )
          || isset( $frameParams['manualthumb'] );
      if ( $caption !== '' && !isset( $frameParams['alt'] ) && !$isFramedKind ) {
          $params['frame']['alt'] = $params['frame']['title'];
      }

      wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) );

      # Linker does the rest
      $ret = $skin->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery );

      # Give the handler a chance to modify the parser object
      if ( $handler ) {
          $handler->parserTransformHook( $this, $file );
      }

      return $ret;
  }
  /**
   * Turn caption wikitext into plain text for use in an attribute
   *
   * @param string $caption
   * @param $holders Link holder array to resolve placeholders with, or a
   *   false value to use the parser's own link holders
   * @return string Caption with link placeholders replaced by text, strip
   *   markers expanded and all tags removed
   */
  protected function stripAltText( $caption, $holders ) {
      # Strip bad stuff out of the title (tooltip).  We can't just use
      # replaceLinkHoldersText() here, because if this function is called
      # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
      $tooltip = $holders
          ? $holders->replaceText( $caption )
          : $this->replaceLinkHoldersText( $caption );

      # make sure there are no placeholders in thumbnail attributes
      # that are later expanded to html- so expand them now and
      # remove the tags
      $expanded = $this->mStripState->unstripBoth( $tooltip );

      return Sanitizer::stripAllTags( $expanded );
  }
  /**
   * Flag the output object as dynamic content that shouldn't be cached.
   *
   * Logs a debug message and sets the output's cache time to -1.
   */
  function disableCache() {
      wfDebug( "Parser output marked as uncacheable.\n" );

      $output = $this->mOutput;
      $output->mCacheTime = -1;
  }
  4030. /**#@+
  4031. * Callback from the Sanitizer for expanding items found in HTML attribute
  4032. * values, so they can be safely tested and escaped.
  4033. * @param string $text
  4034. * @param PPFrame $frame
  4035. * @return string
  4036. * @private
  4037. */
  function attributeStripCallback( &$text, $frame = false ) {
      // $text is taken by reference, so the caller's variable is updated
      // as well as the value being returned: variables are replaced first,
      // then strip markers are expanded.
      $text = $this->mStripState->unstripBoth(
          $this->replaceVariables( $text, $frame )
      );
      return $text;
  }
  4043. /**#@-*/
  4044. /**#@+
  4045. * Accessor/mutator
  4046. */
  function Title( $x = null ) {
      // Delegates to wfSetVar() with the title property and the new value
      return wfSetVar( $this->mTitle, $x );
  }
  function Options( $x = null ) {
      // Delegates to wfSetVar() with the options property and the new value
      return wfSetVar( $this->mOptions, $x );
  }
  function OutputType( $x = null ) {
      // Delegates to wfSetVar() with the output type property and the new value
      return wfSetVar( $this->mOutputType, $x );
  }
  4050. /**#@-*/
  4051. /**#@+
  4052. * Accessor
  4053. */
  function getTags() {
      // Transparent tag hook names first, then ordinary tag hook names
      $transparentTags = array_keys($this->mTransparentTagHooks);
      $regularTags = array_keys( $this->mTagHooks );
      return array_merge( $transparentTags, $regularTags );
  }
  4055. /**#@-*/
  /**
   * Break wikitext input into sections, and either pull or replace
   * some particular section's text.
   *
   * External callers should use the getSection and replaceSection methods.
   *
   * @param string $text Page wikitext
   * @param string $section A section identifier string of the form:
   *   <flag1> - <flag2> - ... - <section number>
   *
   * Currently the only recognised flag is "T", which means the target section number
   * was derived during a template inclusion parse, in other words this is a template
   * section edit link. If no flags are given, it was an ordinary section edit link.
   * This flag is required to avoid a section numbering mismatch when a section is
   * enclosed by <includeonly> (bug 6563).
   *
   * The section number 0 pulls the text before the first heading; other numbers will
   * pull the given section along with its lower-level subsections. If the section is
   * not found, $mode=get will return $newtext, and $mode=replace will return $text.
   *
   * @param string $mode One of "get" or "replace"
   * @param string $newText Replacement text for section data.
   * @return string for "get", the extracted section text.
   *                for "replace", the whole page with the section replaced.
   */
  private function extractSections( $text, $section, $mode, $newText='' ) {
      global $wgTitle;
      $this->clearState();
      $this->setTitle( $wgTitle ); // not generally used but removes an ugly failure mode
      $this->mOptions = new ParserOptions;
      $this->setOutputType( self::OT_WIKI );
      $outText = '';
      $frame = $this->getPreprocessor()->newFrame();

      // Process section extraction flags: everything before the last "-"
      // is a flag, the last piece is the section index
      $sectionParts = explode( '-', $section );
      $sectionIndex = array_pop( $sectionParts );
      $flags = in_array( 'T', $sectionParts, true ) ? self::PTD_FOR_INCLUSION : 0;

      // Preprocess the text
      $root = $this->preprocessToDom( $text, $flags );

      // <h> nodes indicate section breaks
      // They can only occur at the top level, so we can find them by iterating the root's children
      $node = $root->getFirstChild();

      // Find the target section
      if ( $sectionIndex == 0 ) {
          // Section zero doesn't nest, level=big
          $targetLevel = 1000;
      } else {
          for ( ; $node; $node = $node->getNextSibling() ) {
              if ( $node->getName() === 'h' ) {
                  $bits = $node->splitHeading();
                  if ( $bits['i'] == $sectionIndex ) {
                      $targetLevel = $bits['level'];
                      break;
                  }
              }
              // In replace mode, everything before the target is copied through
              if ( $mode === 'replace' ) {
                  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
              }
          }
      }

      if ( !$node ) {
          // Not found
          return ( $mode === 'get' ) ? $newText : $text;
      }

      // Find the end of the section, including nested sections.
      // $node is known to be set here, so the first pass always runs.
      while ( $node ) {
          if ( $node->getName() === 'h' ) {
              $bits = $node->splitHeading();
              $curLevel = $bits['level'];
              if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
                  break;
              }
          }
          // In get mode, the section's own nodes are what gets collected
          if ( $mode === 'get' ) {
              $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
          }
          $node = $node->getNextSibling();
      }

      // Write out the remainder (in replace mode only)
      if ( $mode === 'replace' ) {
          // Output the replacement text
          // Add two newlines on -- trailing whitespace in $newText is conventionally
          // stripped by the editor, so we need both newlines to restore the paragraph gap
          // Only add trailing whitespace if there is newText
          if ( $newText != "" ) {
              $outText .= $newText . "\n\n";
          }

          for ( ; $node; $node = $node->getNextSibling() ) {
              $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
          }
      }

      // Re-insert stripped tags. $outText starts as '' and is only ever
      // appended to, so it is always a string at this point.
      return rtrim( $this->mStripState->unstripBoth( $outText ) );
  }
  /**
   * Return the text of one section of a page, specified by a number ($section).
   * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
   * the first section before any such heading (section 0).
   *
   * If a section contains subsections, these are also returned.
   *
   * @param string $text text to look in
   * @param string $section section identifier
   * @param string $deftext default to return if section is not found
   * @return string text of the requested section
   */
  public function getSection( $text, $section, $deftext='' ) {
      $mode = "get";
      return $this->extractSections( $text, $section, $mode, $deftext );
  }
  /**
   * Replace one section of a page, via extractSections() in "replace" mode
   *
   * @param string $oldtext Page wikitext
   * @param string $section section identifier
   * @param string $text Replacement text for the section
   * @return string The whole page with the section replaced
   */
  public function replaceSection( $oldtext, $section, $text ) {
      $mode = "replace";
      return $this->extractSections( $oldtext, $section, $mode, $text );
  }
  /**
   * Get the timestamp associated with the current revision, adjusted for
   * the default server-local timestamp
   *
   * The value is looked up once and then kept in mRevisionTimestamp.
   */
  function getRevisionTimestamp() {
      if ( !is_null( $this->mRevisionTimestamp ) ) {
          // Already computed
          return $this->mRevisionTimestamp;
      }

      wfProfileIn( __METHOD__ );
      global $wgContLang;

      $dbr = wfGetDB( DB_SLAVE );
      $conds = array( 'rev_id' => $this->mRevisionId );
      $rawTimestamp = $dbr->selectField( 'revision', 'rev_timestamp', $conds, __METHOD__ );

      // Normalize timestamp to internal MW format for timezone processing.
      // This has the added side-effect of replacing a null value with
      // the current time, which gives us more sensible behavior for
      // previews.
      $mwTimestamp = wfTimestamp( TS_MW, $rawTimestamp );

      // The cryptic '' timezone parameter tells to use the site-default
      // timezone offset instead of the user settings.
      //
      // Since this value will be saved into the parser cache, served
      // to other users, and potentially even used inside links and such,
      // it needs to be consistent for all visitors.
      $this->mRevisionTimestamp = $wgContLang->userAdjust( $mwTimestamp, '' );

      wfProfileOut( __METHOD__ );

      return $this->mRevisionTimestamp;
  }
  /**
   * Get the name of the user that edited the last revision
   *
   * Falls back to the current user's name when there is no revision id, or
   * when no revision can be loaded for the id.
   *
   * @return string User name
   */
  function getRevisionUser() {
      global $wgUser;

      // if this template is subst: the revision id will be blank,
      // so just use the current user's name
      if ( $this->mRevisionId ) {
          $revision = Revision::newFromId( $this->mRevisionId );
          // Guard against an id for which no revision could be loaded;
          // calling getUserText() on a non-object would be a fatal error.
          if ( $revision ) {
              return $revision->getUserText();
          }
      }

      return $wgUser->getName();
  }
  /**
   * Set the default sort key ($mDefaultSort)
   *
   * @param $sort New value, stored as given
   */
  public function setDefaultSort( $sort ) {
      $this->mDefaultSort = $sort;
  }
  /**
   * Accessor for $mDefaultSort
   * Will use the title/prefixed title if none is set
   *
   * The unprefixed title text is used for pages in the category namespace,
   * or when $wgCategoryPrefixedDefaultSortkey is off; otherwise the
   * prefixed title text is used.
   *
   * @return string
   */
  public function getDefaultSort() {
      global $wgCategoryPrefixedDefaultSortkey;

      if ( $this->mDefaultSort !== false ) {
          return $this->mDefaultSort;
      }

      $useUnprefixed = $this->mTitle->getNamespace() == NS_CATEGORY
          || !$wgCategoryPrefixedDefaultSortkey;

      if ( $useUnprefixed ) {
          return $this->mTitle->getText();
      }
      return $this->mTitle->getPrefixedText();
  }
  /**
   * Accessor for $mDefaultSort
   * Unlike getDefaultSort(), will return false if none is set
   *
   * @return string or false
   */
  public function getCustomDefaultSort() {
      $customSort = $this->mDefaultSort;
      return $customSort;
  }
  /**
   * Try to guess the section anchor name based on a wikitext fragment
   * presumably extracted from a heading, for example "Header" from
   * "== Header ==".
   *
   * @param string $text Heading wikitext
   * @return string Anchor starting with '#'; spaces become underscores and
   *   the rest is URL-encoded, with '%3A' turned back into ':' and every
   *   remaining '%' turned into '.'
   */
  public function guessSectionNameFromWikiText( $text ) {
      # Strip out wikitext links(they break the anchor)
      $stripped = $this->stripSectionName( $text );
      $headline = Sanitizer::decodeCharReferences( $stripped );

      # strip out HTML
      $headline = trim( StringUtils::delimiterReplace( '<', '>', '', $headline ) );

      $encoded = urlencode( str_replace( ' ', '_', $headline ) );

      # Replacements are applied in order: colons are restored before the
      # remaining percent signs are turned into dots.
      return str_replace(
          array( '%3A', '%' ),
          array( ':', '.' ),
          '#' . $encoded );
  }
  /**
   * Strips a text string of wikitext for use in a section anchor
   *
   * Accepts a text string and then removes all wikitext from the
   * string and leaves only the resultant text (i.e. the result of
   * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
   * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
   * to create valid section anchors by mimicking the output of the
   * parser when headings are parsed.
   *
   * @param $text string Text string to be stripped of wikitext
   * for use in a Section anchor
   * @return Filtered text string
   */
  public function stripSectionName( $text ) {
      # Strip internal link markup: piped links keep their label,
      # unpiped links keep their target
      $pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
      $plainLink = '/\[\[:?([^[]+)\|?\]\]/';
      $text = preg_replace($pipedLink,'$2',$text);
      $text = preg_replace($plainLink,'$1',$text);

      # Strip external link markup (FIXME: Not Tolerant to blank link text
      # I.E. [http://www.mediawiki.org] will render as [1] or something depending
      # on how many empty links there are on the page - need to figure that out.
      $externalLink = '/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/';
      $text = preg_replace($externalLink,'$2',$text);

      # Parse wikitext quotes (italics & bold)
      $text = $this->doQuotes($text);

      # Strip HTML tags
      return StringUtils::delimiterReplace( '<', '>', '', $text );
  }
  /**
   * Run testSrvus() on $text with the parser's current title, options and
   * output type.
   *
   * testSrvus() takes ( $text, $title, $options, $outputType ); passing only
   * the output type as second argument would put it in the $title slot and
   * leave the required $options argument missing.
   *
   * @param string $text
   * @return string
   */
  function srvus( $text ) {
      return $this->testSrvus( $text, $this->mTitle, $this->mOptions, $this->mOutputType );
  }
  /**
   * strip/replaceVariables/unstrip for preprocessor regression testing
   *
   * Clears the parser state, installs the given title, options and output
   * type, then replaces variables, expands strip markers and passes the
   * result through Sanitizer::removeHTMLtags().
   *
   * @param string $text
   * @param $title Title object, or text to build one from
   * @param $options Parser options to install
   * @param $outputType Output type, OT_HTML by default
   * @return string
   */
  function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) {
      $this->clearState();

      $this->mTitle = ( $title instanceof Title ) ? $title : Title::newFromText( $title );
      $this->mOptions = $options;
      $this->setOutputType( $outputType );

      $replaced = $this->replaceVariables( $text );
      $unstripped = $this->mStripState->unstripBoth( $replaced );

      return Sanitizer::removeHTMLtags( $unstripped );
  }
  /**
   * Test helper: run preSaveTransform() on the text, acting as the global
   * $wgUser.
   *
   * @param $text string Text to transform
   * @param $title Title object, or a value handed to Title::newFromText()
   * @param $options Options forwarded to preSaveTransform()
   * @return The value returned by preSaveTransform()
   */
  function testPst( $text, $title, $options ) {
      global $wgUser;

      $titleObj = ( $title instanceof Title ) ? $title : Title::newFromText( $title );
      return $this->preSaveTransform( $text, $titleObj, $wgUser, $options );
  }
  /**
   * Test helper: run testSrvus() with the output type fixed to
   * self::OT_PREPROCESS.
   *
   * @param $text string Text to process
   * @param $title Title object, or a value handed to Title::newFromText()
   * @param $options Options forwarded to testSrvus()
   * @return The value returned by testSrvus()
   */
  function testPreprocess( $text, $title, $options ) {
      $titleObj = ( $title instanceof Title ) ? $title : Title::newFromText( $title );
      return $this->testSrvus( $text, $titleObj, $options, self::OT_PREPROCESS );
  }
  /**
   * Apply a callback to every stretch of $s that lies outside strip
   * markers, copying the markers themselves through unchanged.
   *
   * A marker runs from an occurrence of $this->mUniqPrefix up to and
   * including the next self::MARKER_SUFFIX. If a prefix has no matching
   * suffix, the rest of the string is copied through untouched.
   *
   * @param $s string Text that may contain strip markers
   * @param $callback Callback invoked via call_user_func() with one string
   *   argument; its return value is appended to the output
   * @return string The reassembled text
   */
  function markerSkipCallback( $s, $callback ) {
      $result = '';
      $offset = 0;
      $length = strlen( $s );
      $suffixLength = strlen( self::MARKER_SUFFIX );

      while ( $offset < $length ) {
          $markerStart = strpos( $s, $this->mUniqPrefix, $offset );
          if ( $markerStart === false ) {
              // No more markers: the whole remainder goes to the callback.
              $result .= call_user_func( $callback, substr( $s, $offset ) );
              break;
          }

          // Text between the previous position and this marker.
          $result .= call_user_func( $callback, substr( $s, $offset, $markerStart - $offset ) );

          $suffixPos = strpos( $s, self::MARKER_SUFFIX, $markerStart );
          if ( $suffixPos === false ) {
              // Unterminated marker: keep the tail exactly as it is.
              $result .= substr( $s, $markerStart );
              break;
          }

          // Copy the complete marker and continue right after it.
          $offset = $suffixPos + $suffixLength;
          $result .= substr( $s, $markerStart, $offset - $markerStart );
      }

      return $result;
  }
  /**
   * Bundle half-parsed text together with the strip-marker replacements
   * and link-holder entries it refers to, in the shape that
   * unserialiseHalfParsedText() consumes.
   *
   * The returned array has these keys:
   *  - 'text': the text exactly as passed in
   *  - 'stripstate': a StripState holding the pair for every strip marker
   *    found in the text
   *  - 'linkholder': array( 'internal' => array( ns => array( key => entry ) ),
   *    'interwiki' => array( key => entry ) ), indexed by the keys that
   *    appear in the text's link placeholders
   *
   * @param $text string Half-parsed text
   * @return array See above
   * @throws MWException If the text contains a strip marker that has no
   *   stored replacement
   */
  function serialiseHalfParsedText( $text ) {
      $data = array( 'text' => $text );

      // First, find all strip markers and copy their replacements.
      $stripState = new StripState;
      $suffixLength = strlen( self::MARKER_SUFFIX );
      $pos = 0;
      // Compare with false explicitly: a marker at offset 0 is valid.
      while ( ( $startPos = strpos( $text, $this->mUniqPrefix, $pos ) ) !== false ) {
          // The suffix belonging to this marker can only follow its prefix.
          $endPos = strpos( $text, self::MARKER_SUFFIX, $startPos );
          if ( $endPos === false ) {
              break;
          }
          $endPos += $suffixLength;
          $marker = substr( $text, $startPos, $endPos - $startPos );

          // isset() rather than !empty(): an empty replacement string is
          // still a stored replacement, not a hanging marker.
          if ( isset( $this->mStripState->general->data[$marker] ) ) {
              $stripState->general->setPair( $marker, $this->mStripState->general->data[$marker] );
          } elseif ( isset( $this->mStripState->nowiki->data[$marker] ) ) {
              $stripState->nowiki->setPair( $marker, $this->mStripState->nowiki->data[$marker] );
          } else {
              throw new MWException( "Hanging strip marker: '$marker'." );
          }
          $pos = $endPos;
      }
      $data['stripstate'] = $stripState;

      // Now collect the link holders referenced by placeholders in the text.
      // Entries are stored under their original key, because
      // unserialiseHalfParsedText() rebuilds each placeholder from the
      // array key.
      $links = array( 'internal' => array(), 'interwiki' => array() );

      // Internal links: <!--LINK ns:key-->
      foreach ( $this->halfParsedPlaceholderBodies( $text, '<!--LINK ' ) as $body ) {
          $parts = explode( ':', $body, 2 );
          if ( count( $parts ) !== 2 ) {
              continue;
          }
          $ns = trim( $parts[0] );
          $key = trim( $parts[1] );
          if ( isset( $this->mLinkHolders->internals[$ns][$key] ) ) {
              $links['internal'][$ns][$key] = $this->mLinkHolders->internals[$ns][$key];
          }
      }

      // Interwiki links: <!--IWLINK key-->
      // The holder property is 'interwikis', the same one that
      // unserialiseHalfParsedText() writes to.
      foreach ( $this->halfParsedPlaceholderBodies( $text, '<!--IWLINK ' ) as $key ) {
          if ( isset( $this->mLinkHolders->interwikis[$key] ) ) {
              $links['interwiki'][$key] = $this->mLinkHolders->interwikis[$key];
          }
      }

      $data['linkholder'] = $links;
      return $data;
  }

  /**
   * List the trimmed contents of every "$opener ... -->" placeholder in
   * $text, in order of appearance.
   *
   * @param $text string Text to scan
   * @param $opener string Literal placeholder opener, e.g. '<!--LINK '
   * @return array List of strings found between the opener and '-->'
   */
  private function halfParsedPlaceholderBodies( $text, $opener ) {
      $bodies = array();
      $openerLength = strlen( $opener );
      $pos = 0;
      // Compare with false explicitly: a placeholder at offset 0 is valid.
      while ( ( $startPos = strpos( $text, $opener, $pos ) ) !== false ) {
          $bodyStart = $startPos + $openerLength;
          $closePos = strpos( $text, '-->', $bodyStart );
          if ( $closePos === false ) {
              // Unterminated placeholder: nothing usable from here on.
              break;
          }
          $bodies[] = trim( substr( $text, $bodyStart, $closePos - $bodyStart ) );
          $pos = $closePos + strlen( '-->' );
      }
      return $bodies;
  }
  /**
   * Load data produced by serialiseHalfParsedText() back into this parser.
   *
   * The saved strip state is merged into the parser's own strip state.
   * Every saved link-holder entry is registered under a new key of the
   * form "<prefix>-<old key>", and the matching placeholder in the text
   * is rewritten to use that new key.
   *
   * @param $data array Array with the keys 'text', 'stripstate' and 'linkholder'
   * @param $intPrefix string Prefix for the renumbered link keys; when
   *   empty, one is taken from getRandomString()
   * @return string The text with its link placeholders renumbered
   */
  function unserialiseHalfParsedText( $data, $intPrefix = null /* Unique identifying prefix */ ) {
      if ( !$intPrefix ) {
          $intPrefix = $this->getRandomString();
      }

      // Fold the saved strip markers into the live strip state.
      $savedState = $data['stripstate'];
      $this->mStripState->general->merge( $savedState->general );
      $this->mStripState->nowiki->merge( $savedState->nowiki );

      $text = $data['text'];
      $savedLinks = $data['linkholder'];

      // Internal links, grouped by namespace.
      foreach ( $savedLinks['internal'] as $ns => $holders ) {
          foreach ( $holders as $key => $holder ) {
              $newKey = $intPrefix . '-' . $key;
              $this->mLinkHolders->internals[$ns][$newKey] = $holder;
              $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
          }
      }

      // Interwiki links.
      foreach ( $savedLinks['interwiki'] as $key => $holder ) {
          $newKey = "$intPrefix-$key";
          $this->mLinkHolders->interwikis[$newKey] = $holder;
          $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
      }

      return $text;
  }
  4439. }
  /**
   * Holds two sets of replacements, 'general' and 'nowiki', each a
   * ReplacementArray, and applies them to text repeatedly until the text
   * stops changing.
   *
   * @ingroup Parser
   */
  class StripState {
      var $general, $nowiki;

      function __construct() {
          $this->general = new ReplacementArray;
          $this->nowiki = new ReplacementArray;
      }

      /**
       * Apply the 'general' replacements until the text no longer changes.
       *
       * @param $text string
       * @return string
       */
      function unstripGeneral( $text ) {
          wfProfileIn( __METHOD__ );
          $text = $this->replaceUntilStable( $text, array( $this->general ) );
          wfProfileOut( __METHOD__ );
          return $text;
      }

      /**
       * Apply the 'nowiki' replacements until the text no longer changes.
       *
       * @param $text string
       * @return string
       */
      function unstripNoWiki( $text ) {
          wfProfileIn( __METHOD__ );
          $text = $this->replaceUntilStable( $text, array( $this->nowiki ) );
          wfProfileOut( __METHOD__ );
          return $text;
      }

      /**
       * Apply the 'general' and then the 'nowiki' replacements, repeating
       * the pair until the text no longer changes.
       *
       * @param $text string
       * @return string
       */
      function unstripBoth( $text ) {
          wfProfileIn( __METHOD__ );
          $text = $this->replaceUntilStable( $text, array( $this->general, $this->nowiki ) );
          wfProfileOut( __METHOD__ );
          return $text;
      }

      /**
       * Run the given replacers over the text, in order, at least once and
       * then again for as long as a full pass still changes the text.
       *
       * @param $text string
       * @param $replacers array Objects offering replace( $text )
       * @return string
       */
      private function replaceUntilStable( $text, $replacers ) {
          do {
              $before = $text;
              foreach ( $replacers as $replacer ) {
                  $text = $replacer->replace( $text );
              }
          } while ( $text !== $before );
          return $text;
      }
  }
  /**
   * Accumulates captured text in $output: each call to replace() appends
   * the first capture group of a match, minus one trailing newline if it
   * ends with one.
   *
   * @ingroup Parser
   */
  class OnlyIncludeReplacer {
      var $output = '';

      /**
       * Append $matches[1] to $this->output, dropping a single trailing
       * "\n" when present. Returns nothing.
       *
       * @param $matches array Match array whose index 1 holds the captured text
       */
      function replace( $matches ) {
          $captured = $matches[1];
          $endsWithNewline = ( substr( $captured, -1 ) === "\n" );
          $this->output .= $endsWithNewline ? substr( $captured, 0, -1 ) : $captured;
      }
  }