Markdown.php 88 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152
  1. <?php
  2. #
  3. # Markdown - A text-to-HTML conversion tool for web writers
  4. #
  5. # PHP Markdown
  6. # Copyright (c) 2004-2014 Michel Fortin
  7. # <http://michelf.com/projects/php-markdown/>
  8. #
  9. # Original Markdown
  10. # Copyright (c) 2004-2006 John Gruber
  11. # <http://daringfireball.net/projects/markdown/>
  12. #
  13. namespace Michelf;
  14. #
  15. # Markdown Parser Class
  16. #
  17. class Markdown implements MarkdownInterface {
  18. ### Version ###
  19. const MARKDOWNLIB_VERSION = "1.4.1";
  20. ### Simple Function Interface ###
  public static function defaultTransform($text) {
      #
      # Convenience entry point: transforms $text with a parser instance
      # of the class this static method was called on, and returns the
      # result of that parser's transform() method. Works for derived
      # classes too, since each called class gets its own parser.
      #
      # Parsers are kept in a function-static list keyed by class name,
      # so each one is built only once.
      static $parser_list;

      # Class the call was actually made on (late static binding).
      $called_class = \get_called_class();

      # Create the parser if it is not already in the list.
      if (empty($parser_list[$called_class])) {
          $parser_list[$called_class] = new $called_class;
      }

      # Transform text using the cached parser.
      return $parser_list[$called_class]->transform($text);
  }
  ### Configuration Variables ###

  # Suffix closing empty elements such as <hr> and <br>.
  # Change to ">" for HTML output.
  public $empty_element_suffix = " />";

  # Tab width. `tab_width - 1` is the maximum number of leading spaces
  # accepted in front of link definitions and raw HTML blocks.
  public $tab_width = 4;

  # Change to `true` to disallow markup or entities.
  public $no_markup = false;
  public $no_entities = false;

  # Predefined urls and titles for reference links and images.
  # They seed the url/title tables at the start of every transform.
  public $predef_urls = array();
  public $predef_titles = array();

  # Optional filter function for URLs
  public $url_filter_func = null;


  ### Parser Implementation ###

  # Regex to match balanced [brackets].
  # Needed to insert a maximum bracket depth while converting to PHP.
  # The *_re members are built by the constructor from the depths.
  protected $nested_brackets_depth = 6;
  protected $nested_brackets_re;

  # Same idea for balanced (parentheses) inside inline URLs.
  protected $nested_url_parenthesis_depth = 4;
  protected $nested_url_parenthesis_re;

  # Characters that can be backslash-escaped; the constructor turns this
  # list into the character class stored in $escape_chars_re.
  protected $escape_chars = '\`*_{}[]()>#+-.!';
  protected $escape_chars_re;
  public function __construct() {
      #
      # Constructor. Builds the helper regexes used by the parser and
      # puts the gamut tables in execution order.
      #
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      # Balanced [brackets]: the opening half of the pattern repeated
      # `depth` times, followed by the closing half repeated as often.
      $bracket_depth = $this->nested_brackets_depth;
      $this->nested_brackets_re =
          str_repeat('(?>[^\[\]]+|\[', $bracket_depth).
          str_repeat('\])*', $bracket_depth);

      # Balanced (parentheses) inside URLs, built the same way.
      $paren_depth = $this->nested_url_parenthesis_depth;
      $this->nested_url_parenthesis_re =
          str_repeat('(?>[^()\s]+|\(', $paren_depth).
          str_repeat('(?>\)))*', $paren_depth);

      # Character class matching any one escapable character.
      $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

      # Sort document, block, and span gamut in ascending priority order.
      foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
          asort($this->$gamut);
      }
  }
  # Internal hashes used during transformation:
  # link id => url, link id => title, and hash token => original text.
  protected $urls = array();
  protected $titles = array();
  protected $html_hashes = array();

  # Status flag to avoid invalid nesting: true while link text is
  # being processed, so that no link is created inside another link.
  protected $in_anchor = false;
  protected function setup() {
      #
      # Called before the transformation process starts to put the
      # parser back into a clean per-document state.
      #
      $this->in_anchor = false;
      $this->html_hashes = array();

      # Reference tables start out with the predefined entries.
      $this->urls = $this->predef_urls;
      $this->titles = $this->predef_titles;
  }
  protected function teardown() {
      #
      # Called after the transformation process to empty the tables
      # that may be taking up memory unnecessarily.
      #
      $this->urls = $this->titles = $this->html_hashes = array();
  }
  public function transform($text) {
      #
      # Main function. Normalizes the input text, runs it through every
      # method of the document gamut and returns the result followed by
      # a newline.
      #
      $this->setup();

      # Remove a leading UTF-8 BOM and any \x1A character from the input;
      # \x1A is the marker character used inside hash tokens.
      $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

      # Standardize line endings: DOS and Mac to Unix.
      $text = preg_replace('{\r\n?}', "\n", $text);

      # Make sure the text ends with a couple of newlines, then convert
      # all tabs to spaces.
      $text = $this->detab($text . "\n\n");

      # Turn block-level HTML blocks into hash entries.
      $text = $this->hashHTMLBlocks($text);

      # Empty out lines consisting only of spaces, so that later regexes
      # can match consecutive blank lines with /\n+/ instead of something
      # contorted like /[ ]*\n+/ .
      $text = preg_replace('/^[ ]+$/m', '', $text);

      # Run document gamut methods in their sorted order.
      foreach (array_keys($this->document_gamut) as $step) {
          $text = $this->$step($text);
      }

      $this->teardown();

      return $text . "\n";
  }
  # Document-level transformations: method name => priority.
  # transform() calls them in ascending priority order.
  protected $document_gamut = array(
      # Strip link definitions, store in hashes.
      "stripLinkDefinitions" => 20,

      "runBasicBlockGamut" => 30,
  );
  protected function stripLinkDefinitions($text) {
      #
      # Removes link definitions from the text; the callback records
      # their URLs and titles in the parser's tables.
      #
      # Link defs are in the form: ^[id]: url "optional title"
      #
      $max_indent = $this->tab_width - 1;

      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
          [ ]*
          \n? # maybe *one* newline
          [ ]*
          (?:
              <(.+?)> # url = $2
          |
              (\S+?) # url = $3
          )
          [ ]*
          \n? # maybe one newline
          [ ]*
          (?:
              (?<=\s) # lookbehind for whitespace
              ["(]
              (.*?) # title = $4
              [")]
              [ ]*
          )? # title is optional
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripLinkDefinitions_callback'),
          $text
      );
  }
  protected function _stripLinkDefinitions_callback($matches) {
      #
      # Records one link definition under its lower-cased id and removes
      # it from the text.
      #
      $id = strtolower($matches[1]);

      # The URL is in $2 when written as <url>, otherwise in $3.
      if ($matches[2] == '') {
          $this->urls[$id] = $matches[3];
      } else {
          $this->urls[$id] = $matches[2];
      }

      # The title group is missing from $matches when no title was
      # given; the entry is then null.
      $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

      return ''; # String that will replace the block
  }
  protected function hashHTMLBlocks($text) {
      #
      # Replaces every block-level chunk of raw HTML found in $text with
      # a hash token (see _hashHTMLBlocks_callback) and returns the
      # resulting text. Returns $text untouched when markup is disabled.
      #
      if ($this->no_markup) return $text;

      $less_than_tab = $this->tab_width - 1;

      # Hashify HTML blocks:
      # We only want to do this for block-level HTML tags, such as headers,
      # lists, and tables. That's because we still want to wrap <p>s around
      # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
      # phrase emphasis, and spans. The list of tags we're looking for is
      # hard-coded:
      #
      # * List "a" is made of tags which can be both inline or block-level.
      #   These will be treated block-level when the start tag is alone on
      #   its line, otherwise they're not matched here and will be taken as
      #   inline later.
      # * List "b" is made of tags which are always block-level;
      #
      $block_tags_a_re = 'ins|del';
      $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                         'script|noscript|style|form|fieldset|iframe|math|svg|'.
                         'article|section|nav|aside|hgroup|header|footer|'.
                         'figure';

      # Regular expression for the content of a block tag.
      $nested_tags_level = 4;
      $attr = '
          (?> # optional tag attributes
            \s # starts with whitespace
            (?>
              [^>"/]+ # text outside quotes
            |
              /+(?!>) # slash not followed by ">"
            |
              "[^"]*" # text inside double quotes (tolerate ">")
            |
              \'[^\']*\' # text inside single quotes (tolerate ">")
            )*
          )?
          ';
      $content =
          str_repeat('
              (?>
                [^<]+ # content without tag
              |
                <\2 # nested opening tag
                  '.$attr.' # attributes
                  (?>
                    />
                  |
                    >', $nested_tags_level). # end of opening tag
                    '.*?'. # last level nested tag content
          str_repeat('
                    </\2\s*> # closing nested tag
                  )
                |
                  <(?!/\2\s*> # other tags with a different name
                )
              )*',
              $nested_tags_level);
      # Same content pattern for the "a" list, whose tag name is group 3.
      $content2 = str_replace('\2', '\3', $content);

      # First, look for nested blocks, e.g.:
      #   <div>
      #       <div>
      #       tags for inner block must be indented.
      #       </div>
      #   </div>
      #
      # The outermost tags must start at the left margin for this to match, and
      # the inner nested divs must be indented.
      # We need to do this before the next, more liberal match, because the next
      # match will start at the first `<div>` and stop at the first `</div>`.
      #
      # Capturing groups of the pattern below, in order of appearance:
      #   $1 whole block, $2 "b" tag name, $3 "a" tag name, $4 hr,
      #   $5 processing-instruction marker (? or %).
      # Only the groups of the alternative that matched are set, so the
      # processing-instruction branch must close on \5: a backreference to
      # \2 there points at an unset group and can never match.
      $text = preg_replace_callback('{(?>
          (?>
              (?<=\n) # Starting on its own line
              | # or
              \A\n? # the at beginning of the doc
          )
          ( # save in $1

            # Match from `\n<tag>` to `</tag>\n`, handling nested tags
            # in between.

                  [ ]{0,'.$less_than_tab.'}
                  <('.$block_tags_b_re.')# start tag = $2
                  '.$attr.'> # attributes followed by > and \n
                  '.$content.' # content, support nesting
                  </\2> # the matching end tag
                  [ ]* # trailing spaces/tabs
                  (?=\n+|\Z) # followed by a newline or end of document

          | # Special version for tags of group a.

                  [ ]{0,'.$less_than_tab.'}
                  <('.$block_tags_a_re.')# start tag = $3
                  '.$attr.'>[ ]*\n # attributes followed by >
                  '.$content2.' # content, support nesting
                  </\3> # the matching end tag
                  [ ]* # trailing spaces/tabs
                  (?=\n+|\Z) # followed by a newline or end of document

          | # Special case just for <hr />. It was easier to make a special
            # case than to make the other regex more complicated.

                  [ ]{0,'.$less_than_tab.'}
                  <(hr) # start tag = $4
                  '.$attr.' # attributes
                  /?> # the matching end tag
                  [ ]*
                  (?=\n{2,}|\Z) # followed by a blank line or end of document

          | # Special case for standalone HTML comments:

              [ ]{0,'.$less_than_tab.'}
              (?s:
                  <!-- .*? -->
              )
              [ ]*
              (?=\n{2,}|\Z) # followed by a blank line or end of document

          | # PHP and ASP-style processor instructions (<? and <%)

              [ ]{0,'.$less_than_tab.'}
              (?s:
                  <([?%]) # $5
                  .*?
                  \5>
              )
              [ ]*
              (?=\n{2,}|\Z) # followed by a blank line or end of document

          )
          )}Sxmi',
          array($this, '_hashHTMLBlocks_callback'),
          $text);

      return $text;
  }
  protected function _hashHTMLBlocks_callback($matches) {
      #
      # Swaps a matched HTML block ($1) for its block-level hash token,
      # set apart from the surrounding text by blank lines.
      #
      return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
  }
  protected function hashPart($text, $boundary = 'X') {
      #
      # Called whenever a function inserts an atomic element in the text
      # stream. Stores $text in the hash table and returns a unique
      # text-token standing for it, which is reverted back when calling
      # unhash.
      #
      # The $boundary argument specifies which character surrounds the
      # token. By convention, "B" is used for block elements that need
      # not be wrapped into paragraph tags at the end, ":" is used for
      # elements that are word separators and "X" is used in the general
      # case.
      #
      # Counter shared by all calls; it makes every token unique.
      static $serial = 0;

      # Swap back any tag hash found in $text so we do not have to
      # `unhash` multiple times at the end.
      $plain = $this->unhash($text);

      # Then hash the block.
      $serial++;
      $token = $boundary . "\x1A" . $serial . $boundary;
      $this->html_hashes[$token] = $plain;

      return $token; # String that will replace the tag.
  }
  protected function hashBlock($text) {
      #
      # Hashes $text as a block-level element: same as hashPart with the
      # "B" boundary character. Returns the token.
      #
      $block_boundary = 'B';
      return $this->hashPart($text, $block_boundary);
  }
  #
  # Block gamut: method name => priority. These are all the
  # transformations that form block-level tags like paragraphs,
  # headers, and list items. They run in ascending priority order.
  #
  protected $block_gamut = array(
      "doHeaders" => 10,
      "doHorizontalRules" => 20,

      "doLists" => 40,
      "doCodeBlocks" => 50,
      "doBlockQuotes" => 60,
  );
  protected function runBlockGamut($text) {
      #
      # Run block gamut transformations on $text, hashing raw HTML
      # blocks first.
      #
      # Raw HTML must be hashed before doing anything else, and this has
      # to happen for each block rather than only once at the beginning:
      # hashed blocks can be part of list items and could have been
      # indented, and indented blocks would have been seen as a code
      # block in a previous pass of hashHTMLBlocks.
      #
      $hashed = $this->hashHTMLBlocks($text);

      return $this->runBasicBlockGamut($hashed);
  }
  protected function runBasicBlockGamut($text) {
      #
      # Run block gamut transformations, without hashing HTML blocks.
      # This is useful when HTML blocks are known to be already hashed,
      # like in the first whole-document pass.
      #
      foreach (array_keys($this->block_gamut) as $step) {
          $text = $this->$step($text);
      }

      # Finally form paragraphs and restore hashed blocks.
      return $this->formParagraphs($text);
  }
  protected function doHorizontalRules($text) {
      #
      # Replaces every horizontal-rule line with a hashed <hr> element
      # on a line of its own.
      #
      # The replacement is the same for all rules, so the <hr> tag is
      # hashed once up front.
      $hr_token = $this->hashBlock("<hr$this->empty_element_suffix");
      $replacement = "\n" . $hr_token . "\n";

      $rule_re = '{
          ^[ ]{0,3} # Leading space
          ([-*_]) # $1: First marker
          (?> # Repeated marker group
              [ ]{0,2} # Zero, one, or two spaces.
              \1 # Marker character
          ){2,} # Group repeated at least twice
          [ ]* # Tailing spaces
          $ # End of line.
          }mx';

      return preg_replace($rule_re, $replacement, $text);
  }
  #
  # Span gamut: method name => priority. These are all the
  # transformations that occur *within* block-level tags like
  # paragraphs, headers, and list items. They run in ascending
  # priority order.
  #
  protected $span_gamut = array(
      # Process character escapes, code spans, and inline HTML
      # in one shot.
      "parseSpan" => -30,

      # Process anchor and image tags. Images must come first,
      # because ![foo][f] looks like an anchor.
      "doImages" => 10,
      "doAnchors" => 20,

      # Make links out of things like `<http://example.com/>`
      # Must come after doAnchors, because you can use < and >
      # delimiters in inline links like [this](<url>).
      "doAutoLinks" => 30,
      "encodeAmpsAndAngles" => 40,

      "doItalicsAndBold" => 50,
      "doHardBreaks" => 60,
  );
  protected function runSpanGamut($text) {
      #
      # Run span gamut transformations: feeds $text through every method
      # of the span gamut, each one receiving the output of the previous
      # one, and returns the final text.
      #
      $span_steps = array_keys($this->span_gamut);
      foreach ($span_steps as $step) {
          $text = $this->$step($text);
      }

      return $text;
  }
  protected function doHardBreaks($text) {
      #
      # Do hard breaks: two or more spaces right before a newline become
      # a line break element (built by the callback).
      #
      $on_break = array($this, '_doHardBreaks_callback');

      return preg_replace_callback('/ {2,}\n/', $on_break, $text);
  }
  protected function _doHardBreaks_callback($matches) {
      # Hashed <br> tag followed by a newline; the matched text itself
      # is not needed.
      $line_break = '<br' . $this->empty_element_suffix . "\n";

      return $this->hashPart($line_break);
  }
  protected function doAnchors($text) {
      #
      # Turn Markdown link shortcuts into XHTML <a> tags.
      #
      # The callbacks run the link text through the span gamut, which
      # calls this method again; the in_anchor flag turns that nested
      # call into a no-op so that links are not nested.
      #
      if ($this->in_anchor) {
          return $text;
      }
      $this->in_anchor = true;

      $on_reference = array($this, '_doAnchors_reference_callback');
      $on_inline = array($this, '_doAnchors_inline_callback');

      #
      # First, handle reference-style links: [link text] [id]
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.') # link text = $2
            \]

            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces

            \[
              (.*?) # id = $3
            \]
          )
          }xs',
          $on_reference, $text);

      #
      # Next, inline-style links: [link text](url "optional title")
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.') # link text = $2
            \]
            \( # literal paren
              [ \n]*
              (?:
                  <(.+?)> # href = $3
              |
                  ('.$this->nested_url_parenthesis_re.') # href = $4
              )
              [ \n]*
              ( # $5
                ([\'"]) # quote char = $6
                (.*?) # Title = $7
                \6 # matching quote
                [ \n]* # ignore any spaces/tabs between closing quote and )
              )? # title is optional
            \)
          )
          }xs',
          $on_inline, $text);

      #
      # Last, handle reference-style shortcuts: [link text]
      # These must come last in case you've also got [link text][1]
      # or [link text](/foo)
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            \[
              ([^\[\]]+) # link text = $2; can\'t contain [ or ]
            \]
          )
          }xs',
          $on_reference, $text);

      $this->in_anchor = false;

      return $text;
  }
  protected function _doAnchors_reference_callback($matches) {
      #
      # Builds the hashed <a> element for a reference-style link, or
      # returns the matched text unchanged when the link id is unknown.
      #
      $whole_match = $matches[1];
      $link_text = $matches[2];

      # $3 is absent for the [this] shortcut form and empty for [this][];
      # in both cases the link text doubles as the id.
      $link_id = isset($matches[3]) ? $matches[3] : null;
      if ($link_id == "") {
          $link_id = $link_text;
      }

      # lower-case and turn embedded newlines into spaces
      $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

      # No such definition: leave the text intact.
      if (!isset($this->urls[$link_id])) {
          return $whole_match;
      }

      $href = $this->encodeURLAttribute($this->urls[$link_id]);
      $tag = "<a href=\"$href\"";

      if (isset($this->titles[$link_id])) {
          $title = $this->encodeAttribute($this->titles[$link_id]);
          $tag .= " title=\"$title\"";
      }

      $link_text = $this->runSpanGamut($link_text);
      $tag .= ">$link_text</a>";

      return $this->hashPart($tag);
  }
  protected function _doAnchors_inline_callback($matches) {
      #
      # Builds the hashed <a> element for an inline link
      # [link text](url "optional title").
      #
      # $matches: $2 = link text, $3 = url written as <url>,
      # $4 = bare url, $7 = title (absent when no title was given).
      #
      # The link text goes through the span gamut exactly once, like in
      # _doAnchors_reference_callback. Running the gamut again on its
      # own output is redundant and risks transforming text that has
      # already been converted.
      $link_text = $this->runSpanGamut($matches[2]);
      $url = $matches[3] == '' ? $matches[4] : $matches[3];
      $title =& $matches[7];

      // if the URL was of the form <s p a c e s> it got caught by the HTML
      // tag parser and hashed. Need to reverse the process before using the URL.
      $unhashed = $this->unhash($url);
      if ($unhashed != $url)
          $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);

      $url = $this->encodeURLAttribute($url);

      $result = "<a href=\"$url\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $result .= " title=\"$title\"";
      }
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  protected function doImages($text) {
      #
      # Turn Markdown image shortcuts into <img> tags.
      #
      $on_reference = array($this, '_doImages_reference_callback');
      $on_inline = array($this, '_doImages_inline_callback');

      #
      # First, handle reference-style labeled images: ![alt text][id]
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.') # alt text = $2
            \]

            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces

            \[
              (.*?) # id = $3
            \]

          )
          }xs',
          $on_reference, $text);

      #
      # Next, handle inline images: ![alt text](url "optional title")
      # Don't forget: encode * and _
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.') # alt text = $2
            \]
            \s? # One optional whitespace character
            \( # literal paren
              [ \n]*
              (?:
                  <(\S*)> # src url = $3
              |
                  ('.$this->nested_url_parenthesis_re.') # src url = $4
              )
              [ \n]*
              ( # $5
                ([\'"]) # quote char = $6
                (.*?) # title = $7
                \6 # matching quote
                [ \n]*
              )? # title is optional
            \)
          )
          }xs',
          $on_inline, $text);

      return $text;
  }
  protected function _doImages_reference_callback($matches) {
      #
      # Builds the hashed <img> element for a reference-style image, or
      # returns the matched text unchanged when the id is unknown.
      #
      $whole_match = $matches[1];
      $alt_text = $matches[2];

      # An empty id, as in ![this][], means the alt text is the id.
      $link_id = strtolower($matches[3]);
      if ($link_id == "") {
          $link_id = strtolower($alt_text);
      }

      $alt_text = $this->encodeAttribute($alt_text);

      # If there's no such link ID, leave intact:
      if (!isset($this->urls[$link_id])) {
          return $whole_match;
      }

      $src = $this->encodeURLAttribute($this->urls[$link_id]);
      $tag = "<img src=\"$src\" alt=\"$alt_text\"";

      if (isset($this->titles[$link_id])) {
          $title = $this->encodeAttribute($this->titles[$link_id]);
          $tag .= " title=\"$title\"";
      }

      $tag .= $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  608. protected function _doImages_inline_callback($matches) {
  609. $whole_match = $matches[1];
  610. $alt_text = $matches[2];
  611. $url = $matches[3] == '' ? $matches[4] : $matches[3];
  612. $title =& $matches[7];
  613. $alt_text = $this->encodeAttribute($alt_text);
  614. $url = $this->encodeURLAttribute($url);
  615. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  616. if (isset($title)) {
  617. $title = $this->encodeAttribute($title);
  618. $result .= " title=\"$title\""; # $title already quoted
  619. }
  620. $result .= $this->empty_element_suffix;
  621. return $this->hashPart($result);
  622. }
  623. protected function doHeaders($text) {
  624. # Setext-style headers:
  625. # Header 1
  626. # ========
  627. #
  628. # Header 2
  629. # --------
  630. #
  631. $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
  632. array($this, '_doHeaders_callback_setext'), $text);
  633. # atx-style headers:
  634. # # Header 1
  635. # ## Header 2
  636. # ## Header 2 with closing hashes ##
  637. # ...
  638. # ###### Header 6
  639. #
  640. $text = preg_replace_callback('{
  641. ^(\#{1,6}) # $1 = string of #\'s
  642. [ ]*
  643. (.+?) # $2 = Header text
  644. [ ]*
  645. \#* # optional closing #\'s (not counted)
  646. \n+
  647. }xm',
  648. array($this, '_doHeaders_callback_atx'), $text);
  649. return $text;
  650. }
  651. protected function _doHeaders_callback_setext($matches) {
  652. # Terrible hack to check we haven't found an empty list item.
  653. if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
  654. return $matches[0];
  655. $level = $matches[2]{0} == '=' ? 1 : 2;
  656. $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
  657. return "\n" . $this->hashBlock($block) . "\n\n";
  658. }
  659. protected function _doHeaders_callback_atx($matches) {
  660. $level = strlen($matches[1]);
  661. $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
  662. return "\n" . $this->hashBlock($block) . "\n\n";
  663. }
    protected function doLists($text) {
        #
        # Form HTML ordered (numbered) and unordered (bulleted) lists.
        #
        # Two passes are made over $text, one per marker family: first
        # unordered markers (with ordered markers as the "other kind"), then
        # the reverse. Every whole list found is replaced through
        # _doLists_callback. Returns the transformed text.
        #
        $less_than_tab = $this->tab_width - 1;
        # Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re = '[*+-]';
        $marker_ol_re = '\d+[\.]';
        # Maps each marker pattern to the pattern of the *other* list kind,
        # which is used below to end a list when a different kind begins.
        $markers_relist = array(
            $marker_ul_re => $marker_ol_re,
            $marker_ol_re => $marker_ul_re,
            );
        foreach ($markers_relist as $marker_re => $other_marker_re) {
            # Re-usable pattern to match any entire ul or ol list:
            $whole_list_re = '
                ( # $1 = whole list
                ( # $2
                ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                ('.$marker_re.') # $4 = first list item marker
                [ ]+
                )
                (?s:.+?)
                ( # $5
                \z
                |
                \n{2,}
                (?=\S)
                (?! # Negative lookahead for another list item marker
                [ ]*
                '.$marker_re.'[ ]+
                )
                |
                (?= # Lookahead for another kind of list
                \n
                \3 # Must have the same indentation
                '.$other_marker_re.'[ ]+
                )
                )
                )
                '; // mx
            # We use a different prefix before nested lists than top-level lists.
            # See extended comment in _ProcessListItems().
            if ($this->list_level) {
                # Inside a list: a list may begin at the start of any line.
                $text = preg_replace_callback('{
                    ^
                    '.$whole_list_re.'
                    }mx',
                    array($this, '_doLists_callback'), $text);
            }
            else {
                # Top level: a list must follow a blank line or begin the text.
                $text = preg_replace_callback('{
                    (?:(?<=\n)\n|\A\n?) # Must eat the newline
                    '.$whole_list_re.'
                    }mx',
                    array($this, '_doLists_callback'), $text);
            }
        }
        return $text;
    }
  723. protected function _doLists_callback($matches) {
  724. # Re-usable patterns to match list item bullets and number markers:
  725. $marker_ul_re = '[*+-]';
  726. $marker_ol_re = '\d+[\.]';
  727. $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
  728. $list = $matches[1];
  729. $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
  730. $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
  731. $list .= "\n";
  732. $result = $this->processListItems($list, $marker_any_re);
  733. $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
  734. return "\n". $result ."\n\n";
  735. }
    # Nesting depth of the list currently being processed; incremented and
    # decremented by processListItems, and read by doLists.
    protected $list_level = 0;
    protected function processListItems($list_str, $marker_any_re) {
        #
        # Process the contents of a single ordered or unordered list, splitting it
        # into individual list items.
        #
        # Params:
        #   $list_str      - text of the whole list
        #   $marker_any_re - regex fragment matching this list's item markers
        # Returns the list text with every item replaced by the output of
        # _processListItems_callback.
        #
        # The $this->list_level global keeps track of when we're inside a list.
        # Each time we enter a list, we increment it; when we leave a list,
        # we decrement. If it's zero, we're not in a list anymore.
        #
        # We do this because when we're not inside a list, we want to treat
        # something like this:
        #
        #     I recommend upgrading to version
        #     8. Oops, now this line is treated
        #     as a sub-list.
        #
        # As a single paragraph, despite the fact that the second line starts
        # with a digit-period-space sequence.
        #
        # Whereas when we're inside a list (or sub-list), that line will be
        # treated as the start of a sub-list. What a kludge, huh? This is
        # an aspect of Markdown's syntax that's hard to parse perfectly
        # without resorting to mind-reading. Perhaps the solution is to
        # change the syntax rules such that sub-lists must start with a
        # starting cardinal number; e.g. "1." or "a.".
        $this->list_level++;
        # trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
        # The final lookahead ends an item at the end of the list or just
        # before the next marker carrying the same leading whitespace (\2).
        $list_str = preg_replace_callback('{
            (\n)? # leading line = $1
            (^[ ]*) # leading whitespace = $2
            ('.$marker_any_re.' # list marker and space = $3
            (?:[ ]+|(?=\n)) # space only required if item is not empty
            )
            ((?s:.*?)) # list item text = $4
            (?:(\n+(?=\n))|\n) # tailing blank line = $5
            (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
            }xm',
            array($this, '_processListItems_callback'), $list_str);
        $this->list_level--;
        return $list_str;
    }
  779. protected function _processListItems_callback($matches) {
  780. $item = $matches[4];
  781. $leading_line =& $matches[1];
  782. $leading_space =& $matches[2];
  783. $marker_space = $matches[3];
  784. $tailing_blank_line =& $matches[5];
  785. if ($leading_line || $tailing_blank_line ||
  786. preg_match('/\n{2,}/', $item))
  787. {
  788. # Replace marker with the appropriate whitespace indentation
  789. $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
  790. $item = $this->runBlockGamut($this->outdent($item)."\n");
  791. }
  792. else {
  793. # Recursion for sub-lists:
  794. $item = $this->doLists($this->outdent($item));
  795. $item = preg_replace('/\n+$/', '', $item);
  796. $item = $this->runSpanGamut($item);
  797. }
  798. return "<li>" . $item . "</li>\n";
  799. }
  800. protected function doCodeBlocks($text) {
  801. #
  802. # Process Markdown `<pre><code>` blocks.
  803. #
  804. $text = preg_replace_callback('{
  805. (?:\n\n|\A\n?)
  806. ( # $1 = the code block -- one or more lines, starting with a space/tab
  807. (?>
  808. [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
  809. .*\n+
  810. )+
  811. )
  812. ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  813. }xm',
  814. array($this, '_doCodeBlocks_callback'), $text);
  815. return $text;
  816. }
  817. protected function _doCodeBlocks_callback($matches) {
  818. $codeblock = $matches[1];
  819. $codeblock = $this->outdent($codeblock);
  820. $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
  821. # trim leading newlines and trailing newlines
  822. $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
  823. $codeblock = "<pre><code>$codeblock\n</code></pre>";
  824. return "\n\n".$this->hashBlock($codeblock)."\n\n";
  825. }
  826. protected function makeCodeSpan($code) {
  827. #
  828. # Create a code span markup for $code. Called from handleSpanToken.
  829. #
  830. $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
  831. return $this->hashPart("<code>$code</code>");
  832. }
    # Emphasis token patterns, combined by prepareItalicsAndBold into the
    # expressions that doItalicsAndBold searches with.
    #
    # In each table the key is the delimiter that is currently open; the ''
    # key holds the pattern used while no delimiter of that kind is open.

    # Single-character emphasis (* or _).
    protected $em_relist = array(
        '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
        '*' => '(?<![\s*])\*(?!\*)',
        '_' => '(?<![\s_])_(?!_)',
        );
    # Two-character strong emphasis (** or __).
    protected $strong_relist = array(
        '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
        '**' => '(?<![\s*])\*\*(?!\*)',
        '__' => '(?<![\s_])__(?!_)',
        );
    # Three-character combined emphasis (*** or ___).
    protected $em_strong_relist = array(
        '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
        '***' => '(?<![\s*])\*\*\*(?!\*)',
        '___' => '(?<![\s_])___(?!_)',
        );
    # Filled in by prepareItalicsAndBold; keyed by "$em$strong".
    protected $em_strong_prepared_relist;
  849. protected function prepareItalicsAndBold() {
  850. #
  851. # Prepare regular expressions for searching emphasis tokens in any
  852. # context.
  853. #
  854. foreach ($this->em_relist as $em => $em_re) {
  855. foreach ($this->strong_relist as $strong => $strong_re) {
  856. # Construct list of allowed token expressions.
  857. $token_relist = array();
  858. if (isset($this->em_strong_relist["$em$strong"])) {
  859. $token_relist[] = $this->em_strong_relist["$em$strong"];
  860. }
  861. $token_relist[] = $em_re;
  862. $token_relist[] = $strong_re;
  863. # Construct master expression from list.
  864. $token_re = '{('. implode('|', $token_relist) .')}';
  865. $this->em_strong_prepared_relist["$em$strong"] = $token_re;
  866. }
  867. }
  868. }
  869. protected function doItalicsAndBold($text) {
  870. $token_stack = array('');
  871. $text_stack = array('');
  872. $em = '';
  873. $strong = '';
  874. $tree_char_em = false;
  875. while (1) {
  876. #
  877. # Get prepared regular expression for seraching emphasis tokens
  878. # in current context.
  879. #
  880. $token_re = $this->em_strong_prepared_relist["$em$strong"];
  881. #
  882. # Each loop iteration search for the next emphasis token.
  883. # Each token is then passed to handleSpanToken.
  884. #
  885. $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
  886. $text_stack[0] .= $parts[0];
  887. $token =& $parts[1];
  888. $text =& $parts[2];
  889. if (empty($token)) {
  890. # Reached end of text span: empty stack without emitting.
  891. # any more emphasis.
  892. while ($token_stack[0]) {
  893. $text_stack[1] .= array_shift($token_stack);
  894. $text_stack[0] .= array_shift($text_stack);
  895. }
  896. break;
  897. }
  898. $token_len = strlen($token);
  899. if ($tree_char_em) {
  900. # Reached closing marker while inside a three-char emphasis.
  901. if ($token_len == 3) {
  902. # Three-char closing marker, close em and strong.
  903. array_shift($token_stack);
  904. $span = array_shift($text_stack);
  905. $span = $this->runSpanGamut($span);
  906. $span = "<strong><em>$span</em></strong>";
  907. $text_stack[0] .= $this->hashPart($span);
  908. $em = '';
  909. $strong = '';
  910. } else {
  911. # Other closing marker: close one em or strong and
  912. # change current token state to match the other
  913. $token_stack[0] = str_repeat($token{0}, 3-$token_len);
  914. $tag = $token_len == 2 ? "strong" : "em";
  915. $span = $text_stack[0];
  916. $span = $this->runSpanGamut($span);
  917. $span = "<$tag>$span</$tag>";
  918. $text_stack[0] = $this->hashPart($span);
  919. $$tag = ''; # $$tag stands for $em or $strong
  920. }
  921. $tree_char_em = false;
  922. } else if ($token_len == 3) {
  923. if ($em) {
  924. # Reached closing marker for both em and strong.
  925. # Closing strong marker:
  926. for ($i = 0; $i < 2; ++$i) {
  927. $shifted_token = array_shift($token_stack);
  928. $tag = strlen($shifted_token) == 2 ? "strong" : "em";
  929. $span = array_shift($text_stack);
  930. $span = $this->runSpanGamut($span);
  931. $span = "<$tag>$span</$tag>";
  932. $text_stack[0] .= $this->hashPart($span);
  933. $$tag = ''; # $$tag stands for $em or $strong
  934. }
  935. } else {
  936. # Reached opening three-char emphasis marker. Push on token
  937. # stack; will be handled by the special condition above.
  938. $em = $token{0};
  939. $strong = "$em$em";
  940. array_unshift($token_stack, $token);
  941. array_unshift($text_stack, '');
  942. $tree_char_em = true;
  943. }
  944. } else if ($token_len == 2) {
  945. if ($strong) {
  946. # Unwind any dangling emphasis marker:
  947. if (strlen($token_stack[0]) == 1) {
  948. $text_stack[1] .= array_shift($token_stack);
  949. $text_stack[0] .= array_shift($text_stack);
  950. }
  951. # Closing strong marker:
  952. array_shift($token_stack);
  953. $span = array_shift($text_stack);
  954. $span = $this->runSpanGamut($span);
  955. $span = "<strong>$span</strong>";
  956. $text_stack[0] .= $this->hashPart($span);
  957. $strong = '';
  958. } else {
  959. array_unshift($token_stack, $token);
  960. array_unshift($text_stack, '');
  961. $strong = $token;
  962. }
  963. } else {
  964. # Here $token_len == 1
  965. if ($em) {
  966. if (strlen($token_stack[0]) == 1) {
  967. # Closing emphasis marker:
  968. array_shift($token_stack);
  969. $span = array_shift($text_stack);
  970. $span = $this->runSpanGamut($span);
  971. $span = "<em>$span</em>";
  972. $text_stack[0] .= $this->hashPart($span);
  973. $em = '';
  974. } else {
  975. $text_stack[0] .= $token;
  976. }
  977. } else {
  978. array_unshift($token_stack, $token);
  979. array_unshift($text_stack, '');
  980. $em = $token;
  981. }
  982. }
  983. }
  984. return $text_stack[0];
  985. }
  986. protected function doBlockQuotes($text) {
  987. $text = preg_replace_callback('/
  988. ( # Wrap whole match in $1
  989. (?>
  990. ^[ ]*>[ ]? # ">" at the start of a line
  991. .+\n # rest of the first line
  992. (.+\n)* # subsequent consecutive lines
  993. \n* # blanks
  994. )+
  995. )
  996. /xm',
  997. array($this, '_doBlockQuotes_callback'), $text);
  998. return $text;
  999. }
  1000. protected function _doBlockQuotes_callback($matches) {
  1001. $bq = $matches[1];
  1002. # trim one level of quoting - trim whitespace-only lines
  1003. $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
  1004. $bq = $this->runBlockGamut($bq); # recurse
  1005. $bq = preg_replace('/^/m', " ", $bq);
  1006. # These leading spaces cause problem with <pre> content,
  1007. # so we need to fix that:
  1008. $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
  1009. array($this, '_doBlockQuotes_callback2'), $bq);
  1010. return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
  1011. }
  1012. protected function _doBlockQuotes_callback2($matches) {
  1013. $pre = $matches[1];
  1014. $pre = preg_replace('/^ /m', '', $pre);
  1015. return $pre;
  1016. }
  1017. protected function formParagraphs($text) {
  1018. #
  1019. # Params:
  1020. # $text - string to process with html <p> tags
  1021. #
  1022. # Strip leading and trailing lines:
  1023. $text = preg_replace('/\A\n+|\n+\z/', '', $text);
  1024. $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
  1025. #
  1026. # Wrap <p> tags and unhashify HTML blocks
  1027. #
  1028. foreach ($grafs as $key => $value) {
  1029. if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
  1030. # Is a paragraph.
  1031. $value = $this->runSpanGamut($value);
  1032. $value = preg_replace('/^([ ]*)/', "<p>", $value);
  1033. $value .= "</p>";
  1034. $grafs[$key] = $this->unhash($value);
  1035. }
  1036. else {
  1037. # Is a block.
  1038. # Modify elements of @grafs in-place...
  1039. $graf = $value;
  1040. $block = $this->html_hashes[$graf];
  1041. $graf = $block;
  1042. // if (preg_match('{
  1043. // \A
  1044. // ( # $1 = <div> tag
  1045. // <div \s+
  1046. // [^>]*
  1047. // \b
  1048. // markdown\s*=\s* ([\'"]) # $2 = attr quote char
  1049. // 1
  1050. // \2
  1051. // [^>]*
  1052. // >
  1053. // )
  1054. // ( # $3 = contents
  1055. // .*
  1056. // )
  1057. // (</div>) # $4 = closing tag
  1058. // \z
  1059. // }xs', $block, $matches))
  1060. // {
  1061. // list(, $div_open, , $div_content, $div_close) = $matches;
  1062. //
  1063. // # We can't call Markdown(), because that resets the hash;
  1064. // # that initialization code should be pulled into its own sub, though.
  1065. // $div_content = $this->hashHTMLBlocks($div_content);
  1066. //
  1067. // # Run document gamut methods on the content.
  1068. // foreach ($this->document_gamut as $method => $priority) {
  1069. // $div_content = $this->$method($div_content);
  1070. // }
  1071. //
  1072. // $div_open = preg_replace(
  1073. // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
  1074. //
  1075. // $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
  1076. // }
  1077. $grafs[$key] = $graf;
  1078. }
  1079. }
  1080. return implode("\n\n", $grafs);
  1081. }
  1082. protected function encodeAttribute($text) {
  1083. #
  1084. # Encode text for a double-quoted HTML attribute. This function
  1085. # is *not* suitable for attributes enclosed in single quotes.
  1086. #
  1087. $text = $this->encodeAmpsAndAngles($text);
  1088. $text = str_replace('"', '&quot;', $text);
  1089. return $text;
  1090. }
  1091. protected function encodeURLAttribute($url, &$text = null) {
  1092. #
  1093. # Encode text for a double-quoted HTML attribute containing a URL,
  1094. # applying the URL filter if set. Also generates the textual
  1095. # representation for the URL (removing mailto: or tel:) storing it in $text.
  1096. # This function is *not* suitable for attributes enclosed in single quotes.
  1097. #
  1098. if ($this->url_filter_func)
  1099. $url = call_user_func($this->url_filter_func, $url);
  1100. if (preg_match('{^mailto:}i', $url))
  1101. $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
  1102. else if (preg_match('{^tel:}i', $url))
  1103. {
  1104. $url = $this->encodeAttribute($url);
  1105. $text = substr($url, 4);
  1106. }
  1107. else
  1108. {
  1109. $url = $this->encodeAttribute($url);
  1110. $text = $url;
  1111. }
  1112. return $url;
  1113. }
  1114. protected function encodeAmpsAndAngles($text) {
  1115. #
  1116. # Smart processing for ampersands and angle brackets that need to
  1117. # be encoded. Valid character entities are left alone unless the
  1118. # no-entities mode is set.
  1119. #
  1120. if ($this->no_entities) {
  1121. $text = str_replace('&', '&amp;', $text);
  1122. } else {
  1123. # Ampersand-encoding based entirely on Nat Irons's Amputator
  1124. # MT plugin: <http://bumppo.net/projects/amputator/>
  1125. $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
  1126. '&amp;', $text);
  1127. }
  1128. # Encode remaining <'s
  1129. $text = str_replace('<', '&lt;', $text);
  1130. return $text;
  1131. }
  1132. protected function doAutoLinks($text) {
  1133. $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
  1134. array($this, '_doAutoLinks_url_callback'), $text);
  1135. # Email addresses: <address@domain.foo>
  1136. $text = preg_replace_callback('{
  1137. <
  1138. (?:mailto:)?
  1139. (
  1140. (?:
  1141. [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
  1142. |
  1143. ".*?"
  1144. )
  1145. \@
  1146. (?:
  1147. [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
  1148. |
  1149. \[[\d.a-fA-F:]+\] # IPv4 & IPv6
  1150. )
  1151. )
  1152. >
  1153. }xi',
  1154. array($this, '_doAutoLinks_email_callback'), $text);
  1155. return $text;
  1156. }
  1157. protected function _doAutoLinks_url_callback($matches) {
  1158. $url = $this->encodeURLAttribute($matches[1], $text);
  1159. $link = "<a href=\"$url\">$text</a>";
  1160. return $this->hashPart($link);
  1161. }
  1162. protected function _doAutoLinks_email_callback($matches) {
  1163. $addr = $matches[1];
  1164. $url = $this->encodeURLAttribute("mailto:$addr", $text);
  1165. $link = "<a href=\"$url\">$text</a>";
  1166. return $this->hashPart($link);
  1167. }
  1168. protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
  1169. #
  1170. # Input: some text to obfuscate, e.g. "mailto:foo@example.com"
  1171. #
  1172. # Output: the same text but with most characters encoded as either a
  1173. # decimal or hex entity, in the hopes of foiling most address
  1174. # harvesting spam bots. E.g.:
  1175. #
  1176. # &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
  1177. # &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
  1178. # &#x6d;
  1179. #
  1180. # Note: the additional output $tail is assigned the same value as the
  1181. # ouput, minus the number of characters specified by $head_length.
  1182. #
  1183. # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
  1184. # With some optimizations by Milian Wolff. Forced encoding of HTML
  1185. # attribute special characters by Allan Odgaard.
  1186. #
  1187. if ($text == "") return $tail = "";
  1188. $chars = preg_split('/(?<!^)(?!$)/', $text);
  1189. $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.
  1190. foreach ($chars as $key => $char) {
  1191. $ord = ord($char);
  1192. # Ignore non-ascii chars.
  1193. if ($ord < 128) {
  1194. $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
  1195. # roughly 10% raw, 45% hex, 45% dec
  1196. # '@' *must* be encoded. I insist.
  1197. # '"' and '>' have to be encoded inside the attribute
  1198. if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
  1199. else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
  1200. else $chars[$key] = '&#'.$ord.';';
  1201. }
  1202. }
  1203. $text = implode('', $chars);
  1204. $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
  1205. return $text;
  1206. }
  1207. protected function parseSpan($str) {
  1208. #
  1209. # Take the string $str and parse it into tokens, hashing embeded HTML,
  1210. # escaped characters and handling code spans.
  1211. #
  1212. $output = '';
  1213. $span_re = '{
  1214. (
  1215. \\\\'.$this->escape_chars_re.'
  1216. |
  1217. (?<![`\\\\])
  1218. `+ # code span marker
  1219. '.( $this->no_markup ? '' : '
  1220. |
  1221. <!-- .*? --> # comment
  1222. |
  1223. <\?.*?\?> | <%.*?%> # processing instruction
  1224. |
  1225. <[!$]?[-a-zA-Z0-9:_]+ # regular tags
  1226. (?>
  1227. \s
  1228. (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
  1229. )?
  1230. >
  1231. |
  1232. <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
  1233. |
  1234. </[-a-zA-Z0-9:_]+\s*> # closing tag
  1235. ').'
  1236. )
  1237. }xs';
  1238. while (1) {
  1239. #
  1240. # Each loop iteration seach for either the next tag, the next
  1241. # openning code span marker, or the next escaped character.
  1242. # Each token is then passed to handleSpanToken.
  1243. #
  1244. $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
  1245. # Create token from text preceding tag.
  1246. if ($parts[0] != "") {
  1247. $output .= $parts[0];
  1248. }
  1249. # Check if we reach the end.
  1250. if (isset($parts[1])) {
  1251. $output .= $this->handleSpanToken($parts[1], $parts[2]);
  1252. $str = $parts[2];
  1253. }
  1254. else {
  1255. break;
  1256. }
  1257. }
  1258. return $output;
  1259. }
  1260. protected function handleSpanToken($token, &$str) {
  1261. #
  1262. # Handle $token provided by parseSpan by determining its nature and
  1263. # returning the corresponding value that should replace it.
  1264. #
  1265. switch ($token{0}) {
  1266. case "\\":
  1267. return $this->hashPart("&#". ord($token{1}). ";");
  1268. case "`":
  1269. # Search for end marker in remaining text.
  1270. if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
  1271. $str, $matches))
  1272. {
  1273. $str = $matches[2];
  1274. $codespan = $this->makeCodeSpan($matches[1]);
  1275. return $this->hashPart($codespan);
  1276. }
  1277. return $token; // return as text since no ending marker found.
  1278. default:
  1279. return $this->hashPart($token);
  1280. }
  1281. }
  1282. protected function outdent($text) {
  1283. #
  1284. # Remove one level of line-leading tabs or spaces
  1285. #
  1286. return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
  1287. }
  1288. # String length function for detab. `_initDetab` will create a function to
  1289. # hanlde UTF-8 if the default function does not exist.
  1290. protected $utf8_strlen = 'mb_strlen';
  1291. protected function detab($text) {
  1292. #
  1293. # Replace tabs with the appropriate amount of space.
  1294. #
  1295. # For each line we separate the line in blocks delemited by
  1296. # tab characters. Then we reconstruct every line by adding the
  1297. # appropriate number of space between each blocks.
  1298. $text = preg_replace_callback('/^.*\t.*$/m',
  1299. array($this, '_detab_callback'), $text);
  1300. return $text;
  1301. }
  1302. protected function _detab_callback($matches) {
  1303. $line = $matches[0];
  1304. $strlen = $this->utf8_strlen; # strlen function for UTF-8.
  1305. # Split in blocks.
  1306. $blocks = explode("\t", $line);
  1307. # Add each blocks to the line.
  1308. $line = $blocks[0];
  1309. unset($blocks[0]); # Do not add first block twice.
  1310. foreach ($blocks as $block) {
  1311. # Calculate amount of space, insert spaces, insert block.
  1312. $amount = $this->tab_width -
  1313. $strlen($line, 'UTF-8') % $this->tab_width;
  1314. $line .= str_repeat(" ", $amount) . $block;
  1315. }
  1316. return $line;
  1317. }
  1318. protected function _initDetab() {
  1319. #
  1320. # Check for the availability of the function in the `utf8_strlen` property
  1321. # (initially `mb_strlen`). If the function is not available, create a
  1322. # function that will loosely count the number of UTF-8 characters with a
  1323. # regular expression.
  1324. #
  1325. if (function_exists($this->utf8_strlen)) return;
  1326. $this->utf8_strlen = create_function('$text', 'return preg_match_all(
  1327. "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
  1328. $text, $m);');
  1329. }
  1330. protected function unhash($text) {
  1331. #
  1332. # Swap back in all the tags hashed by _HashHTMLBlocks.
  1333. #
  1334. return preg_replace_callback('/(.)\x1A[0-9]+\1/',
  1335. array($this, '_unhash_callback'), $text);
  1336. }
  1337. protected function _unhash_callback($matches) {
  1338. return $this->html_hashes[$matches[0]];
  1339. }
  1340. }
  1341. #
  1342. # Temporary Markdown Extra Parser Implementation Class
  1343. #
  1344. # NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
  1346. # This makes it easier to propagate the changes between the three different
  1347. # packaging styles of PHP Markdown. When this issue is resolved, this
  1348. # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
  1349. # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
  1350. # one.
  1351. #
  1352. abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
    ### Configuration Variables ###
    # Public settings, all of which have a default value below.

    # Prefix for footnote ids.
    public $fn_id_prefix = "";
    # Optional title attribute for footnote links and backlinks.
    public $fn_link_title = "";
    public $fn_backlink_title = "";
    # Optional class attribute for footnote links and backlinks.
    public $fn_link_class = "footnote-ref";
    public $fn_backlink_class = "footnote-backref";
    # Class name for table cell alignment (%% replaced left/center/right)
    # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
    # If empty, the align attribute is used instead of a class name.
    public $table_align_class_tmpl = '';
    # Optional class prefix for fenced code block.
    public $code_class_prefix = "";
    # Class attribute for code blocks goes on the `code` tag;
    # setting this to true will put attributes on the `pre` tag instead.
    public $code_attr_on_pre = false;
    # Predefined abbreviations, as an array of word => description; they
    # are loaded into the abbreviation tables by setup().
    public $predef_abbr = array();
  1373. ### Parser Implementation ###
  1374. public function __construct() {
  1375. #
  1376. # Constructor function. Initialize the parser object.
  1377. #
  1378. # Add extra escapable characters before parent constructor
  1379. # initialize the table.
  1380. $this->escape_chars .= ':|';
  1381. # Insert extra document, block, and span transformations.
  1382. # Parent constructor will do the sorting.
  1383. $this->document_gamut += array(
  1384. "doFencedCodeBlocks" => 5,
  1385. "stripFootnotes" => 15,
  1386. "stripAbbreviations" => 25,
  1387. "appendFootnotes" => 50,
  1388. );
  1389. $this->block_gamut += array(
  1390. "doFencedCodeBlocks" => 5,
  1391. "doTables" => 15,
  1392. "doDefLists" => 45,
  1393. );
  1394. $this->span_gamut += array(
  1395. "doFootnotes" => 5,
  1396. "doAbbreviations" => 70,
  1397. );
  1398. parent::__construct();
  1399. }
  1400. # Extra variables used during extra transformations.
  1401. protected $footnotes = array();
  1402. protected $footnotes_ordered = array();
  1403. protected $footnotes_ref_count = array();
  1404. protected $footnotes_numbers = array();
  1405. protected $abbr_desciptions = array();
  1406. protected $abbr_word_re = '';
  1407. # Give the current footnote number.
  1408. protected $footnote_counter = 1;
  1409. protected function setup() {
  1410. #
  1411. # Setting up Extra-specific variables.
  1412. #
  1413. parent::setup();
  1414. $this->footnotes = array();
  1415. $this->footnotes_ordered = array();
  1416. $this->footnotes_ref_count = array();
  1417. $this->footnotes_numbers = array();
  1418. $this->abbr_desciptions = array();
  1419. $this->abbr_word_re = '';
  1420. $this->footnote_counter = 1;
  1421. foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
  1422. if ($this->abbr_word_re)
  1423. $this->abbr_word_re .= '|';
  1424. $this->abbr_word_re .= preg_quote($abbr_word);
  1425. $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
  1426. }
  1427. }
  1428. protected function teardown() {
  1429. #
  1430. # Clearing Extra-specific variables.
  1431. #
  1432. $this->footnotes = array();
  1433. $this->footnotes_ordered = array();
  1434. $this->footnotes_ref_count = array();
  1435. $this->footnotes_numbers = array();
  1436. $this->abbr_desciptions = array();
  1437. $this->abbr_word_re = '';
  1438. parent::teardown();
  1439. }
  1440. ### Extra Attribute Parser ###
  1441. # Expression to use to catch attributes (includes the braces)
  1442. protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
  1443. # Expression to use when parsing in a context when no capture is desired
  1444. protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
  protected function doExtraAttributes($tag_name, $attr) {
      #
      # Parse attributes caught by the $this->id_class_attr_catch_re expression
      # and return the HTML-formatted list of attributes.
      #
      # Supported tokens are .class, #id (only the first id is used) and
      # key=value. key=value pairs are left out when $this->no_markup is set.
      # $tag_name is part of the method's interface but is not used here.
      #
      # Returns "" when $attr is empty, otherwise a string in which every
      # attribute is preceded by a space, e.g. ' id="x" class="a b"'.
      #
      if (empty($attr)) return "";

      # Split on components
      preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
      $elements = $matches[0];

      # handle classes and ids (only first id taken into account)
      $classes = array();
      $attributes = array();
      $id = false;
      foreach ($elements as $element) {
          # Square-bracket offset: the $str{0} form is deprecated since
          # PHP 7.4 and is a parse error in PHP 8. Every element has at
          # least two characters, so offset 0 always exists.
          $marker = $element[0];
          if ($marker == '.') {
              $classes[] = substr($element, 1);
          } else if ($marker == '#') {
              if ($id === false) $id = substr($element, 1);
          } else if (strpos($element, '=') > 0) {
              $parts = explode('=', $element, 2);
              $attributes[] = $parts[0] . '="' . $parts[1] . '"';
          }
      }

      # compose attributes as string
      $attr_str = "";
      if (!empty($id)) {
          $attr_str .= ' id="'.$id.'"';
      }
      if (!empty($classes)) {
          $attr_str .= ' class="'.implode(" ", $classes).'"';
      }
      if (!$this->no_markup && !empty($attributes)) {
          $attr_str .= ' '.implode(" ", $attributes);
      }
      return $attr_str;
  }
  protected function stripLinkDefinitions($text) {
      #
      # Remove link definitions from $text. Each definition found is handed
      # to _stripLinkDefinitions_callback, which records it and returns the
      # replacement for the removed block.
      #
      $max_indent = $this->tab_width - 1;

      # Link defs are in the form: ^[id]: url "optional title"
      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
          [ ]*
          \n? # maybe *one* newline
          [ ]*
          (?:
          <(.+?)> # url = $2
          |
          (\S+?) # url = $3
          )
          [ ]*
          \n? # maybe one newline
          [ ]*
          (?:
          (?<=\s) # lookbehind for whitespace
          ["(]
          (.*?) # title = $4
          [")]
          [ ]*
          )? # title is optional
          (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr
          (?:\n+|\Z)
          }xm';

      $handler = array($this, '_stripLinkDefinitions_callback');
      return preg_replace_callback($definition_re, $handler, $text);
  }
  protected function _stripLinkDefinitions_callback($matches) {
      #
      # Record one link definition under its lower-cased id: URL in
      # $this->urls, title in $this->titles (null when no title group was
      # captured) and formatted extra attributes in $this->ref_attr.
      # Returns the empty string that replaces the definition in the text.
      #
      $link_id = strtolower($matches[1]);

      # The URL is either the <bracketed> form ($2) or the bare form ($3).
      if ($matches[2] == '') {
          $url = $matches[3];
      } else {
          $url = $matches[2];
      }
      $this->urls[$link_id] = $url;

      # Trailing optional groups are absent from $matches when unmatched.
      $this->titles[$link_id] = isset($matches[4]) ? $matches[4] : null;
      $extra_attr = isset($matches[5]) ? $matches[5] : null;
      $this->ref_attr[$link_id] = $this->doExtraAttributes("", $extra_attr);

      return ''; # String that will replace the block
  }
  ### HTML Block Parser ###
  # Each property below is a "|"-separated alternation of tag names that the
  # HTML block parser splices into its regular expressions.

  # Tags that are always treated as block tags:
  protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
  # Tags treated as block tags only if the opening tag is alone on its line:
  # NOTE(review): "iframe" is also listed in $block_tags_re above.
  protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
  # Tags where markdown="1" default to span mode:
  protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
  # Tags which must not have their contents modified, no matter where
  # they appear:
  protected $clean_tags_re = 'script|style|math|svg';
  # Tags that do not need to be closed.
  protected $auto_close_tags_re = 'hr|img|param|source|track';
  protected function hashHTMLBlocks($text) {
      #
      # Hashify HTML blocks and "clean tags".
      #
      # Only block-level HTML (headers, lists, tables, ...) is hashed here,
      # so that paragraphs wrapped in non-block-level tags such as anchors,
      # phrase emphasis and spans can still receive their <p> wrapper. The
      # list of tags looked for is hard-coded.
      #
      # The work is done by _hashHTMLBlocks_inMarkdown, which calls
      # _hashHTMLBlocks_inHTML when it meets a block tag; that method calls
      # back into _hashHTMLBlocks_inMarkdown for tags carrying the
      # markdown="1" attribute. The two methods are mutually recursive.
      #
      # When $this->no_markup is set the text is returned untouched.
      #
      if (!$this->no_markup) {
          # Only the processed text (first element) is of interest here.
          $result = $this->_hashHTMLBlocks_inMarkdown($text);
          $text = $result[0];
      }
      return $text;
  }
  protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                                $enclosing_tag_re = '', $span = false)
  {
      #
      # Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
      #
      # *   $indent is the number of spaces to be ignored when checking for
      #     code blocks. This is important because if we don't take the indent
      #     into account, something like this (which looks right) won't work
      #     as expected:
      #
      #     <div>
      #         <div markdown="1">
      #         Hello World.  <-- Is this a Markdown code block or text?
      #         </div>  <-- Is this a Markdown code block or a real tag?
      #     <div>
      #
      #     If you don't like this, just don't indent the tag on which
      #     you apply the markdown="1" attribute.
      #
      # *   If $enclosing_tag_re is not empty, stops at the first unmatched
      #     closing tag with that name. Nested tags supported.
      #
      # *   If $span is true, text inside must be treated as span. A void
      #     span-level hash is added around every newline so that no block
      #     element gets triggered.
      #
      # Returns an array of that form: ( processed text , remaining text )
      #
      if ($text === '') return array('', '');

      # Regex to check for the presence of newlines around a block tag.
      $newline_before_re = '/(?:^\n?|\n\n)*$/';
      $newline_after_re =
          '{
              ^ # Start of text following the tag.
              (?>[ ]*<!--.*?-->)? # Optional comment.
              [ ]*\n # Must be followed by newline.
          }xs';

      # Regex to match any tag.
      $block_tag_re =
          '{
              ( # $2: Capture whole tag.
              </? # Any opening or closing tag.
              (?> # Tag name.
              '.$this->block_tags_re.' |
              '.$this->context_block_tags_re.' |
              '.$this->clean_tags_re.' |
              (?!\s)'.$enclosing_tag_re.'
              )
              (?:
              (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
              (?>
              ".*?" | # Double quotes (can contain `>`)
              \'.*?\' | # Single quotes (can contain `>`)
              .+? # Anything but quotes and `>`.
              )*?
              )?
              > # End of tag.
              |
              <!-- .*? --> # HTML Comment
              |
              <\?.*?\?> | <%.*?%> # Processing instruction
              |
              <!\[CDATA\[.*?\]\]> # CData Block
              '. ( !$span ? ' # If not in span.
              |
              # Indented code block
              (?: ^[ ]*\n | ^ | \n[ ]*\n )
              [ ]{'.($indent+4).'}[^\n]* \n
              (?>
              (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
              )*
              |
              # Fenced code block marker
              (?<= ^ | \n )
              [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
              [ ]*
              (?:
              \.?[-_:a-zA-Z0-9]+ # standalone class name
              |
              '.$this->id_class_attr_nocatch_re.' # extra attributes
              )?
              [ ]*
              (?= \n )
              ' : '' ). ' # End (if not is span).
              |
              # Code span marker
              # Note, this regex needs to go after backtick fenced
              # code blocks but it should also be kept outside of the
              # "if not in span" condition adding backticks to the parser
              `+
              )
          }xs';

      $depth = 0; # Current depth inside the tag tree.
      $parsed = ""; # Parsed text that will be returned.

      #
      # Loop through every tag until we find the closing tag of the parent
      # or loop until reaching the end of text if no parent tag specified.
      #
      do {
          #
          # Split the text using the first $block_tag_re match found.
          # Text before the match will be first in the array, text after
          # it will be at the end, and between will be the capture made
          # by the pattern.
          #
          $parts = preg_split($block_tag_re, $text, 2,
                              PREG_SPLIT_DELIM_CAPTURE);

          # If in Markdown span mode, add an empty-string span-level hash
          # after each newline to prevent triggering any block element.
          if ($span) {
              $void = $this->hashPart("", ':');
              $newline = "$void\n";
              $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
          }

          $parsed .= $parts[0]; # Text before current tag.

          # If end of $text has been reached. Stop loop.
          if (count($parts) < 3) {
              $text = "";
              break;
          }

          $tag = $parts[1]; # Tag to handle.
          $text = $parts[2]; # Remaining text after current tag.

          # Note: string offsets below use square brackets; the $str{n}
          # form is deprecated since PHP 7.4 and a parse error in PHP 8.

          #
          # Check for: Fenced code block marker.
          # Note: need to recheck the whole tag to disambiguate backtick
          # fences from code spans
          #
          if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
              # Fenced code block marker: find matching end marker.
              $fence_indent = strlen($capture[1]); # use captured indent in re
              $fence_re = $capture[2]; # use captured fence in re
              if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
                  $matches))
              {
                  # End marker found: pass text unchanged until marker.
                  $parsed .= $tag . $matches[0];
                  $text = substr($text, strlen($matches[0]));
              }
              else {
                  # No end marker: just skip it.
                  $parsed .= $tag;
              }
          }
          #
          # Check for: Indented code block.
          #
          else if ($tag[0] == "\n" || $tag[0] == " ") {
              # Indented code block: pass it unchanged, will be handled
              # later.
              $parsed .= $tag;
          }
          #
          # Check for: Code span marker
          # Note: need to check this after backtick fenced code blocks
          #
          else if ($tag[0] == "`") {
              # Find corresponding end marker.
              $tag_re = preg_quote($tag);
              if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
                  $text, $matches))
              {
                  # End marker found: pass text unchanged until marker.
                  $parsed .= $tag . $matches[0];
                  $text = substr($text, strlen($matches[0]));
              }
              else {
                  # Unmatched marker: just skip it.
                  $parsed .= $tag;
              }
          }
          #
          # Check for: Opening Block level tag or
          #            Opening Context Block tag (like ins and del)
          #            used as a block tag (tag is alone on its line).
          #
          else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
              ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
                preg_match($newline_before_re, $parsed) &&
                preg_match($newline_after_re, $text) )
              )
          {
              # Need to parse tag and following text using the HTML parser.
              list($block_text, $text) =
                  $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

              # Make sure it stays outside of any paragraph by adding newlines.
              $parsed .= "\n\n$block_text\n\n";
          }
          #
          # Check for: Clean tag (like script, math)
          #            HTML Comments, processing instructions.
          #
          else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
              $tag[1] == '!' || $tag[1] == '?')
          {
              # Need to parse tag and following text using the HTML parser.
              # (don't check for markdown attribute)
              list($block_text, $text) =
                  $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

              $parsed .= $block_text;
          }
          #
          # Check for: Tag with same name as enclosing tag.
          #
          else if ($enclosing_tag_re !== '' &&
              # Same name as enclosing tag.
              preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
          {
              #
              # Increase/decrease nested tag count. A self-closing tag
              # (second-to-last character is "/") leaves it unchanged.
              #
              if ($tag[1] == '/') $depth--;
              else if ($tag[strlen($tag)-2] != '/') $depth++;

              if ($depth < 0) {
                  #
                  # Going out of parent element. Clean up and break so we
                  # return to the calling function.
                  #
                  $text = $tag . $text;
                  break;
              }

              $parsed .= $tag;
          }
          else {
              $parsed .= $tag;
          }
      } while ($depth >= 0);

      return array($parsed, $text);
  }
  protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
      #
      # Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
      #
      # *   Calls $hash_method to convert any blocks.
      # *   Stops when the first opening tag closes.
      # *   $md_attr indicates if the use of the `markdown="1"` attribute is
      #     allowed. (it is not inside clean tags)
      #
      # Returns an array of that form: ( processed text , remaining text )
      # If the end of $text is reached with unbalanced tags, the processed
      # part is only the first character of the original text and the
      # remaining part is everything after it.
      #
      if ($text === '') return array('', '');

      # Regex to match `markdown` attribute inside of a tag.
      $markdown_attr_re = '
          {
              \s* # Eat whitespace before the `markdown` attribute
              markdown
              \s*=\s*
              (?>
              (["\']) # $1: quote delimiter
              (.*?) # $2: attribute value
              \1 # matching delimiter
              |
              ([^\s>]*) # $3: unquoted attribute value
              )
              () # $4: make $3 always defined (avoid warnings)
          }xs';

      # Regex to match any tag.
      $tag_re = '{
              ( # $2: Capture whole tag.
              </? # Any opening or closing tag.
              [\w:$]+ # Tag name.
              (?:
              (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
              (?>
              ".*?" | # Double quotes (can contain `>`)
              \'.*?\' | # Single quotes (can contain `>`)
              .+? # Anything but quotes and `>`.
              )*?
              )?
              > # End of tag.
              |
              <!-- .*? --> # HTML Comment
              |
              <\?.*?\?> | <%.*?%> # Processing instruction
              |
              <!\[CDATA\[.*?\]\]> # CData Block
              )
          }xs';

      $original_text = $text; # Save original text in case of failure.

      $depth = 0; # Current depth inside the tag tree.
      $block_text = ""; # Temporary text holder for current text.
      $parsed = ""; # Parsed text that will be returned.

      #
      # Get the name of the starting tag.
      # (This pattern makes $base_tag_name_re safe without quoting.)
      # Default to an empty name so the variable is always defined, e.g.
      # when the text starts with a comment or a processing instruction.
      #
      $base_tag_name_re = '';
      if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
          $base_tag_name_re = $matches[1];

      #
      # Loop through every tag until we find the corresponding closing tag.
      #
      do {
          #
          # Split the text using the first $tag_re match found.
          # Text before the match will be first in the array, text after
          # it will be at the end, and between will be the capture made
          # by the pattern.
          #
          $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

          if (count($parts) < 3) {
              #
              # End of $text reached with unbalanced tag(s).
              # In that case, we return original text unchanged and pass the
              # first character as filtered to prevent an infinite loop in the
              # parent function.
              #
              return array($original_text[0], substr($original_text, 1));
          }

          $block_text .= $parts[0]; # Text before current tag.
          $tag = $parts[1]; # Tag to handle.
          $text = $parts[2]; # Remaining text after current tag.

          # Note: string offsets below use square brackets; the $str{n}
          # form is deprecated since PHP 7.4 and a parse error in PHP 8.

          #
          # Check for: Auto-close tag (like <hr/>)
          #            Comments and Processing Instructions.
          #
          if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
              $tag[1] == '!' || $tag[1] == '?')
          {
              # Just add the tag to the block as if it was text.
              $block_text .= $tag;
          }
          else {
              #
              # Increase/decrease nested tag count. Only do so if
              # the tag's name matches the base tag's. A self-closing tag
              # (second-to-last character is "/") leaves it unchanged.
              #
              if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                  if ($tag[1] == '/') $depth--;
                  else if ($tag[strlen($tag)-2] != '/') $depth++;
              }

              #
              # Check for `markdown="1"` attribute and handle it.
              # The alternation is grouped so that both anchors apply to
              # every accepted value: without the group the pattern reads
              # "starts with 1, or contains block, or ends with span".
              #
              if ($md_attr &&
                  preg_match($markdown_attr_re, $tag, $attr_m) &&
                  preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
              {
                  # Remove `markdown` attribute from opening tag.
                  $tag = preg_replace($markdown_attr_re, '', $tag);

                  # Check if text inside this tag must be parsed in span mode:
                  # explicitly with "span", or by default for the tags listed
                  # in $this->contain_span_tags_re unless "block" was asked.
                  $this->mode = $attr_m[2] . $attr_m[3];
                  $span_mode = $this->mode == 'span' ||
                      ($this->mode != 'block' &&
                       preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag));

                  # Calculate indent before tag.
                  if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                      $strlen = $this->utf8_strlen;
                      $indent = $strlen($matches[1], 'UTF-8');
                  } else {
                      $indent = 0;
                  }

                  # End preceding block with this tag.
                  $block_text .= $tag;
                  $parsed .= $this->$hash_method($block_text);

                  # Get enclosing tag name for the ParseMarkdown function.
                  # (This pattern makes $tag_name_re safe without quoting.)
                  preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                  $tag_name_re = $matches[1];

                  # Parse the content using the HTML-in-Markdown parser.
                  list ($block_text, $text)
                      = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                                                          $tag_name_re, $span_mode);

                  # Outdent markdown text.
                  if ($indent > 0) {
                      $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                 $block_text);
                  }

                  # Append tag content to parsed text.
                  if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
                  else $parsed .= "$block_text";

                  # Start over with a new block.
                  $block_text = "";
              }
              else $block_text .= $tag;
          }

      } while ($depth > 0);

      #
      # Hash last block text that wasn't processed inside the loop.
      #
      $parsed .= $this->$hash_method($block_text);

      return array($parsed, $text);
  }
  protected function hashClean($text) {
      #
      # Hash a "clean" tag block: the text is passed to hashPart() with the
      # 'C' boundary and the value hashPart() returns is handed back.
      #
      $hashed = $this->hashPart($text, 'C');
      return $hashed;
  }
  protected function doAnchors($text) {
      #
      # Turn Markdown link shortcuts into XHTML <a> tags.
      # Returns $text untouched when an anchor is already being processed
      # ($this->in_anchor); the flag is raised for the duration of the call.
      #
      if ($this->in_anchor) return $text;
      $this->in_anchor = true;

      # Reference-style links: [link text] [id]
      $reference_re = '{
          ( # wrap whole match in $1
          \[
          ('.$this->nested_brackets_re.') # link text = $2
          \]
          [ ]? # one optional space
          (?:\n[ ]*)? # one optional newline followed by spaces
          \[
          (.*?) # id = $3
          \]
          )
          }xs';

      # Inline-style links: [link text](url "optional title")
      $inline_re = '{
          ( # wrap whole match in $1
          \[
          ('.$this->nested_brackets_re.') # link text = $2
          \]
          \( # literal paren
          [ \n]*
          (?:
          <(.+?)> # href = $3
          |
          ('.$this->nested_url_parenthesis_re.') # href = $4
          )
          [ \n]*
          ( # $5
          ([\'"]) # quote char = $6
          (.*?) # Title = $7
          \6 # matching quote
          [ \n]* # ignore any spaces/tabs between closing quote and )
          )? # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
          )
          }xs';

      # Reference-style shortcuts: [link text]
      $shortcut_re = '{
          ( # wrap whole match in $1
          \[
          ([^\[\]]+) # link text = $2; can\'t contain [ or ]
          \]
          )
          }xs';

      # The order matters: shortcuts must come last in case you've also got
      # [link text][1] or [link text](/foo).
      $passes = array(
          array($reference_re, '_doAnchors_reference_callback'),
          array($inline_re, '_doAnchors_inline_callback'),
          array($shortcut_re, '_doAnchors_reference_callback'),
      );
      foreach ($passes as $pass) {
          $text = preg_replace_callback($pass[0], array($this, $pass[1]), $text);
      }

      $this->in_anchor = false;
      return $text;
  }
  protected function _doAnchors_reference_callback($matches) {
      #
      # Build the <a> tag for a reference-style link, or give the matched
      # text back untouched when the link id has no known URL.
      #
      $link_text = $matches[2];

      # The id group is absent for shortcut links like [this]; it is empty
      # for [this][]. Both fall back to the link text.
      $link_id = isset($matches[3]) ? $matches[3] : '';
      if ($link_id == "") {
          $link_id = $link_text;
      }

      # lower-case and turn embedded newlines into spaces
      $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

      if (!isset($this->urls[$link_id])) {
          # Unknown id: leave the whole match intact.
          return $matches[1];
      }

      $url = $this->encodeURLAttribute($this->urls[$link_id]);
      $result = "<a href=\"$url\"";

      if (isset($this->titles[$link_id])) {
          $title = $this->encodeAttribute($this->titles[$link_id]);
          $result .= " title=\"$title\"";
      }
      if (isset($this->ref_attr[$link_id])) {
          $result .= $this->ref_attr[$link_id];
      }

      $link_text = $this->runSpanGamut($link_text);
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  protected function _doAnchors_inline_callback($matches) {
      #
      # Build the <a> tag for an inline-style link:
      #   $2 = link text, $3 or $4 = URL, $7 = optional title,
      #   $8 = optional {#id .class} attributes.
      # Returns the hashed tag.
      #
      # The link text goes through the span gamut exactly once, as in
      # _doAnchors_reference_callback; it used to be processed twice.
      #
      $link_text = $matches[2];
      $url = $matches[3] == '' ? $matches[4] : $matches[3];

      # Trailing optional groups are absent from $matches when unmatched.
      $title = isset($matches[7]) ? $matches[7] : null;
      $extra_attr = isset($matches[8]) ? $matches[8] : null;
      $attr = $this->doExtraAttributes("a", $extra_attr);

      // if the URL was of the form <s p a c e s> it got caught by the HTML
      // tag parser and hashed. Need to reverse the process before using the URL.
      $unhashed = $this->unhash($url);
      if ($unhashed != $url)
          $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);

      $url = $this->encodeURLAttribute($url);

      $result = "<a href=\"$url\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $result .= " title=\"$title\"";
      }
      $result .= $attr;

      $link_text = $this->runSpanGamut($link_text);
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  protected function doImages($text) {
      #
      # Turn Markdown image shortcuts into <img> tags.
      # Reference-style images are handled first, inline images second.
      #

      # Reference-style labeled images: ![alt text][id]
      $reference_re = '{
          ( # wrap whole match in $1
          !\[
          ('.$this->nested_brackets_re.') # alt text = $2
          \]
          [ ]? # one optional space
          (?:\n[ ]*)? # one optional newline followed by spaces
          \[
          (.*?) # id = $3
          \]
          )
          }xs';
      $text = preg_replace_callback($reference_re,
          array($this, '_doImages_reference_callback'), $text);

      # Inline images: ![alt text](url "optional title")
      # Don't forget: encode * and _
      $inline_re = '{
          ( # wrap whole match in $1
          !\[
          ('.$this->nested_brackets_re.') # alt text = $2
          \]
          \s? # One optional whitespace character
          \( # literal paren
          [ \n]*
          (?:
          <(\S*)> # src url = $3
          |
          ('.$this->nested_url_parenthesis_re.') # src url = $4
          )
          [ \n]*
          ( # $5
          ([\'"]) # quote char = $6
          (.*?) # title = $7
          \6 # matching quote
          [ \n]*
          )? # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
          )
          }xs';

      return preg_replace_callback($inline_re,
          array($this, '_doImages_inline_callback'), $text);
  }
  protected function _doImages_reference_callback($matches) {
      #
      # Build the <img> tag for a reference-style image, or give the matched
      # text back untouched when the link id has no known URL.
      #
      $alt_text = $matches[2];

      $link_id = strtolower($matches[3]);
      if ($link_id == "") {
          # for shortcut links like ![this][].
          $link_id = strtolower($alt_text);
      }

      $alt_text = $this->encodeAttribute($alt_text);

      if (!isset($this->urls[$link_id])) {
          # If there's no such link ID, leave intact:
          return $matches[1];
      }

      $url = $this->encodeURLAttribute($this->urls[$link_id]);
      $tag = "<img src=\"$url\" alt=\"$alt_text\"";

      if (isset($this->titles[$link_id])) {
          $title = $this->encodeAttribute($this->titles[$link_id]);
          $tag .= " title=\"$title\"";
      }
      if (isset($this->ref_attr[$link_id])) {
          $tag .= $this->ref_attr[$link_id];
      }
      $tag .= $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  protected function _doImages_inline_callback($matches) {
      #
      # Build the <img> tag for an inline image:
      #   $2 = alt text, $3 or $4 = URL, $7 = optional title,
      #   $8 = optional {#id .class} attributes.
      # Returns the hashed tag.
      #
      if ($matches[3] == '') {
          $url = $matches[4];
      } else {
          $url = $matches[3];
      }

      # Trailing optional groups are absent from $matches when unmatched.
      $title = isset($matches[7]) ? $matches[7] : null;
      $extra_attr = isset($matches[8]) ? $matches[8] : null;
      $attr = $this->doExtraAttributes("img", $extra_attr);

      $alt_text = $this->encodeAttribute($matches[2]);
      $url = $this->encodeURLAttribute($url);

      $tag = "<img src=\"$url\" alt=\"$alt_text\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $tag .= " title=\"$title\"";
      }
      $tag .= $attr . $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  protected function doHeaders($text) {
      #
      # Redefined to add id and class attribute support.
      #
      # Setext-style headers:
      #     Header 1  {#header1}
      #     ========
      #
      #     Header 2  {#header2 .class1 .class2}
      #     --------
      #
      # Capture groups actually produced by the setext pattern:
      #   1 = header text, 2 = id/class attributes, 3 = underline.
      # (The comment inside the pattern labels the attributes as "$3".)
      #
      $setext_re = '{
          (^.+?) # $1: Header text
          (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
          [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
          }mx';
      $text = preg_replace_callback($setext_re,
          array($this, '_doHeaders_callback_setext'), $text);

      # atx-style headers:
      #     # Header 1  {#header1}
      #     ## Header 2  {#header2}
      #     ## Header 2 with closing hashes ##  {#header3.class1.class2}
      #     ...
      #     ###### Header 6  {.class2}
      #
      $atx_re = '{
          ^(\#{1,6}) # $1 = string of #\'s
          [ ]*
          (.+?) # $2 = Header text
          [ ]*
          \#* # optional closing #\'s (not counted)
          (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
          [ ]*
          \n+
          }xm';

      return preg_replace_callback($atx_re,
          array($this, '_doHeaders_callback_atx'), $text);
  }
  protected function _doHeaders_callback_setext($matches) {
      #
      # Build a hashed <h1>/<h2> block from a setext-style header match:
      #   $1 = header text, $2 = id/class attributes, $3 = underline.
      # An underline of "=" gives level 1, anything else level 2.
      #
      # A single "-" under a line that starts with "- " is left as it is.
      if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
          return $matches[0];

      # Square-bracket offset: the $str{0} form is deprecated since PHP 7.4
      # and a parse error in PHP 8. The underline group always holds at
      # least one character.
      $level = $matches[3][0] == '=' ? 1 : 2;

      # $matches[2] is always present here because group 3 always matches.
      $attr = $this->doExtraAttributes("h$level", $matches[2]);
      $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  protected function _doHeaders_callback_atx($matches) {
      #
      # Build a hashed <hN> block from an atx-style header match:
      #   $1 = run of "#" (its length is the level), $2 = header text,
      #   $3 = optional id/class attributes.
      #
      $level = strlen($matches[1]);

      # The attribute group is absent from $matches when it did not match.
      $extra_attr = isset($matches[3]) ? $matches[3] : null;
      $attr = $this->doExtraAttributes("h$level", $extra_attr);

      $content = $this->runSpanGamut($matches[2]);
      $block = "<h$level$attr>" . $content . "</h$level>";

      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  protected function doTables($text) {
      #
      # Form HTML tables.
      # Tables written with a leading pipe are converted first, tables
      # without one second.
      #
      $max_indent = $this->tab_width - 1;

      #
      # Find tables with leading pipe.
      #
      #    | Header 1 | Header 2
      #    | -------- | --------
      #    | Cell 1   | Cell 2
      #    | Cell 3   | Cell 4
      #
      $leading_pipe_re = '
          {
              ^ # Start of a line
              [ ]{0,'.$max_indent.'} # Allowed whitespace.
              [|] # Optional leading pipe (present)
              (.+) \n # $1: Header row (at least one pipe)
              [ ]{0,'.$max_indent.'} # Allowed whitespace.
              [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
              ( # $3: Cells
              (?>
              [ ]* # Allowed whitespace.
              [|] .* \n # Row content.
              )*
              )
              (?=\n|\Z) # Stop at final double newline.
          }xm';
      $text = preg_replace_callback($leading_pipe_re,
          array($this, '_doTable_leadingPipe_callback'), $text);

      #
      # Find tables without leading pipe.
      #
      #    Header 1 | Header 2
      #    -------- | --------
      #    Cell 1   | Cell 2
      #    Cell 3   | Cell 4
      #
      $no_leading_pipe_re = '
          {
              ^ # Start of a line
              [ ]{0,'.$max_indent.'} # Allowed whitespace.
              (\S.*[|].*) \n # $1: Header row (at least one pipe)
              [ ]{0,'.$max_indent.'} # Allowed whitespace.
              ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
              ( # $3: Cells
              (?>
              .* [|] .* \n # Row content
              )*
              )
              (?=\n|\Z) # Stop at final double newline.
          }xm';

      return preg_replace_callback($no_leading_pipe_re,
          array($this, '_DoTable_callback'), $text);
  }
  protected function _doTable_leadingPipe_callback($matches) {
      #
      # Adapter for tables written with a leading pipe: strip that pipe from
      # every body row, then let _doTable_callback build the table.
      # The header and underline captures are passed through as they are.
      #
      $rows_without_pipe = preg_replace('/^ *[|]/m', '', $matches[3]);

      $forwarded = array($matches[0], $matches[1], $matches[2], $rows_without_pipe);
      return $this->_doTable_callback($forwarded);
  }
  protected function _doTable_makeAlignAttr($alignname)
  {
      #
      # Return the attribute string (with its leading space) used to align a
      # table cell. With no $this->table_align_class_tmpl configured this is
      # an align="..." attribute; otherwise it is a class="..." attribute
      # whose value is the template with "%%" replaced by $alignname.
      #
      $template = $this->table_align_class_tmpl;
      if (!empty($template)) {
          $classname = str_replace('%%', $alignname, $template);
          return ' class="' . $classname . '"';
      }
      return ' align="' . $alignname . '"';
  }
  protected function _doTable_callback($matches) {
      #
      # Build the HTML for one table match and return it as a hashed block
      # followed by a newline.
      #   $matches[1] = header row, $matches[2] = header underline,
      #   $matches[3] = body rows.
      #
      $trailing_pipe_re = '/[|] *$/m';
      $cell_split_re = '/ *[|] */';

      # Remove any trailing pipes for each line.
      $head = preg_replace($trailing_pipe_re, '', $matches[1]);
      $underline = preg_replace($trailing_pipe_re, '', $matches[2]);
      $content = preg_replace($trailing_pipe_re, '', $matches[3]);

      # Reading alignment from header underline.
      $attr = array();
      foreach (preg_split($cell_split_re, $underline) as $n => $separator) {
          if (preg_match('/^ *-+: *$/', $separator)) {
              $alignname = 'right';
          } else if (preg_match('/^ *:-+: *$/', $separator)) {
              $alignname = 'center';
          } else if (preg_match('/^ *:-+ *$/', $separator)) {
              $alignname = 'left';
          } else {
              $alignname = '';
          }
          $attr[$n] = ($alignname === '')
              ? ''
              : $this->_doTable_makeAlignAttr($alignname);
      }

      # Parsing span elements, including code spans, character escapes,
      # and inline HTML tags, so that pipes inside those get ignored.
      $head = $this->parseSpan($head);
      $headers = preg_split($cell_split_re, $head);
      $col_count = count($headers);
      $attr = array_pad($attr, $col_count, '');

      # Write column headers.
      $html = "<table>\n" . "<thead>\n" . "<tr>\n";
      foreach ($headers as $n => $header) {
          $html .= ' <th' . $attr[$n] . '>'
              . $this->runSpanGamut(trim($header)) . "</th>\n";
      }
      $html .= "</tr>\n" . "</thead>\n";

      # Split content by row.
      $rows = explode("\n", trim($content, "\n"));

      $html .= "<tbody>\n";
      foreach ($rows as $row) {
          # Same span parsing as for the header row.
          $row = $this->parseSpan($row);

          # Split row by cell; never more cells than there are columns,
          # and pad short rows with empty cells.
          $cells = preg_split($cell_split_re, $row, $col_count);
          $cells = array_pad($cells, $col_count, '');

          $html .= "<tr>\n";
          foreach ($cells as $n => $cell) {
              $html .= ' <td' . $attr[$n] . '>'
                  . $this->runSpanGamut(trim($cell)) . "</td>\n";
          }
          $html .= "</tr>\n";
      }
      $html .= "</tbody>\n";
      $html .= "</table>";

      return $this->hashBlock($html) . "\n";
  }
  protected function doDefLists($text) {
      #
      # Form HTML definition lists.
      #
      # Every definition list found at the start of $text or right after
      # a blank line is handed to _doDefLists_callback and replaced by
      # its result. Returns the transformed text.
      #
      $max_indent = $this->tab_width - 1;

      # Building blocks shared by the list pattern below.
      $indent_re = '[ ]{0,'.$max_indent.'}';
      $marker_re = $indent_re.':[ ]+';

      # Pattern matching any entire dl list (used with the x and m flags):
      $list_re = '(?>
          (                             # $1 = whole list
            (                           # $2
              '.$indent_re.'
              ((?>.*\S.*\n)+)           # $3 = defined term
              \n?
              '.$marker_re.'            # colon starting definition
            )
            (?s:.+?)
            (                           # $4
                \z
              |
                \n{2,}
                (?=\S)
                (?!                     # Negative lookahead for another term
                  '.$indent_re.'
                  (?: \S.*\n )+?        # defined term
                  \n?
                  '.$marker_re.'        # colon starting definition
                )
                (?!                     # Negative lookahead for another definition
                  '.$marker_re.'        # colon starting definition
                )
            )
          )
        )'; // mx

      return preg_replace_callback('{
          (?>\A\n?|(?<=\n\n))
          '.$list_re.'
        }mx',
          array($this, '_doDefLists_callback'), $text);
  }
  protected function _doDefLists_callback($matches) {
      #
      # Callback for doDefLists.
      #
      # $matches[1] is the whole definition list. Its items are
      # converted by processDefListItems, the trimmed result is wrapped
      # in <dl>...</dl>, and the markup is passed through hashBlock. The
      # return value is followed by a blank line.
      #
      $items = trim($this->processDefListItems($matches[1]));
      return $this->hashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
  }
  protected function processDefListItems($list_str) {
      #
      # Process the contents of a single definition list, splitting it
      # into individual term and definition list items.
      #
      # Params:
      #    $list_str - text of one whole definition list
      #
      # Returns the text with terms replaced by the output of
      # _processDefListItems_callback_dt and definitions replaced by the
      # output of _processDefListItems_callback_dd.
      #
      $less_than_tab = $this->tab_width - 1;

      # trim trailing blank lines:
      $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      # Process definition terms.
      # The lookahead for the definition mark uses the same indent limit
      # ($less_than_tab) as the definition pattern below and as
      # doDefLists, so terms and definitions are recognised under the
      # same rule whatever tab_width is. With the default tab_width of 4
      # this equals the previously hard-coded limit of 3.
      $list_str = preg_replace_callback('{
          (?>\A\n?|\n\n+)                       # leading line
          (                                     # definition terms = $1
            [ ]{0,'.$less_than_tab.'}           # leading whitespace
            (?!\:[ ]|[ ])                       # negative lookahead for a definition
                                                #   mark (colon) or more whitespace.
            (?> \S.* \n)+?                      # actual term (not whitespace).
          )
          (?=\n?[ ]{0,'.$less_than_tab.'}:[ ])  # lookahead for following line feed
                                                #   with a definition mark.
        }xm',
          array($this, '_processDefListItems_callback_dt'), $list_str);

      # Process actual definitions.
      $list_str = preg_replace_callback('{
          \n(\n+)?                              # leading line = $1
          (                                     # marker space = $2
            [ ]{0,'.$less_than_tab.'}           # whitespace before colon
            \:[ ]+                              # definition mark (colon)
          )
          ((?s:.+?))                            # definition text = $3
          (?= \n+                               # stop at next definition mark,
            (?:                                 # next term or end of text
              [ ]{0,'.$less_than_tab.'} \:[ ] |
              <dt> | \z
            )
          )
        }xm',
          array($this, '_processDefListItems_callback_dd'), $list_str);

      return $list_str;
  }
  protected function _processDefListItems_callback_dt($matches) {
      #
      # Callback for the term pattern of processDefListItems.
      #
      # $matches[1] holds one or more terms, one per line. Each line is
      # trimmed, run through runSpanGamut and wrapped in its own <dt>
      # element. The result starts and ends with a newline, with one
      # newline between consecutive <dt> elements.
      #
      $dt_tags = array();
      foreach (explode("\n", trim($matches[1])) as $line) {
          $dt_tags[] = "<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
      }
      return "\n" . implode("\n", $dt_tags) . "\n";
  }
  protected function _processDefListItems_callback_dd($matches) {
      #
      # Callback for the definition pattern of processDefListItems.
      #
      # $matches[1] is the extra blank line(s) before the definition (if
      # any), $matches[2] the marker (indent, colon, spaces) and
      # $matches[3] the definition text. Returns a <dd> element
      # surrounded by newlines.
      #
      $blank_before = $matches[1];
      $marker       = $matches[2];
      $body         = $matches[3];

      # A definition preceded by a blank line, or containing one, is
      # rendered as block content; anything else as inline content.
      $is_block = $blank_before || preg_match('/\n{2,}/', $body);

      if (!$is_block) {
          $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
          return "\n<dd>" . $inline . "</dd>\n";
      }

      # Replace marker with the appropriate whitespace indentation
      $padded = str_repeat(' ', strlen($marker)) . $body;
      $block  = $this->runBlockGamut($this->outdent($padded . "\n\n"));
      return "\n<dd>" . "\n" . $block . "\n" . "</dd>\n";
  }
  protected function doFencedCodeBlocks($text) {
      #
      # Adding the fenced code block syntax to regular Markdown:
      #
      # ~~~
      # Code block
      # ~~~
      #
      # A block opens with a line of three or more tildes or backticks,
      # optionally followed by a class name or an attribute block, and
      # closes with a line repeating the exact opening marker. Each
      # match is replaced by the result of _doFencedCodeBlocks_callback.
      # Returns the transformed text.
      #
      $text = preg_replace_callback('{
              (?:\n|\A)
              # 1: Opening marker
              (
                  (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
              )
              [ ]*
              (?:
                  \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
              |
                  '.$this->id_class_attr_catch_re.' # 3: Extra attributes
              )?
              [ ]* \n # Whitespace and newline following marker.

              # 4: Content
              (
                  (?>
                      (?!\1 [ ]* \n) # Not a closing marker.
                      .*\n+
                  )+
              )

              # Closing marker.
              \1 [ ]* (?= \n )
          }xm',
          array($this, '_doFencedCodeBlocks_callback'), $text);

      return $text;
  }
  protected function _doFencedCodeBlocks_callback($matches) {
      #
      # Callback for doFencedCodeBlocks: render one fenced code block.
      #
      # $matches[2] is the standalone class name (if given), $matches[3]
      # the extra attribute block (if given) and $matches[4] the code.
      # The code is HTML-escaped with ENT_NOQUOTES and its leading
      # newlines are replaced through _doFencedCodeBlocks_newlines. The
      # class or attribute string goes on <pre> when
      # $this->code_attr_on_pre is set, on <code> otherwise. The
      # <pre><code> markup is passed through hashBlock and returned
      # surrounded by blank lines.
      #
      # Bound by reference so that an absent optional group reads as
      # null rather than raising an undefined-index notice.
      $classname =& $matches[2];
      $attrs     =& $matches[3];
      $codeblock = $matches[4];

      $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
      $codeblock = preg_replace_callback('/^\n+/',
          array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

      if ($classname != "") {
          # Square-bracket string offset: the curly-brace form is
          # deprecated since PHP 7.4 and a parse error in PHP 8.0.
          if ($classname[0] == '.')
              $classname = substr($classname, 1);
          $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
      } else {
          $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
      }
      $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
      $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
      $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

      return "\n\n".$this->hashBlock($codeblock)."\n\n";
  }
  protected function _doFencedCodeBlocks_newlines($matches) {
      #
      # Callback replacing a run of newlines with line-break tags: one
      # "<br" followed by $this->empty_element_suffix per character of
      # the matched run.
      #
      $line_break = "<br" . $this->empty_element_suffix;
      $count = strlen($matches[0]);
      return str_repeat($line_break, $count);
  }
  #
  # Redefining emphasis markers so that emphasis by underscore does not
  # work in the middle of a word.
  #
  # Each list maps a marker string to a regular-expression fragment. In
  # the '' entries an underscore marker must not be preceded by a
  # letter, digit or underscore; in the '_', '__' and '___' entries it
  # must not be followed by one. The asterisk alternatives carry no such
  # word-boundary restriction.
  # NOTE(review): presumably the '' entry is the pattern that opens a
  # span and the other keys are the patterns that close a span opened
  # with that marker -- confirm against the code consuming these lists.
  #
  # Single marker (* or _).
  protected $em_relist = array(
      ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
      '*' => '(?<![\s*])\*(?!\*)',
      '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
      );
  # Double marker (** or __).
  protected $strong_relist = array(
      ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
      '**' => '(?<![\s*])\*\*(?!\*)',
      '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
      );
  # Triple marker (*** or ___).
  protected $em_strong_relist = array(
      ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
      '***' => '(?<![\s*])\*\*\*(?!\*)',
      '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
      );
  protected function formParagraphs($text) {
      #
      # Params:
      #    $text - string to process with html <p> tags
      #
      # Splits $text on blank lines, runs every chunk through
      # runSpanGamut and wraps it in <p>...</p> unless it is a hash
      # token. The chunks are joined with blank lines again and the
      # result of unhash on the joined text is returned.
      #
      # Strip leading and trailing lines:
      $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);

      $blocks = array();
      foreach (preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY) as $chunk) {
          $rendered = trim($this->runSpanGamut($chunk));

          # Clean tag hashes & block tag hashes are left alone. Note the
          # anchoring: a chunk merely *starting* with a B...B token is
          # left unwrapped, whereas a C...C token must be the whole chunk.
          if (preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $rendered)) {
              $blocks[] = $rendered;
          } else {
              $blocks[] = "<p>$rendered</p>";
          }
      }

      # Join grafs in one text, then remove any tag hashes still present.
      return $this->unhash(implode("\n\n", $blocks));
  }
  2553. ### Footnotes
  protected function stripFootnotes($text) {
      #
      # Strips footnote definitions from text; each definition is handed
      # to _stripFootnotes_callback and replaced by its return value.
      # Returns the text without the definitions.
      #
      $max_indent = $this->tab_width - 1;

      # Footnote defs are in the form: [^id]: text of the note
      # NOTE(review): the continuation lookahead below hard-codes an
      # indent limit of 3 rather than using tab_width - 1 -- confirm
      # whether that is intentional.
      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?: # note_id = $1
            [ ]*
            \n?                   # maybe *one* newline
          (                       # text = $2 (no blank lines allowed)
              (?:
                  .+              # actual text
              |
                  \n              # newlines but
                  (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                  (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                  # by non-indented content
              )*
          )
          }xm';

      return preg_replace_callback($definition_re,
          array($this, '_stripFootnotes_callback'),
          $text);
  }
  protected function _stripFootnotes_callback($matches) {
      #
      # Callback for stripFootnotes: stores the outdented footnote text
      # ($matches[2]) in $this->footnotes under the prefixed note id
      # ($this->fn_id_prefix followed by $matches[1]).
      #
      $key = $this->fn_id_prefix . $matches[1];
      $body = $this->outdent($matches[2]);
      $this->footnotes[$key] = $body;

      # The definition itself is removed from the text.
      return '';
  }
  protected function doFootnotes($text) {
      #
      # Replace footnote references in $text [^id] with a special
      # text-token which will be replaced by the actual footnote marker
      # in appendFootnotes. When $this->in_anchor is set the text is
      # returned untouched.
      #
      if ($this->in_anchor) {
          return $text;
      }
      return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
  }
  protected function appendFootnotes($text) {
      #
      # Append footnote list to text.
      #
      # Footnote reference tokens (F\x1Afn:id\x1A:) in $text are first
      # replaced through _appendFootnotes_callback. If that leaves any
      # entries in $this->footnotes_ordered, a <div class="footnotes">
      # holding an ordered list of those notes is appended. Returns the
      # resulting text.
      #
      # A '%%' placeholder in fn_backlink_class / fn_backlink_title is
      # replaced, for each footnote, by its 1-based position in the list.
      #
      $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
          array($this, '_appendFootnotes_callback'), $text);

      if (!empty($this->footnotes_ordered)) {
          $text .= "\n\n";
          $text .= "<div class=\"footnotes\">\n";
          $text .= "<hr". $this->empty_element_suffix ."\n";
          $text .= "<ol>\n\n";

          # Attribute template shared by all backlinks. It is kept
          # unmodified so the '%%' placeholder can be substituted afresh
          # for every footnote; substituting in place would freeze the
          # first footnote's number into all later backlinks.
          $attr_tmpl = "";
          if ($this->fn_backlink_class != "") {
              $class = $this->fn_backlink_class;
              $class = $this->encodeAttribute($class);
              $attr_tmpl .= " class=\"$class\"";
          }
          if ($this->fn_backlink_title != "") {
              $title = $this->fn_backlink_title;
              $title = $this->encodeAttribute($title);
              $attr_tmpl .= " title=\"$title\"";
          }
          $num = 0;

          # A while loop rather than foreach: rendering a footnote runs
          # _appendFootnotes_callback on its body, which can add further
          # entries to footnotes_ordered.
          while (!empty($this->footnotes_ordered)) {
              $footnote = reset($this->footnotes_ordered);
              $note_id = key($this->footnotes_ordered);
              unset($this->footnotes_ordered[$note_id]);
              $ref_count = $this->footnotes_ref_count[$note_id];
              unset($this->footnotes_ref_count[$note_id]);
              unset($this->footnotes[$note_id]);

              $footnote .= "\n"; # Need to append newline before parsing.
              $footnote = $this->runBlockGamut("$footnote\n");
              $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                  array($this, '_appendFootnotes_callback'), $footnote);

              $attr = str_replace("%%", (string) ++$num, $attr_tmpl);
              $note_id = $this->encodeAttribute($note_id);

              # Prepare backlink, multiple backlinks if multiple references
              $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
              for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                  $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
              }

              # Add backlink to last paragraph; create new paragraph if needed.
              if (preg_match('{</p>$}', $footnote)) {
                  $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
              } else {
                  $footnote .= "\n\n<p>$backlink</p>";
              }

              $text .= "<li id=\"fn:$note_id\">\n";
              $text .= $footnote . "\n";
              $text .= "</li>\n\n";
          }

          $text .= "</ol>\n";
          $text .= "</div>";
      }
      return $text;
  }
  protected function _appendFootnotes_callback($matches) {
      #
      # Callback turning one footnote reference token into its marker.
      #
      # $matches[1] is the footnote id as written in the text. When no
      # footnote is stored under the prefixed id, the literal "[^id]" is
      # returned. Otherwise a <sup> element linking to the note is
      # returned. A '%%' placeholder in fn_link_class / fn_link_title is
      # replaced by the footnote number.
      #
      $key = $this->fn_id_prefix . $matches[1];

      # Create footnote marker only if it has a corresponding footnote.
      if (!isset($this->footnotes[$key])) {
          return "[^".$matches[1]."]";
      }

      if (!isset($this->footnotes_numbers[$key])) {
          # First reference: transfer footnote content to the ordered
          # list and give it its number.
          $this->footnotes_ordered[$key] = $this->footnotes[$key];
          $this->footnotes_ref_count[$key] = 1;
          $this->footnotes_numbers[$key] = $this->footnote_counter++;
          $ref_suffix = '';
      } else {
          # Repeated reference: the incremented reference count becomes
          # part of the id of this marker.
          $ref_suffix = $this->footnotes_ref_count[$key] += 1;
      }
      $number = $this->footnotes_numbers[$key];

      $link_attr = "";
      if ($this->fn_link_class != "") {
          $link_attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
      }
      if ($this->fn_link_title != "") {
          $link_attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
      }
      $link_attr = str_replace("%%", $number, $link_attr);

      $safe_id = $this->encodeAttribute($key);

      $anchor = "<a href=\"#fn:$safe_id\"$link_attr>$number</a>";
      return "<sup id=\"fnref$ref_suffix:$safe_id\">" . $anchor . "</sup>";
  }
  2687. ### Abbreviations ###
  protected function stripAbbreviations($text) {
      #
      # Strips abbreviation definitions from text; each definition is
      # handed to _stripAbbreviations_callback and replaced by its
      # return value. Returns the text without the definitions.
      #
      $max_indent = $this->tab_width - 1;

      # Abbreviation defs are in the form: *[id]: description
      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
          (.*)                    # text = $2 (no blank lines allowed)
          }xm';

      return preg_replace_callback($definition_re,
          array($this, '_stripAbbreviations_callback'),
          $text);
  }
  protected function _stripAbbreviations_callback($matches) {
      #
      # Callback for stripAbbreviations: registers one abbreviation.
      #
      # The regex-quoted word ($matches[1]) is appended to
      # $this->abbr_word_re as a new alternative, and the trimmed
      # description ($matches[2]) is stored under the word in
      # $this->abbr_desciptions.
      #
      $word = $matches[1];

      # Alternatives are separated by '|' once the pattern is non-empty.
      $separator = $this->abbr_word_re ? '|' : '';
      $this->abbr_word_re .= $separator . preg_quote($word);

      $this->abbr_desciptions[$word] = trim($matches[2]);

      # The definition itself is removed from the text.
      return '';
  }
  protected function doAbbreviations($text) {
      #
      # Find defined abbreviations in text and wrap them in <abbr>
      # elements. Returns $text unchanged when $this->abbr_word_re is
      # empty.
      #
      if (!$this->abbr_word_re) {
          return $text;
      }

      # An abbreviation only matches when it is not adjacent to a word
      # character or to \x1A.
      # cannot use the /x modifier because abbr_word_re may
      # contain significant spaces:
      $pattern = '{(?<![\w\x1A])(?:' . $this->abbr_word_re . ')(?![\w\x1A])}';

      return preg_replace_callback($pattern,
          array($this, '_doAbbreviations_callback'), $text);
  }
  protected function _doAbbreviations_callback($matches) {
      #
      # Callback for doAbbreviations: wrap one matched abbreviation.
      #
      # $matches[0] is the abbreviation as found in the text. If it has
      # an entry in $this->abbr_desciptions, an <abbr> element is built
      # (with a title attribute when the description is not the empty
      # string) and passed through hashPart; otherwise the matched text
      # is returned unchanged.
      #
      $abbr = $matches[0];
      if (!isset($this->abbr_desciptions[$abbr])) {
          return $matches[0];
      }

      $desc = $this->abbr_desciptions[$abbr];

      # Compare against '' explicitly: empty() would also discard a
      # legitimate description of "0".
      if ((string) $desc === '') {
          return $this->hashPart("<abbr>$abbr</abbr>");
      }

      $desc = $this->encodeAttribute($desc);
      return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
  }
  2741. }