Parsedown.php 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713
  1. <?php
  2. #
  3. #
  4. # Parsedown
  5. # http://parsedown.org
  6. #
  7. # (c) Emanuil Rusev
  8. # http://erusev.com
  9. #
  10. # For the full license information, view the LICENSE file that was distributed
  11. # with this source code.
  12. #
  13. #
  14. class Parsedown
  15. {
  16. # ~
  17. const version = '1.7.4';
  18. # ~
  # Converts a Markdown document into markup.
  #
  # $text is the raw source. Line breaks are normalised to "\n", the lines are
  # handed to lines(), and the result is returned without leading or trailing
  # line breaks.
  public function text($text)
  {
      # definitions collected by an earlier call must not leak into this one
      $this->DefinitionData = array();

      # "\r\n" and "\r" both become "\n"; line breaks around the text are dropped
      $source = str_replace(array("\r\n", "\r"), "\n", $text);
      $source = trim($source, "\n");

      # the block parser works on individual lines
      $rendered = $this->lines(explode("\n", $source));

      return trim($rendered, "\n");
  }
  35. #
  36. # Setters
  37. #
  # Value stored by setBreaksEnabled(); it has no explicit default.
  protected $breaksEnabled;

  # Stores $breaksEnabled as given (no cast) and returns the parser itself so
  # that setter calls can be chained.
  public function setBreaksEnabled($breaksEnabled)
  {
      $this->breaksEnabled = $breaksEnabled;

      return $this;
  }
  # Value stored by setMarkupEscaped(); it has no explicit default.
  protected $markupEscaped;

  # Stores $markupEscaped as given (no cast) and returns the parser itself so
  # that setter calls can be chained.
  public function setMarkupEscaped($markupEscaped)
  {
      $this->markupEscaped = $markupEscaped;

      return $this;
  }
  # Value stored by setUrlsLinked(); true unless the setter changes it.
  protected $urlsLinked = true;

  # Stores $urlsLinked as given (no cast) and returns the parser itself so
  # that setter calls can be chained.
  public function setUrlsLinked($urlsLinked)
  {
      $this->urlsLinked = $urlsLinked;

      return $this;
  }
  # Value stored by setSafeMode(); it has no explicit default.
  protected $safeMode;

  # URL prefixes (schemes, plus three base64 image data-URI prefixes).
  # NOTE(review): presumably the prefixes a link may start with in safe mode;
  # the code that reads this list is outside this section - confirm there.
  protected $safeLinksWhitelist = array(
      # web and file transfer
      'http://',
      'https://',
      'ftp://',
      'ftps://',

      # mail
      'mailto:',

      # inline images
      'data:image/png;base64,',
      'data:image/gif;base64,',
      'data:image/jpeg;base64,',

      # other schemes
      'irc:',
      'ircs:',
      'git:',
      'ssh:',
      'news:',
      'steam:',
  );

  # Stores $safeMode cast to bool and returns the parser itself so that setter
  # calls can be chained.
  public function setSafeMode($safeMode)
  {
      $this->safeMode = (bool) $safeMode;

      return $this;
  }
  78. #
  79. # Lines
  80. #
  # Maps the first character of a line's text to the block types that are
  # tried for that line, in the listed order. lines() calls the method
  # 'block'.<type> for each candidate.
  protected $BlockTypes = array(
      '#' => array('Header'),
      '*' => array('Rule', 'List'),
      '+' => array('List'),
      '-' => array('SetextHeader', 'Table', 'Rule', 'List'),

      # a leading digit can only start a list
      '0' => array('List'),
      '1' => array('List'),
      '2' => array('List'),
      '3' => array('List'),
      '4' => array('List'),
      '5' => array('List'),
      '6' => array('List'),
      '7' => array('List'),
      '8' => array('List'),
      '9' => array('List'),

      ':' => array('Table'),
      '<' => array('Comment', 'Markup'),
      '=' => array('SetextHeader'),
      '>' => array('Quote'),
      '[' => array('Reference'),
      '_' => array('Rule'),
      '`' => array('FencedCode'),
      '|' => array('Table'),
      '~' => array('FencedCode'),
  );

  # Block types that lines() tries for every line, ahead of the marker-specific
  # ones above.
  protected $unmarkedBlockTypes = array(
      'Code',
  );
  110. #
  111. # Blocks
  112. #
  # Groups source lines into blocks and renders them.
  #
  # Every non-blank line is first offered to the open block's Continue method,
  # then to the block types registered for its first character, and finally
  # becomes (or extends) a paragraph. Returns the markup of all blocks, each
  # preceded by "\n", with one more "\n" at the end.
  protected function lines(array $lines)
  {
      $Blocks = array();
      $CurrentBlock = null;

      foreach ($lines as $line) {
          # a blank line only marks the open block as interrupted
          if (chop($line) === '') {
              if (isset($CurrentBlock)) {
                  $CurrentBlock['interrupted'] = true;
              }

              continue;
          }

          # replace each tab with spaces up to the next multiple of four characters
          if (strpos($line, "\t") !== false) {
              $segments = explode("\t", $line);
              $line = array_shift($segments);

              foreach ($segments as $segment) {
                  $padding = 4 - mb_strlen($line, 'utf-8') % 4;
                  $line .= str_repeat(' ', $padding).$segment;
              }
          }

          # count the leading spaces and separate them from the text
          $indent = strspn($line, ' ');
          $text = $indent > 0 ? substr($line, $indent) : $line;

          $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

          # give the open block the chance to take the line
          if (isset($CurrentBlock['continuable'])) {
              $continueMethod = 'block'.$CurrentBlock['type'].'Continue';
              $Continued = $this->$continueMethod($Line, $CurrentBlock);

              if (isset($Continued)) {
                  $CurrentBlock = $Continued;

                  continue;
              }

              # the block refused the line, so it ends here
              if ($this->isBlockCompletable($CurrentBlock['type'])) {
                  $completeMethod = 'block'.$CurrentBlock['type'].'Complete';
                  $CurrentBlock = $this->$completeMethod($CurrentBlock);
              }
          }

          $marker = $text[0];

          # unmarked types are tried first, then the types of this marker
          $candidates = $this->unmarkedBlockTypes;

          if (isset($this->BlockTypes[$marker])) {
              foreach ($this->BlockTypes[$marker] as $markedType) {
                  $candidates []= $markedType;
              }
          }

          foreach ($candidates as $blockType) {
              $startMethod = 'block'.$blockType;
              $Started = $this->$startMethod($Line, $CurrentBlock);

              if ( ! isset($Started)) {
                  continue;
              }

              $Started['type'] = $blockType;

              # a block that is not flagged 'identified' is a new block, so the
              # previous one is stored; a flagged block replaces it instead
              if ( ! isset($Started['identified'])) {
                  $Blocks []= $CurrentBlock;
                  $Started['identified'] = true;
              }

              if ($this->isBlockContinuable($blockType)) {
                  $Started['continuable'] = true;
              }

              $CurrentBlock = $Started;

              continue 2;
          }

          # no block type took the line: it is paragraph text
          $extendsParagraph = isset($CurrentBlock)
              && ! isset($CurrentBlock['type'])
              && ! isset($CurrentBlock['interrupted']);

          if ($extendsParagraph) {
              $CurrentBlock['element']['text'] .= "\n".$text;
          } else {
              $Blocks []= $CurrentBlock;
              $CurrentBlock = $this->paragraph($Line);
              $CurrentBlock['identified'] = true;
          }
      }

      # the last block never saw a refusing line, so complete it here
      if (isset($CurrentBlock['continuable']) && $this->isBlockCompletable($CurrentBlock['type'])) {
          $completeMethod = 'block'.$CurrentBlock['type'].'Complete';
          $CurrentBlock = $this->$completeMethod($CurrentBlock);
      }

      $Blocks []= $CurrentBlock;

      # index 0 holds what $CurrentBlock was before the first block was stored
      unset($Blocks[0]);

      $markup = '';

      foreach ($Blocks as $Block) {
          if (isset($Block['hidden'])) {
              continue;
          }

          $markup .= "\n";
          $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
      }

      return $markup."\n";
  }
  # Tells whether this object has a method named 'block'.$Type.'Continue'.
  protected function isBlockContinuable($Type)
  {
      $method = sprintf('block%sContinue', $Type);

      return method_exists($this, $method);
  }
  # Tells whether this object has a method named 'block'.$Type.'Complete'.
  protected function isBlockCompletable($Type)
  {
      $method = sprintf('block%sComplete', $Type);

      return method_exists($this, $method);
  }
  238. #
  239. # Code
  # Starts an indented code block.
  #
  # Returns nothing when $Block is an uninterrupted block without a 'type'
  # (lines() leaves paragraphs untyped) or when the line is indented by fewer
  # than four spaces. Otherwise returns a pre > code block holding the line
  # without its first four characters.
  protected function blockCode($Line, $Block = null)
  {
      $insideOpenParagraph = isset($Block)
          && ! isset($Block['type'])
          && ! isset($Block['interrupted']);

      if ($insideOpenParagraph || $Line['indent'] < 4) {
          return;
      }

      return array(
          'element' => array(
              'name' => 'pre',
              'handler' => 'element',
              'text' => array(
                  'name' => 'code',
                  'text' => substr($Line['body'], 4),
              ),
          ),
      );
  }
  # Appends another indented line to an indented code block.
  #
  # Returns nothing (which ends the block) when the line is indented by fewer
  # than four spaces. A pending interruption is turned into one empty line in
  # the code text.
  protected function blockCodeContinue($Line, $Block)
  {
      if ($Line['indent'] < 4) {
          return;
      }

      $addition = "\n".substr($Line['body'], 4);

      if (isset($Block['interrupted'])) {
          # keep the blank line that separated the two code lines
          $addition = "\n".$addition;

          unset($Block['interrupted']);
      }

      $Block['element']['text']['text'] .= $addition;

      return $Block;
  }
  # Completion hook for indented code blocks.
  #
  # lines() calls 'block'.<type>.'Complete' when such a method exists. Nothing
  # has to be done to the code text here, so the block is returned as it came
  # in; the earlier read-and-write-back of the same value was a no-op.
  protected function blockCodeComplete($Block)
  {
      return $Block;
  }
  283. #
  284. # Comment
  # Starts a raw HTML comment block.
  #
  # Returns nothing when markupEscaped or safeMode is set, or when characters
  # 1 to 3 of the text are not "!--". The block keeps the whole line as
  # markup and is flagged 'closed' when the line ends with "-->".
  protected function blockComment($Line)
  {
      if ($this->markupEscaped || $this->safeMode) {
          return;
      }

      if (substr($Line['text'], 1, 3) !== '!--') {
          return;
      }

      $Block = array(
          'markup' => $Line['body'],
      );

      if (preg_match('/-->$/', $Line['text'])) {
          $Block['closed'] = true;
      }

      return $Block;
  }
  # Appends a line to an open HTML comment block.
  #
  # Returns nothing once the block is flagged 'closed'. Otherwise the line is
  # added on a new line of the markup, and the block is flagged 'closed' when
  # the line ends with "-->".
  protected function blockCommentContinue($Line, array $Block)
  {
      if (isset($Block['closed'])) {
          return;
      }

      $Block['markup'] = $Block['markup']."\n".$Line['body'];

      $endsComment = preg_match('/-->$/', $Line['text']);

      if ($endsComment) {
          $Block['closed'] = true;
      }

      return $Block;
  }
  316. #
  317. # Fenced Code
  # Starts a fenced code block.
  #
  # The line must consist of at least three repetitions of its first
  # character, optionally followed by an info string without backticks.
  # Returns a pre > code block that remembers the fence character in 'char';
  # the first word of the info string becomes a "language-..." class.
  protected function blockFencedCode($Line)
  {
      $fence = $Line['text'][0];

      if ( ! preg_match('/^['.$fence.']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) {
          return;
      }

      $Code = array(
          'name' => 'code',
          'text' => '',
      );

      if (isset($matches[1])) {
          /**
           * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
           * A class attribute holds space-separated tokens, where the space
           * characters are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
           * U+000A LINE FEED (LF), U+000C FORM FEED (FF) and
           * U+000D CARRIAGE RETURN (CR). Only the part of the info string
           * before the first such character is used as the language.
           */
          $infoString = $matches[1];
          $language = substr($infoString, 0, strcspn($infoString, " \t\n\f\r"));

          $Code['attributes'] = array(
              'class' => 'language-'.$language,
          );
      }

      return array(
          'char' => $fence,
          'element' => array(
              'name' => 'pre',
              'handler' => 'element',
              'text' => $Code,
          ),
      );
  }
  # Feeds a line to an open fenced code block.
  #
  # Returns nothing once the block is flagged 'complete'. A line made of three
  # or more fence characters closes the block; any other line is appended to
  # the code text on a new line.
  protected function blockFencedCodeContinue($Line, $Block)
  {
      if (isset($Block['complete'])) {
          return;
      }

      # a blank line inside the fence is part of the code
      if (isset($Block['interrupted'])) {
          $Block['element']['text']['text'] .= "\n";

          unset($Block['interrupted']);
      }

      $isClosingFence = preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']);

      if ($isClosingFence) {
          # every line was appended with a leading "\n"; drop the first one
          $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);
          $Block['complete'] = true;
      } else {
          $Block['element']['text']['text'] .= "\n".$Line['body'];
      }

      return $Block;
  }
  # Completion hook for fenced code blocks.
  #
  # lines() calls 'block'.<type>.'Complete' when such a method exists. Nothing
  # has to be done to the code text here, so the block is returned as it came
  # in; the earlier read-and-write-back of the same value was a no-op.
  protected function blockFencedCodeComplete($Block)
  {
      return $Block;
  }
  383. #
  384. # Header
  # Builds a header block (h1 to h6).
  #
  # The level is one plus the number of '#' characters that directly follow
  # the first character. Returns nothing for a one-character line or for a
  # level above six. '#' and space characters are trimmed off both ends of
  # the header text.
  protected function blockHeader($Line)
  {
      if ( ! isset($Line['text'][1])) {
          return;
      }

      $level = 1 + strspn($Line['text'], '#', 1);

      if ($level > 6) {
          return;
      }

      return array(
          'element' => array(
              'name' => 'h'.$level,
              'text' => trim($Line['text'], '# '),
              'handler' => 'line',
          ),
      );
  }
  409. #
  410. # List
  # Starts a list block.
  #
  # A first character that sorts at or before '-' selects a bullet list,
  # anything else an ordered list. The marker must be followed by at least
  # one space. An ordered list whose first number is not "1" gets a 'start'
  # attribute. 'li' always refers to the item that is currently being filled.
  protected function blockList($Line)
  {
      if ($Line['text'][0] <= '-') {
          $name = 'ul';
          $pattern = '[*+-]';
      } else {
          $name = 'ol';
          $pattern = '[0-9]+[.]';
      }

      if ( ! preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) {
          return;
      }

      $Block = array(
          'indent' => $Line['indent'],
          'pattern' => $pattern,
          'element' => array(
              'name' => $name,
              'handler' => 'elements',
          ),
      );

      if ($name === 'ol') {
          # everything before the first dot is the number of the first item
          $listStart = strstr($matches[0], '.', true);

          if ($listStart !== '1') {
              $Block['element']['attributes'] = array('start' => $listStart);
          }
      }

      $Block['li'] = array(
          'name' => 'li',
          'handler' => 'li',
          'text' => array(
              $matches[2],
          ),
      );

      # stored by reference so that later writes to 'li' reach the element
      $Block['element']['text'] []= & $Block['li'];

      return $Block;
  }
  # Feeds a line to an open list block.
  #
  # - A line with the list's indent and marker starts a new item; a pending
  #   interruption marks the list as loose.
  # - A line starting with '[' that blockReference() accepts leaves the list
  #   unchanged.
  # - Any other line is added to the current item, unless the list was
  #   interrupted and the line is not indented, in which case nothing is
  #   returned and the list ends.
  protected function blockListContinue($Line, array $Block)
  {
      $startsItem = $Block['indent'] === $Line['indent']
          && preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches);

      if ($startsItem) {
          if (isset($Block['interrupted'])) {
              $Block['li']['text'] []= '';
              $Block['loose'] = true;

              unset($Block['interrupted']);
          }

          # detach 'li' from the previous item before creating the next one
          unset($Block['li']);

          $Block['li'] = array(
              'name' => 'li',
              'handler' => 'li',
              'text' => array(
                  isset($matches[1]) ? $matches[1] : '',
              ),
          );

          $Block['element']['text'] []= & $Block['li'];

          return $Block;
      }

      if ($Line['text'][0] === '[' && $this->blockReference($Line)) {
          return $Block;
      }

      $interrupted = isset($Block['interrupted']);

      if ($interrupted && ! ($Line['indent'] > 0)) {
          return;
      }

      if ($interrupted) {
          # the blank line becomes part of the item
          $Block['li']['text'] []= '';

          unset($Block['interrupted']);
      }

      # at most four leading spaces are removed from the line
      $Block['li']['text'] []= preg_replace('/^[ ]{0,4}/', '', $Line['body']);

      return $Block;
  }
  # Finishes a list block.
  #
  # In a list flagged 'loose', every item whose last text line is not already
  # an empty string gets one appended. Other lists are returned unchanged.
  protected function blockListComplete(array $Block)
  {
      if ( ! isset($Block['loose'])) {
          return $Block;
      }

      foreach ($Block['element']['text'] as & $item) {
          $lastLine = end($item['text']);

          if ($lastLine !== '') {
              $item['text'] []= '';
          }
      }

      unset($item);

      return $Block;
  }
  498. #
  499. # Quote
  # Starts a blockquote from a line that begins with '>'.
  #
  # The text after '>' and one optional space becomes the first line of the
  # quote; the quote's lines are kept as an array for the 'lines' handler.
  protected function blockQuote($Line)
  {
      if ( ! preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) {
          return;
      }

      $quoted = $matches[1];

      return array(
          'element' => array(
              'name' => 'blockquote',
              'handler' => 'lines',
              'text' => array($quoted),
          ),
      );
  }
  # Feeds a line to an open blockquote.
  #
  # A line starting with '>' is added without the marker, preceded by an empty
  # line if the quote was interrupted. Any other line is added as it is, but
  # only while the quote is not interrupted; otherwise nothing is returned.
  protected function blockQuoteContinue($Line, array $Block)
  {
      $isQuoted = $Line['text'][0] === '>'
          && preg_match('/^>[ ]?(.*)/', $Line['text'], $matches);

      if ($isQuoted) {
          if (isset($Block['interrupted'])) {
              $Block['element']['text'] []= '';

              unset($Block['interrupted']);
          }

          $Block['element']['text'] []= $matches[1];

          return $Block;
      }

      if (isset($Block['interrupted'])) {
          return;
      }

      # a plain line directly after quoted text still belongs to the quote
      $Block['element']['text'] []= $Line['text'];

      return $Block;
  }
  532. #
  533. # Rule
  # Builds a horizontal rule.
  #
  # The line must consist of its first character repeated at least three
  # times, with optional spaces between and after the repetitions.
  protected function blockRule($Line)
  {
      $marker = $Line['text'][0];

      $isRule = preg_match('/^(['.$marker.'])([ ]*\1){2,}[ ]*$/', $Line['text']);

      if ( ! $isRule) {
          return;
      }

      return array(
          'element' => array(
              'name' => 'hr'
          ),
      );
  }
  546. #
  547. # Setext
  # Turns the preceding paragraph into a setext header.
  #
  # Applies only when $Block exists, has no 'type' and is not interrupted, and
  # the line consists solely of repetitions of its first character. '=' gives
  # h1, any other character h2.
  protected function blockSetextHeader($Line, array $Block = null)
  {
      if ( ! isset($Block) || isset($Block['type']) || isset($Block['interrupted'])) {
          return;
      }

      $marker = $Line['text'][0];

      if (rtrim($Line['text'], $marker) !== '') {
          return;
      }

      if ($marker === '=') {
          $Block['element']['name'] = 'h1';
      } else {
          $Block['element']['name'] = 'h2';
      }

      return $Block;
  }
  560. #
  561. # Markup
  # Starts a raw HTML block from a line that begins with an opening tag.
  #
  # Returns nothing when markupEscaped or safeMode is set, when the line does
  # not start with a tag, or when the tag is listed in textLevelElements.
  # The block records the tag name as written, a nesting depth of 0 and the
  # line as markup. It is flagged 'closed' (and 'void') when the tag is
  # self-closing or a void element and stands alone on the line, or 'closed'
  # when the rest of the line ends with the matching closing tag. A void or
  # self-closing tag followed by other content yields nothing.
  #
  # The void-element lookup uses the lowercased tag name, like the text-level
  # lookup, so that an upper-case void tag such as <HR> is recognised as void
  # and does not open a block that would absorb the following lines.
  # NOTE(review): assumes voidElements holds lowercase names, as the existing
  # lowercased comparison against textLevelElements suggests - confirm.
  protected function blockMarkup($Line)
  {
      if ($this->markupEscaped or $this->safeMode)
      {
          return;
      }

      if ( ! preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
      {
          return;
      }

      # HTML tag names are case-insensitive, so all list lookups use this form
      $element = strtolower($matches[1]);

      if (in_array($element, $this->textLevelElements))
      {
          return;
      }

      $Block = array(
          'name' => $matches[1],
          'depth' => 0,
          'markup' => $Line['text'],
      );

      # $matches[2] is the '/' of a self-closing tag
      $isVoid = isset($matches[2]) || in_array($element, $this->voidElements);

      $remainder = substr($Line['text'], strlen($matches[0]));

      if (trim($remainder) === '')
      {
          if ($isVoid)
          {
              $Block['closed'] = true;
              $Block['void'] = true;
          }

          return $Block;
      }

      if ($isVoid)
      {
          return;
      }

      if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
      {
          $Block['closed'] = true;
      }

      return $Block;
  }
  # Feeds a line to an open raw HTML block.
  #
  # Returns nothing once the block is flagged 'closed'. A line starting with
  # another opening tag of the same name raises the depth; a line ending with
  # the closing tag lowers it, or flags the block 'closed' at depth 0. The
  # line is always appended to the markup.
  protected function blockMarkupContinue($Line, array $Block)
  {
      if (isset($Block['closed'])) {
          return;
      }

      $name = $Block['name'];

      $opensNested = preg_match('/^<'.$name.'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text']);

      if ($opensNested) {
          $Block['depth'] ++;
      }

      $closesTag = preg_match('/(.*?)<\/'.$name.'>[ ]*$/i', $Line['text']);

      if ($closesTag) {
          if ($Block['depth'] > 0) {
              $Block['depth'] --;
          } else {
              $Block['closed'] = true;
          }
      }

      # a blank line inside the element is kept in the markup
      if (isset($Block['interrupted'])) {
          $Block['markup'] .= "\n";

          unset($Block['interrupted']);
      }

      $Block['markup'] .= "\n".$Line['body'];

      return $Block;
  }
  633. #
  634. # Reference
  # Records a reference definition of the form [id]: url "title".
  #
  # The url and optional title are stored in DefinitionData['Reference'] under
  # the lowercased id. The returned block is flagged 'hidden', which makes
  # lines() skip it when rendering.
  protected function blockReference($Line)
  {
      if ( ! preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) {
          return;
      }

      $id = strtolower($matches[1]);

      $this->DefinitionData['Reference'][$id] = array(
          'url' => $matches[2],
          'title' => isset($matches[3]) ? $matches[3] : null,
      );

      return array(
          'hidden' => true,
      );
  }
  655. #
  656. # Table
  # Turns the preceding paragraph into a table header.
  #
  # Applies only when $Block exists, has no 'type' and is not interrupted, its
  # text contains '|', and the line is a divider made of spaces, '-', ':' and
  # '|' only. Colons in the divider cells select the column alignment; the
  # paragraph text supplies the header cells. The returned block is flagged
  # 'identified', so lines() replaces the paragraph rather than storing it.
  protected function blockTable($Line, array $Block = null)
  {
      if ( ! isset($Block) || isset($Block['type']) || isset($Block['interrupted'])) {
          return;
      }

      if (strpos($Block['element']['text'], '|') === false || rtrim($Line['text'], ' -:|') !== '') {
          return;
      }

      # alignments, one per non-empty divider cell

      $alignments = array();

      $divider = trim(trim($Line['text']), '|');

      foreach (explode('|', $divider) as $dividerCell) {
          $dividerCell = trim($dividerCell);

          if ($dividerCell === '') {
              continue;
          }

          $colonLeft = $dividerCell[0] === ':';
          $colonRight = substr($dividerCell, - 1) === ':';

          if ($colonRight) {
              $alignments []= $colonLeft ? 'center' : 'right';
          } elseif ($colonLeft) {
              $alignments []= 'left';
          } else {
              $alignments []= null;
          }
      }

      # header cells

      $HeaderElements = array();

      $header = trim(trim($Block['element']['text']), '|');

      foreach (explode('|', $header) as $index => $headerCell) {
          $HeaderElement = array(
              'name' => 'th',
              'text' => trim($headerCell),
              'handler' => 'line',
          );

          if (isset($alignments[$index])) {
              $HeaderElement['attributes'] = array(
                  'style' => 'text-align: '.$alignments[$index].';',
              );
          }

          $HeaderElements []= $HeaderElement;
      }

      # thead holds the header row; tbody starts empty and is filled by
      # blockTableContinue()

      return array(
          'alignments' => $alignments,
          'identified' => true,
          'element' => array(
              'name' => 'table',
              'handler' => 'elements',
              'text' => array(
                  array(
                      'name' => 'thead',
                      'handler' => 'elements',
                      'text' => array(
                          array(
                              'name' => 'tr',
                              'handler' => 'elements',
                              'text' => $HeaderElements,
                          ),
                      ),
                  ),
                  array(
                      'name' => 'tbody',
                      'handler' => 'elements',
                      'text' => array(),
                  ),
              ),
          ),
      );
  }
  # Adds a body row to an open table block.
  #
  # Returns nothing when the table was interrupted or the line contains no
  # '|'. The row is split on '|', except where the pipe is escaped with a
  # backslash or sits inside a backtick code span; each cell receives the
  # alignment of its column, if one is set.
  protected function blockTableContinue($Line, array $Block)
  {
      if (isset($Block['interrupted'])) {
          return;
      }

      if ($Line['text'][0] !== '|' && ! strpos($Line['text'], '|')) {
          return;
      }

      $row = trim(trim($Line['text']), '|');

      preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);

      $Cells = array();

      foreach ($matches[0] as $index => $cell) {
          $Cell = array(
              'name' => 'td',
              'handler' => 'line',
              'text' => trim($cell),
          );

          if (isset($Block['alignments'][$index])) {
              $Cell['attributes'] = array(
                  'style' => 'text-align: '.$Block['alignments'][$index].';',
              );
          }

          $Cells []= $Cell;
      }

      # index 1 of the table element is the tbody
      $Block['element']['text'][1]['text'] []= array(
          'name' => 'tr',
          'handler' => 'elements',
          'text' => $Cells,
      );

      return $Block;
  }
  775. #
  776. # ~
  777. #
  # Builds a paragraph block whose text is the line without its leading
  # spaces; the text is rendered by the 'line' handler.
  protected function paragraph($Line)
  {
      $Element = array(
          'name' => 'p',
          'text' => $Line['text'],
          'handler' => 'line',
      );

      return array('element' => $Element);
  }
  789. #
  790. # Inline Elements
  791. #
  # Maps a marker character to the inline types that line() tries at that
  # marker, in the listed order. line() calls the method 'inline'.<type> for
  # each candidate.
  protected $InlineTypes = array(
      '"' => array('SpecialCharacter'),
      '!' => array('Image'),
      '&' => array('SpecialCharacter'),
      '*' => array('Emphasis'),
      ':' => array('Url'),
      '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
      '>' => array('SpecialCharacter'),
      '[' => array('Link'),
      '_' => array('Emphasis'),
      '`' => array('Code'),
      '~' => array('Strikethrough'),
      '\\' => array('EscapeSequence'),
  );

  # The marker characters as one string; line() passes it to strpbrk() to find
  # the next marker in the text.
  protected $inlineMarkerList = '!"*_&[:<>`~\\';
  808. #
  809. # ~
  810. #
  # Renders the inline elements of a piece of text.
  #
  # The text is scanned for the next marker character. Each inline type
  # registered for that marker is tried, except those named in $nonNestables;
  # the first one that yields an inline is rendered, together with the plain
  # text before it. A marker that yields nothing is emitted as plain text.
  public function line($text, $nonNestables=array())
  {
      $markup = '';

      # strpbrk() returns the text from the first marker onwards
      while ($excerpt = strpbrk($text, $this->inlineMarkerList)) {
          $marker = $excerpt[0];

          # the excerpt is a suffix of the text, so this is the marker's offset
          $markerPosition = strlen($text) - strlen($excerpt);

          $Excerpt = array('text' => $excerpt, 'context' => $text);

          foreach ($this->InlineTypes[$marker] as $inlineType) {
              # types that may not nest in the current context are skipped
              if ( ! empty($nonNestables) && in_array($inlineType, $nonNestables)) {
                  continue;
              }

              $handler = 'inline'.$inlineType;
              $Inline = $this->$handler($Excerpt);

              if ( ! isset($Inline)) {
                  continue;
              }

              if (isset($Inline['position'])) {
                  # an inline that starts after the marker is not ours
                  if ($Inline['position'] > $markerPosition) {
                      continue;
                  }
              } else {
                  $Inline['position'] = $markerPosition;
              }

              # the new element inherits the non-nestables of this context
              foreach ($nonNestables as $inherited) {
                  $Inline['element']['nonNestables'][] = $inherited;
              }

              # plain text before the inline, then the inline itself
              $markup .= $this->unmarkedText(substr($text, 0, $Inline['position']));
              $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

              # continue behind the text the inline consumed
              $text = substr($text, $Inline['position'] + $Inline['extent']);

              continue 2;
          }

          # the marker does not start an inline: emit it as plain text
          $consumed = $markerPosition + 1;

          $markup .= $this->unmarkedText(substr($text, 0, $consumed));

          $text = substr($text, $consumed);
      }

      return $markup.$this->unmarkedText($text);
  }
  865. #
  866. # ~
  867. #
  # Matches an inline code span at the start of the excerpt.
  #
  # The span is delimited by the same number of marker characters on both
  # sides. Spaces next to the delimiters are dropped, and every line break,
  # together with the spaces before it, becomes a single space.
  protected function inlineCode($Excerpt)
  {
      $marker = $Excerpt['text'][0];

      $pattern = '/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s';

      if ( ! preg_match($pattern, $Excerpt['text'], $matches)) {
          return;
      }

      $code = preg_replace("/[ ]*\n/", ' ', $matches[2]);

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'code',
              'text' => $code,
          ),
      );
  }
  # Matches an e-mail autolink such as <me@example.com> at the start of the
  # excerpt.
  #
  # The link text is the address as written; the href gets a "mailto:" prefix
  # unless the address already carries one.
  protected function inlineEmailTag($Excerpt)
  {
      # cheap check before running the pattern
      if (strpos($Excerpt['text'], '>') === false) {
          return;
      }

      if ( ! preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) {
          return;
      }

      $address = $matches[1];

      # $matches[2] is only present when the address carries the scheme
      $href = isset($matches[2]) ? $address : 'mailto:' . $address;

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'a',
              'text' => $address,
              'attributes' => array(
                  'href' => $href,
              ),
          ),
      );
  }
  # Matches strong or regular emphasis at the start of the excerpt.
  #
  # A doubled marker is tried against the StrongRegex of that marker first;
  # otherwise, or if that fails, the EmRegex is tried. The matched inner text
  # is rendered by the 'line' handler.
  protected function inlineEmphasis($Excerpt)
  {
      # a lone marker at the end of the text cannot open anything
      if ( ! isset($Excerpt['text'][1])) {
          return;
      }

      $marker = $Excerpt['text'][0];

      $isStrong = $Excerpt['text'][1] === $marker
          && preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches);

      if ($isStrong) {
          $name = 'strong';
      } elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches)) {
          $name = 'em';
      } else {
          return;
      }

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => $name,
              'handler' => 'line',
              'text' => $matches[1],
          ),
      );
  }
  # Handles a backslash escape at the start of the excerpt.
  #
  # When the character after the backslash is listed in specialCharacters,
  # that character alone is emitted and both characters are consumed.
  protected function inlineEscapeSequence($Excerpt)
  {
      if ( ! isset($Excerpt['text'][1])) {
          return;
      }

      $escaped = $Excerpt['text'][1];

      if ( ! in_array($escaped, $this->specialCharacters)) {
          return;
      }

      return array(
          'markup' => $escaped,
          'extent' => 2,
      );
  }
  # Matches an image, which is a link preceded by '!'.
  #
  # The part after '!' is parsed by inlineLink(). The link's href becomes the
  # src, its text the alt, and its remaining attributes are carried over.
  protected function inlineImage($Excerpt)
  {
      $opensBracket = isset($Excerpt['text'][1]) && $Excerpt['text'][1] === '[';

      if ( ! $opensBracket) {
          return;
      }

      # drop the '!' so that the excerpt starts at the link
      $Excerpt['text']= substr($Excerpt['text'], 1);

      $Link = $this->inlineLink($Excerpt);

      if ($Link === null) {
          return;
      }

      $linkAttributes = $Link['element']['attributes'];

      $attributes = array(
          'src' => $linkAttributes['href'],
          'alt' => $Link['element']['text'],
      );

      # the union adds only keys that are not present yet; href is not wanted
      $attributes += $linkAttributes;

      unset($attributes['href']);

      return array(
          # one more than the link because of the '!'
          'extent' => $Link['extent'] + 1,
          'element' => array(
              'name' => 'img',
              'attributes' => $attributes,
          ),
      );
  }
  # Parses a link from $Excerpt['text'].
  #
  # A bracketed label (nested brackets allowed) is matched first. It must be
  # followed by one of:
  #   - an inline destination: (url), (url "title") or (url 'title')
  #   - a reference: [label], or nothing / [] to reuse the link text as label
  # References are looked up, lower-cased, in
  # $this->DefinitionData['Reference'].
  #
  # Returns an inline array with the consumed 'extent' and an 'a' element, or
  # null when no label is found or the reference is unknown.
  protected function inlineLink($Excerpt)
  {
      $remainder = $Excerpt['text'];

      if ( ! preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
      {
          return;
      }

      $Element = array(
          'name' => 'a',
          'handler' => 'line',
          'nonNestables' => array('Url', 'Link'),
          'text' => $matches[1],
          'attributes' => array(
              'href' => null,
              'title' => null,
          ),
      );

      $extent = strlen($matches[0]);

      $remainder = substr($remainder, $extent);

      # inline form: destination (and optional quoted title) in parentheses
      if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
      {
          $Element['attributes']['href'] = $matches[1];

          if (isset($matches[2]))
          {
              # strip the surrounding quote characters
              $Element['attributes']['title'] = substr($matches[2], 1, - 1);
          }

          return array(
              'extent' => $extent + strlen($matches[0]),
              'element' => $Element,
          );
      }

      # reference form: explicit label, or fall back to the link text
      if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
      {
          $label = strlen($matches[1]) ? $matches[1] : $Element['text'];

          $extent += strlen($matches[0]);
      }
      else
      {
          $label = $Element['text'];
      }

      $definition = strtolower($label);

      if ( ! isset($this->DefinitionData['Reference'][$definition]))
      {
          return;
      }

      $Definition = $this->DefinitionData['Reference'][$definition];

      $Element['attributes']['href'] = $Definition['url'];
      $Element['attributes']['title'] = $Definition['title'];

      return array(
          'extent' => $extent,
          'element' => $Element,
      );
  }
  # Passes a single raw HTML construct at the start of $Excerpt['text']
  # through unchanged: a closing tag, a comment, or an opening / self-closing
  # tag with optional attributes.
  #
  # Does nothing (returns null) when markup is escaped, safe mode is on, or
  # the text contains no '>'.
  protected function inlineMarkup($Excerpt)
  {
      if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
      {
          return;
      }

      $second = $Excerpt['text'][1];

      # each candidate pairs a cheap test on the second character with the
      # pattern to try when that test holds; order matters
      $candidates = array(
          array($second === '/', '/^<\/\w[\w-]*[ ]*>/s'),
          array($second === '!', '/^<!---?[^>-](?:-?[^-])*-->/s'),
          array($second !== ' ', '/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s'),
      );

      foreach ($candidates as $candidate)
      {
          list($applies, $pattern) = $candidate;

          if ($applies and preg_match($pattern, $Excerpt['text'], $matches))
          {
              return array(
                  'markup' => $matches[0],
                  'extent' => strlen($matches[0]),
              );
          }
      }
  }
  # Replaces the first character of $Excerpt['text'] with an HTML entity when
  # it is '>', '<', '"', or an '&' that does not already start an entity
  # (such as `&amp;` or `&#123;`). Consumes exactly one byte; returns null for
  # any other character.
  protected function inlineSpecialCharacter($Excerpt)
  {
      $text = $Excerpt['text'];
      $first = $text[0];

      $entityNames = array('>' => 'gt', '<' => 'lt', '"' => 'quot');

      if ($first === '&' and ! preg_match('/^&#?\w+;/', $text))
      {
          $entity = 'amp';
      }
      elseif (isset($entityNames[$first]))
      {
          $entity = $entityNames[$first];
      }
      else
      {
          return;
      }

      return array(
          'markup' => '&'.$entity.';',
          'extent' => 1,
      );
  }
  # Recognises `~~text~~` at the start of $Excerpt['text']. The content may
  # not begin or end with whitespace. Produces a 'del' element whose text is
  # handed to the 'line' handler; returns null when there is no match.
  protected function inlineStrikethrough($Excerpt)
  {
      $text = $Excerpt['text'];

      if ( ! isset($text[1]) or $text[1] !== '~')
      {
          return;
      }

      if ( ! preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $text, $matches))
      {
          return;
      }

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'del',
              'text' => $matches[1],
              'handler' => 'line',
          ),
      );
  }
  # Auto-links a bare http(s) URL. Only runs when $this->urlsLinked is true
  # and the character at index 2 of $Excerpt['text'] is '/'.
  #
  # The URL is searched for in $Excerpt['context'] rather than in the text,
  # so the result carries a 'position': the byte offset of the match within
  # the context. Returns null when disabled or when nothing matches.
  protected function inlineUrl($Excerpt)
  {
      if ($this->urlsLinked !== true)
      {
          return;
      }

      if ( ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
      {
          return;
      }

      if ( ! preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
      {
          return;
      }

      # with PREG_OFFSET_CAPTURE each match is a (string, offset) pair
      list($url, $position) = $matches[0];

      return array(
          'extent' => strlen($url),
          'position' => $position,
          'element' => array(
              'name' => 'a',
              'text' => $url,
              'attributes' => array(
                  'href' => $url,
              ),
          ),
      );
  }
  # Recognises an angle-bracketed URL such as `<scheme://host/path>` at the
  # start of $Excerpt['text'] and turns it into an 'a' element whose text and
  # href are both the URL. Returns null when there is no match.
  protected function inlineUrlTag($Excerpt)
  {
      $text = $Excerpt['text'];

      # cheap rejection before running the pattern
      if (strpos($text, '>') === false)
      {
          return;
      }

      if ( ! preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $text, $matches))
      {
          return;
      }

      $url = $matches[1];

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'a',
              'text' => $url,
              'attributes' => array(
                  'href' => $url,
              ),
          ),
      );
  }
  1131. # ~
  # Converts line endings in plain text into `<br />` where a hard break is
  # requested.
  #
  # With $this->breaksEnabled every newline (and any spaces before it)
  # becomes a break. Otherwise only a newline preceded by two or more spaces
  # or by a backslash does; a single space before a newline is then dropped.
  protected function unmarkedText($text)
  {
      if ($this->breaksEnabled)
      {
          return preg_replace('/[ ]*\n/', "<br />\n", $text);
      }

      $withBreaks = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);

      return str_replace(" \n", "\n", $withBreaks);
  }
  1145. #
  1146. # Handlers
  1147. #
  # Renders one element array as an HTML string.
  #
  # Keys read from $Element:
  #   'name'        tag name
  #   'attributes'  optional name => value map; null values are omitted,
  #                 others are escaped
  #   'text'        content; escaped unless a 'handler' is given
  #   'rawHtml'     content used when 'text' is unset; emitted verbatim only
  #                 outside safe mode or with 'allowRawHtmlInSafeMode'
  #   'handler'     optional method name on $this, called with the content
  #                 and the 'nonNestables' list (default: empty array)
  #
  # In safe mode the element is passed through sanitiseElement() first. An
  # element with neither 'text' nor 'rawHtml' is rendered self-closing.
  protected function element(array $Element)
  {
      if ($this->safeMode)
      {
          $Element = $this->sanitiseElement($Element);
      }

      $name = $Element['name'];

      $markup = '<'.$name;

      if (isset($Element['attributes']))
      {
          foreach ($Element['attributes'] as $attribute => $value)
          {
              if ($value !== null)
              {
                  $markup .= ' '.$attribute.'="'.self::escape($value).'"';
              }
          }
      }

      $content = null;
      $permitRawHtml = false;

      if (isset($Element['text']))
      {
          $content = $Element['text'];
      }
      # very strongly consider an alternative if you're writing an
      # extension
      elseif (isset($Element['rawHtml']))
      {
          $content = $Element['rawHtml'];

          $permitRawHtml = ! $this->safeMode || ! empty($Element['allowRawHtmlInSafeMode']);
      }

      if ($content === null)
      {
          return $markup.' />';
      }

      $markup .= '>';

      $nonNestables = isset($Element['nonNestables']) ? $Element['nonNestables'] : array();

      if (isset($Element['handler']))
      {
          $handler = $Element['handler'];

          $markup .= $this->$handler($content, $nonNestables);
      }
      elseif ($permitRawHtml)
      {
          $markup .= $content;
      }
      else
      {
          $markup .= self::escape($content, true);
      }

      return $markup.'</'.$name.'>';
  }
  # Renders a list of element arrays with element(), placing a newline
  # before each rendered element and one after the last. An empty list
  # yields a single newline.
  protected function elements(array $Elements)
  {
      $pieces = array();

      foreach ($Elements as $Element)
      {
          $pieces[] = "\n".$this->element($Element);
      }

      $pieces[] = "\n";

      return implode('', $pieces);
  }
  1216. # ~
  # Renders the lines of a list item with lines().
  #
  # When the item contains no empty line and its markup starts with `<p>`,
  # the first paragraph is unwrapped: the leading `<p>` and the first
  # `</p>` are removed and the trimmed markup is returned.
  #
  # If no `</p>` exists (for instance the item starts with a raw `<p>` tag
  # that is never closed) the markup is returned untouched. Without this
  # guard strpos() yields false, substr_replace() treats it as offset 0 and
  # the first four characters of the item's content are deleted.
  protected function li($lines)
  {
      $markup = $this->lines($lines);

      $trimmedMarkup = trim($markup);

      if (in_array('', $lines) or substr($trimmedMarkup, 0, 3) !== '<p>')
      {
          return $markup;
      }

      $unwrapped = substr($trimmedMarkup, 3);

      $position = strpos($unwrapped, '</p>');

      if ($position === false)
      {
          return $markup;
      }

      return substr_replace($unwrapped, '', $position, 4);
  }
  1230. #
  1231. # Deprecated Methods
  1232. #
  # Listed under "Deprecated Methods": simply forwards to text() and returns
  # its result.
  function parse($text)
  {
      return $this->text($text);
  }
  # Cleans an element for safe-mode output.
  #
  # For 'a' and 'img' the URL-bearing attribute ('href' / 'src') is run
  # through filterUnsafeUrlInAttribute(). After that every attribute whose
  # name is not a plain identifier, or whose name starts with "on" (event
  # handlers), is removed. Returns the cleaned element.
  protected function sanitiseElement(array $Element)
  {
      static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
      static $safeUrlNameToAtt = array(
          'a' => 'href',
          'img' => 'src',
      );

      $name = $Element['name'];

      if (isset($safeUrlNameToAtt[$name]))
      {
          $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$name]);
      }

      if (empty($Element['attributes']))
      {
          return $Element;
      }

      foreach ($Element['attributes'] as $att => $unused)
      {
          # badly parsed attribute names and onevent attributes are both dropped
          $isMalformed = ! preg_match($goodAttribute, $att);

          if ($isMalformed or self::striAtStart($att, 'on'))
          {
              unset($Element['attributes'][$att]);
          }
      }

      return $Element;
  }
  # Neutralises a URL attribute whose value does not start with one of the
  # prefixes in $this->safeLinksWhitelist (compared case-insensitively) by
  # replacing every ':' with '%3A'.
  #
  # $Element    element array; its 'attributes' entry is read and updated
  # $attribute  name of the attribute holding the URL (e.g. 'href', 'src')
  #
  # Returns the element, modified only when the URL was not whitelisted.
  # An element that lacks the attribute (or has it set to null) is returned
  # unchanged; previously this read an undefined index and then wrote an
  # empty string, which added a spurious `href=""` / `src=""` to the output.
  protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
  {
      if ( ! isset($Element['attributes'][$attribute]))
      {
          return $Element;
      }

      $url = $Element['attributes'][$attribute];

      foreach ($this->safeLinksWhitelist as $scheme)
      {
          if (self::striAtStart($url, $scheme))
          {
              return $Element;
          }
      }

      $Element['attributes'][$attribute] = str_replace(':', '%3A', $url);

      return $Element;
  }
  1279. #
  1280. # Static Methods
  1281. #
  # HTML-escapes $text as UTF-8. Quotes are converted too unless
  # $allowQuotes is true, in which case they are left as they are.
  protected static function escape($text, $allowQuotes = false)
  {
      $flags = ENT_QUOTES;

      if ($allowQuotes)
      {
          $flags = ENT_NOQUOTES;
      }

      return htmlspecialchars($text, $flags, 'UTF-8');
  }
  # Case-insensitive "starts with": true when $string begins with $needle.
  # A needle longer than the string never matches.
  protected static function striAtStart($string, $needle)
  {
      $needleLength = strlen($needle);

      if (strlen($string) < $needleLength)
      {
          return false;
      }

      $prefix = substr($string, 0, $needleLength);

      return strtolower($prefix) === strtolower($needle);
  }
  # Returns the instance registered under $name, creating it with
  # `new static()` and remembering it on the first request for that name.
  static function instance($name = 'default')
  {
      if ( ! isset(self::$instances[$name]))
      {
          self::$instances[$name] = new static();
      }

      return self::$instances[$name];
  }

  # instances handed out by instance(), keyed by name
  private static $instances = array();
  1309. #
  1310. # Fields
  1311. #
  # inlineLink() reads link references from
  # $DefinitionData['Reference'][<lower-cased label>], each an array with
  # 'url' and 'title'
  protected $DefinitionData;

  #
  # Read-Only

  # characters accepted after the escape character by inlineEscapeSequence()
  protected $specialCharacters = array(
      '\\', '`', '*', '_',
      '{', '}', '[', ']', '(', ')',
      '>', '#', '+', '-', '.', '!', '|',
  );

  # strong-emphasis patterns, keyed by marker character (see inlineEmphasis)
  protected $StrongRegex = array(
      '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
      '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
  );

  # regular-emphasis patterns, keyed by marker character (see inlineEmphasis)
  protected $EmRegex = array(
      '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
      '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
  );

  # pattern fragment for one HTML attribute, with an optional unquoted,
  # double-quoted or single-quoted value; spliced into the opening-tag
  # pattern in inlineMarkup()
  protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

  # element names treated as void (their use is outside this section)
  protected $voidElements = array(
      'area', 'base', 'br', 'col', 'command', 'embed', 'hr',
      'img', 'input', 'link', 'meta', 'param', 'source',
  );

  # element names treated as text-level (their use is outside this section)
  protected $textLevelElements = array(
      'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
      'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
      'i', 'rp', 'del', 'code', 'strike', 'marquee',
      'q', 'rt', 'ins', 'font', 'strong',
      's', 'tt', 'kbd', 'mark',
      'u', 'xm', 'sub', 'nobr',
      'sup', 'ruby',
      'var', 'span',
      'wbr', 'time',
  );
  1341. }