Parsedown.php 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528
  1. <?php
  2. #
  3. #
  4. # Parsedown
  5. # http://parsedown.org
  6. #
  7. # (c) Emanuil Rusev
  8. # http://erusev.com
  9. #
  10. # For the full license information, view the LICENSE file that was distributed
  11. # with this source code.
  12. #
  13. #
  14. class Parsedown
  15. {
  16. # ~
  17. const version = '1.6.0';
  18. # ~
  #
  # Converts a Markdown document into markup.
  #
  # $text is the raw Markdown source. The result is whatever lines()
  # produces for it, with the surrounding line breaks stripped.
  #
  public function text($text)
  {
      # every conversion starts with an empty set of definitions
      $this->DefinitionData = array();

      # normalise "\r\n" and "\r" line endings to "\n"
      $normalized = str_replace(array("\r\n", "\r"), "\n", $text);

      # drop the line breaks around the document, then split it into lines
      $lines = explode("\n", trim($normalized, "\n"));

      # identify the blocks and strip the line breaks around the result
      return trim($this->lines($lines), "\n");
  }
  35. #
  36. # Setters
  37. #
  # Value stored by setBreaksEnabled(); nothing in this block reads it.
  protected $breaksEnabled;

  #
  # Stores the given flag and returns the parser itself, so that setter
  # calls can be chained.
  #
  public function setBreaksEnabled($breaksEnabled)
  {
      $this->breaksEnabled = $breaksEnabled;

      return $this;
  }
  # Value stored by setMarkupEscaped(); when truthy, the block and inline
  # markup handlers that check it return without producing anything.
  protected $markupEscaped;

  #
  # Stores the given flag and returns the parser itself, so that setter
  # calls can be chained.
  #
  public function setMarkupEscaped($markupEscaped)
  {
      $this->markupEscaped = $markupEscaped;

      return $this;
  }
  # Value stored by setUrlsLinked(); inlineUrl() only links bare URLs
  # while this is exactly true, which is also the default.
  protected $urlsLinked = true;

  #
  # Stores the given flag and returns the parser itself, so that setter
  # calls can be chained.
  #
  public function setUrlsLinked($urlsLinked)
  {
      $this->urlsLinked = $urlsLinked;

      return $this;
  }
  56. #
  57. # Lines
  58. #
  #
  # Maps the first character of a line's text to the block types that may
  # start with it. lines() tries the listed types in order by calling the
  # matching "block<Type>" method.
  #
  protected $BlockTypes = array(
      '#' => array('Header'),
      '*' => array('Rule', 'List'),
      '+' => array('List'),
      '-' => array('SetextHeader', 'Table', 'Rule', 'List'),

      # ordered list markers start with a digit
      '0' => array('List'),
      '1' => array('List'),
      '2' => array('List'),
      '3' => array('List'),
      '4' => array('List'),
      '5' => array('List'),
      '6' => array('List'),
      '7' => array('List'),
      '8' => array('List'),
      '9' => array('List'),

      ':' => array('Table'),
      '<' => array('Comment', 'Markup'),
      '=' => array('SetextHeader'),
      '>' => array('Quote'),
      '[' => array('Reference'),
      '_' => array('Rule'),
      '`' => array('FencedCode'),
      '|' => array('Table'),
      '~' => array('FencedCode'),
  );

  #
  # Block types that lines() tries for every line, before the ones listed
  # for the line's first character.
  #
  protected $unmarkedBlockTypes = array(
      'Code',
  );
  88. #
  89. # Blocks
  90. #
  #
  # Groups the given lines into blocks and renders them.
  #
  # Every non-blank line is first offered to the open block (when that
  # block is continuable), then to the block types registered for its
  # first character, and finally becomes part of a paragraph. The result
  # is the markup of all visible blocks, each preceded by a line break,
  # followed by one trailing line break.
  #
  private function lines(array $lines)
  {
      $Blocks = array();
      $Current = null;

      foreach ($lines as $line)
      {
          # a blank line only marks the open block as interrupted
          if (chop($line) === '')
          {
              if (isset($Current))
              {
                  $Current['interrupted'] = true;
              }

              continue;
          }

          # expand tabs to the next multiple of four columns
          if (strpos($line, "\t") !== false)
          {
              $parts = explode("\t", $line);
              $line = array_shift($parts);

              foreach ($parts as $part)
              {
                  $padding = 4 - (mb_strlen($line, 'utf-8') % 4);
                  $line .= str_repeat(' ', $padding).$part;
              }
          }

          # split the line into its leading spaces and the remaining text
          $indent = strspn($line, ' ');
          $text = substr($line, $indent);

          $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

          # let the open block claim the line first
          if (isset($Current['continuable']))
          {
              $continueMethod = 'block'.$Current['type'].'Continue';
              $Continued = $this->$continueMethod($Line, $Current);

              if (isset($Continued))
              {
                  $Current = $Continued;

                  continue;
              }

              # the block refused the line, so give it a chance to finish up
              $completeMethod = 'block'.$Current['type'].'Complete';

              if (method_exists($this, $completeMethod))
              {
                  $Current = $this->$completeMethod($Current);
              }
          }

          # collect the block types that may start with this line
          $marker = $text[0];
          $candidateTypes = $this->unmarkedBlockTypes;

          if (isset($this->BlockTypes[$marker]))
          {
              $candidateTypes = array_merge($candidateTypes, array_values($this->BlockTypes[$marker]));
          }

          foreach ($candidateTypes as $blockType)
          {
              $startMethod = 'block'.$blockType;
              $Started = $this->$startMethod($Line, $Current);

              if ( ! isset($Started))
              {
                  continue;
              }

              $Started['type'] = $blockType;

              # a block that is already identified replaces the open block
              # instead of following it
              if ( ! isset($Started['identified']))
              {
                  $Blocks []= $Current;
                  $Started['identified'] = true;
              }

              if (method_exists($this, $startMethod.'Continue'))
              {
                  $Started['continuable'] = true;
              }

              $Current = $Started;

              continue 2;
          }

          # no block type claimed the line: extend the open paragraph ...
          if (isset($Current) && ! isset($Current['type']) && ! isset($Current['interrupted']))
          {
              $Current['element']['text'] .= "\n".$text;

              continue;
          }

          # ... or start a new one
          $Blocks []= $Current;
          $Current = $this->paragraph($Line);
          $Current['identified'] = true;
      }

      # give the last block a chance to finish up as well
      if (isset($Current['continuable']))
      {
          $completeMethod = 'block'.$Current['type'].'Complete';

          if (method_exists($this, $completeMethod))
          {
              $Current = $this->$completeMethod($Current);
          }
      }

      $Blocks []= $Current;

      # the first entry is the block that was open before the first one
      # started, so it is dropped
      unset($Blocks[0]);

      $markup = '';

      foreach ($Blocks as $Finished)
      {
          if (isset($Finished['hidden']))
          {
              continue;
          }

          $markup .= "\n";
          $markup .= isset($Finished['markup']) ? $Finished['markup'] : $this->element($Finished['element']);
      }

      return $markup."\n";
  }
  208. #
  209. # Code
  #
  # Starts an indented code block.
  #
  # Returns nothing when $Block is an open, uninterrupted block without a
  # type (the line then belongs to that block) or when the line is
  # indented by fewer than four spaces. Otherwise returns a "pre" block
  # wrapping a "code" element that holds the line minus its first four
  # characters.
  #
  protected function blockCode($Line, $Block = null)
  {
      $belongsToOpenBlock = isset($Block) && ! isset($Block['type']) && ! isset($Block['interrupted']);

      if ($belongsToOpenBlock)
      {
          return;
      }

      if ( ! ($Line['indent'] >= 4))
      {
          return;
      }

      $Code = array(
          'name' => 'code',
          'text' => substr($Line['body'], 4),
      );

      return array(
          'element' => array(
              'name' => 'pre',
              'handler' => 'element',
              'text' => $Code,
          ),
      );
  }
  #
  # Appends a line to an indented code block.
  #
  # Returns nothing when the line is indented by fewer than four spaces.
  # Otherwise the line, minus its first four characters, is appended on a
  # new line; if the block was interrupted, an extra line break is added
  # first and the flag is cleared.
  #
  protected function blockCodeContinue($Line, $Block)
  {
      if ( ! ($Line['indent'] >= 4))
      {
          return;
      }

      $code = $Block['element']['text']['text'];

      if (isset($Block['interrupted']))
      {
          $code .= "\n";

          unset($Block['interrupted']);
      }

      $code .= "\n".substr($Line['body'], 4);

      $Block['element']['text']['text'] = $code;

      return $Block;
  }
  #
  # Finishes an indented code block by escaping its collected text with
  # htmlspecialchars() (quotes are left untouched).
  #
  protected function blockCodeComplete($Block)
  {
      $Block['element']['text']['text'] = htmlspecialchars(
          $Block['element']['text']['text'],
          ENT_NOQUOTES,
          'UTF-8'
      );

      return $Block;
  }
  254. #
  255. # Comment
  #
  # Starts an HTML comment block.
  #
  # Returns nothing when markup is escaped or when the characters after
  # the first one are not "!--". The block keeps the whole line as markup
  # and is marked closed when the line ends with "-->".
  #
  protected function blockComment($Line)
  {
      if ($this->markupEscaped)
      {
          return;
      }

      if (substr($Line['text'], 1, 3) !== '!--')
      {
          return;
      }

      $Comment = array(
          'markup' => $Line['body'],
      );

      if (preg_match('/-->$/', $Line['text']))
      {
          $Comment['closed'] = true;
      }

      return $Comment;
  }
  #
  # Appends a line to an open HTML comment block.
  #
  # Returns nothing once the comment is closed. Otherwise the line is
  # appended on a new line and the block is marked closed when the line
  # ends with "-->".
  #
  protected function blockCommentContinue($Line, array $Block)
  {
      if (isset($Block['closed']))
      {
          return;
      }

      $Block['markup'] = $Block['markup']."\n".$Line['body'];

      $endsComment = preg_match('/-->$/', $Line['text']);

      if ($endsComment)
      {
          $Block['closed'] = true;
      }

      return $Block;
  }
  287. #
  288. # Fenced Code
  #
  # Starts a fenced code block.
  #
  # The line has to consist of at least three copies of its first
  # character, optionally followed by a language name made of word
  # characters and dashes. The fence character is remembered in 'char';
  # a language name becomes a "language-<name>" class on the "code"
  # element.
  #
  protected function blockFencedCode($Line)
  {
      $fence = $Line['text'][0];

      if ( ! preg_match('/^['.$fence.']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches))
      {
          return;
      }

      $Code = array(
          'name' => 'code',
          'text' => '',
      );

      if (isset($matches[1]))
      {
          $Code['attributes'] = array(
              'class' => 'language-'.$matches[1],
          );
      }

      return array(
          'char' => $fence,
          'element' => array(
              'name' => 'pre',
              'handler' => 'element',
              'text' => $Code,
          ),
      );
  }
  #
  # Feeds a line to an open fenced code block.
  #
  # Returns nothing once the block is complete. A line made of at least
  # three fence characters completes the block and removes the first
  # character of the collected text; any other line is appended on a new
  # line. An interruption adds one extra line break first.
  #
  protected function blockFencedCodeContinue($Line, $Block)
  {
      if (isset($Block['complete']))
      {
          return;
      }

      $code = $Block['element']['text']['text'];

      if (isset($Block['interrupted']))
      {
          $code .= "\n";

          unset($Block['interrupted']);
      }

      $isClosingFence = preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']);

      if ($isClosingFence)
      {
          # every collected line was prefixed with a line break, so the
          # text starts with one that has to go
          $Block['element']['text']['text'] = substr($code, 1);
          $Block['complete'] = true;
      }
      else
      {
          $Block['element']['text']['text'] = $code."\n".$Line['body'];
      }

      return $Block;
  }
  #
  # Finishes a fenced code block by escaping its collected text with
  # htmlspecialchars() (quotes are left untouched).
  #
  protected function blockFencedCodeComplete($Block)
  {
      $Block['element']['text']['text'] = htmlspecialchars(
          $Block['element']['text']['text'],
          ENT_NOQUOTES,
          'UTF-8'
      );

      return $Block;
  }
  342. #
  343. # Header
  #
  # Starts an ATX header.
  #
  # The level is one plus the number of "#" characters that follow the
  # first character of the line. Lines shorter than two characters and
  # levels above six yield nothing. The header text is the line with
  # "#" and spaces trimmed from both ends.
  #
  protected function blockHeader($Line)
  {
      if ( ! isset($Line['text'][1]))
      {
          return;
      }

      $level = 1 + strspn($Line['text'], '#', 1);

      if ($level > 6)
      {
          return;
      }

      return array(
          'element' => array(
              'name' => 'h'.$level,
              'text' => trim($Line['text'], '# '),
              'handler' => 'line',
          ),
      );
  }
  368. #
  369. # List
  #
  # Starts a list.
  #
  # A first character that sorts at or before "-" selects an unordered
  # list with the markers "*", "+" and "-"; anything else selects an
  # ordered list whose items start with digits and a dot. The block
  # remembers the indent and the marker pattern for blockListContinue().
  #
  protected function blockList($Line)
  {
      if ($Line['text'][0] <= '-')
      {
          $name = 'ul';
          $pattern = '[*+-]';
      }
      else
      {
          $name = 'ol';
          $pattern = '[0-9]+[.]';
      }

      if ( ! preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
      {
          return;
      }

      $Block = array(
          'indent' => $Line['indent'],
          'pattern' => $pattern,
          'element' => array(
              'name' => $name,
              'handler' => 'elements',
          ),
      );

      $Block['li'] = array(
          'name' => 'li',
          'handler' => 'li',
          'text' => array(
              $matches[2],
          ),
      );

      # the list holds a reference to the open item, so lines appended to
      # $Block['li'] later on show up in the list as well
      $Block['element']['text'] []= & $Block['li'];

      return $Block;
  }
  #
  # Feeds a line to an open list.
  #
  # - A line at the list's indent that starts with the list's marker
  #   opens a new item.
  # - A line that starts with "[" and is accepted by blockReference() is
  #   swallowed without changing the list.
  # - Any other line is appended to the open item with up to four leading
  #   spaces removed. After an interruption this only happens for
  #   indented lines, and an empty line is inserted first.
  #
  # Returns nothing when the line does not belong to the list.
  #
  protected function blockListContinue($Line, array $Block)
  {
      $startsItem = $Block['indent'] === $Line['indent']
          && preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches);

      if ($startsItem)
      {
          if (isset($Block['interrupted']))
          {
              $Block['li']['text'] []= '';

              unset($Block['interrupted']);
          }

          # detach from the previous item before creating the next one
          unset($Block['li']);

          $Block['li'] = array(
              'name' => 'li',
              'handler' => 'li',
              'text' => array(
                  isset($matches[1]) ? $matches[1] : '',
              ),
          );

          $Block['element']['text'] []= & $Block['li'];

          return $Block;
      }

      if ($Line['text'][0] === '[' and $this->blockReference($Line))
      {
          return $Block;
      }

      $wasInterrupted = isset($Block['interrupted']);

      if ($wasInterrupted and ! ($Line['indent'] > 0))
      {
          return;
      }

      if ($wasInterrupted)
      {
          $Block['li']['text'] []= '';

          unset($Block['interrupted']);
      }

      $Block['li']['text'] []= preg_replace('/^[ ]{0,4}/', '', $Line['body']);

      return $Block;
  }
  434. #
  435. # Quote
  #
  # Starts a block quote from a line that begins with ">". The text after
  # the marker and one optional space becomes the first quoted line.
  #
  protected function blockQuote($Line)
  {
      if ( ! preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
      {
          return;
      }

      return array(
          'element' => array(
              'name' => 'blockquote',
              'handler' => 'lines',
              'text' => array($matches[1]),
          ),
      );
  }
  #
  # Feeds a line to an open block quote.
  #
  # A line starting with ">" is appended without its marker, preceded by
  # an empty line if the quote was interrupted. A line without a marker
  # is appended as is, but only while the quote is not interrupted;
  # otherwise nothing is returned.
  #
  protected function blockQuoteContinue($Line, array $Block)
  {
      $isQuoted = $Line['text'][0] === '>' && preg_match('/^>[ ]?(.*)/', $Line['text'], $matches);

      if ($isQuoted)
      {
          if (isset($Block['interrupted']))
          {
              $Block['element']['text'] []= '';

              unset($Block['interrupted']);
          }

          $Block['element']['text'] []= $matches[1];

          return $Block;
      }

      if (isset($Block['interrupted']))
      {
          return;
      }

      $Block['element']['text'] []= $Line['text'];

      return $Block;
  }
  468. #
  469. # Rule
  #
  # Starts a horizontal rule: the line has to consist of at least three
  # copies of its first character, optionally separated by spaces.
  #
  protected function blockRule($Line)
  {
      $marker = $Line['text'][0];

      $isRule = preg_match('/^(['.$marker.'])([ ]*\1){2,}[ ]*$/', $Line['text']);

      if ( ! $isRule)
      {
          return;
      }

      return array(
          'element' => array(
              'name' => 'hr'
          ),
      );
  }
  482. #
  483. # Setext
  #
  # Turns the open paragraph into a setext header.
  #
  # Only applies when $Block is an open, uninterrupted block without a
  # type and the line consists solely of copies of its first character.
  # "=" yields an "h1", any other character an "h2".
  #
  protected function blockSetextHeader($Line, array $Block = null)
  {
      $followsParagraph = isset($Block) && ! isset($Block['type']) && ! isset($Block['interrupted']);

      if ( ! $followsParagraph)
      {
          return;
      }

      $marker = $Line['text'][0];

      if (chop($Line['text'], $marker) !== '')
      {
          return;
      }

      $Block['element']['name'] = $marker === '=' ? 'h1' : 'h2';

      return $Block;
  }
  496. #
  497. # Markup
  #
  # Starts a block of raw HTML markup.
  #
  # Returns nothing when markup is escaped, when the line does not start
  # with an opening tag, or when the tag is a text-level element. The
  # block records the tag name (as written), a nesting depth of zero and
  # the line as markup.
  #
  # - Tag alone on the line: the block is closed and marked void when the
  #   tag is self-closing or names a void element.
  # - Tag followed by more text: void and self-closing tags yield nothing;
  #   otherwise the block is closed when the line ends with the matching
  #   closing tag.
  #
  protected function blockMarkup($Line)
  {
      if ($this->markupEscaped)
      {
          return;
      }

      if ( ! preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
      {
          return;
      }

      # element names are compared in lower case
      $element = strtolower($matches[1]);

      if (in_array($element, $this->textLevelElements))
      {
          return;
      }

      # The void-element lookup uses the lowercased name as well, so that
      # "<HR>" or "<BR>" is handled like "<hr>" or "<br>" instead of
      # leaving the block open until a closing tag shows up.
      # NOTE(review): assumes $this->voidElements holds lowercase names,
      # like the text-level lookup above assumes for its list - confirm.
      $isVoid = isset($matches[2]) || in_array($element, $this->voidElements);

      $Block = array(
          'name' => $matches[1],
          'depth' => 0,
          'markup' => $Line['text'],
      );

      $remainder = substr($Line['text'], strlen($matches[0]));

      if (trim($remainder) === '')
      {
          if ($isVoid)
          {
              $Block['closed'] = true;
              $Block['void'] = true;
          }

          return $Block;
      }

      # a void element cannot wrap the text that follows it
      if ($isVoid)
      {
          return;
      }

      if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
      {
          $Block['closed'] = true;
      }

      return $Block;
  }
  #
  # Feeds a line to an open block of raw HTML markup.
  #
  # Returns nothing once the block is closed. A line starting with
  # another opening tag of the block's element increases the nesting
  # depth. A line ending with the element's closing tag decreases it, or
  # closes the block when the depth is already zero. The line is always
  # appended to the markup, after an extra line break if the block was
  # interrupted.
  #
  protected function blockMarkupContinue($Line, array $Block)
  {
      if (isset($Block['closed']))
      {
          return;
      }

      $name = $Block['name'];

      $opensNested = preg_match('/^<'.$name.'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text']);
      $closesElement = preg_match('/(.*?)<\/'.$name.'>[ ]*$/i', $Line['text']);

      if ($opensNested)
      {
          $Block['depth'] ++;
      }

      if ($closesElement)
      {
          if ($Block['depth'] > 0)
          {
              $Block['depth'] --;
          }
          else
          {
              $Block['closed'] = true;
          }
      }

      if (isset($Block['interrupted']))
      {
          $Block['markup'] .= "\n";

          unset($Block['interrupted']);
      }

      $Block['markup'] .= "\n".$Line['body'];

      return $Block;
  }
  569. #
  570. # Reference
  #
  # Registers a link reference definition such as
  # [id]: <http://example.com> "Title".
  #
  # The URL and the optional title are stored under the lowercased id in
  # $this->DefinitionData['Reference']. The returned block is hidden.
  #
  protected function blockReference($Line)
  {
      if ( ! preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
      {
          return;
      }

      $id = strtolower($matches[1]);

      $this->DefinitionData['Reference'][$id] = array(
          'url' => $matches[2],
          'title' => isset($matches[3]) ? $matches[3] : null,
      );

      return array(
          'hidden' => true,
      );
  }
  591. #
  592. # Table
  #
  # Turns the open paragraph into a table when the line is a divider row.
  #
  # Only applies when $Block is an open, uninterrupted block without a
  # type, its text contains "|", and the line consists solely of spaces,
  # "-", ":" and "|". The paragraph text becomes the header row; colons
  # in the divider cells select the column alignments (left, right or
  # center), which are kept in 'alignments' for blockTableContinue().
  #
  protected function blockTable($Line, array $Block = null)
  {
      $followsParagraph = isset($Block) && ! isset($Block['type']) && ! isset($Block['interrupted']);

      if ( ! $followsParagraph)
      {
          return;
      }

      $isDivider = strpos($Block['element']['text'], '|') !== false
          && chop($Line['text'], ' -:|') === '';

      if ( ! $isDivider)
      {
          return;
      }

      # column alignments, taken from the divider row

      $alignments = array();

      foreach (explode('|', trim(trim($Line['text']), '|')) as $dividerCell)
      {
          $dividerCell = trim($dividerCell);

          if ($dividerCell === '')
          {
              continue;
          }

          $startsWithColon = $dividerCell[0] === ':';
          $endsWithColon = substr($dividerCell, - 1) === ':';

          if ($startsWithColon && $endsWithColon)
          {
              $alignments []= 'center';
          }
          elseif ($endsWithColon)
          {
              $alignments []= 'right';
          }
          elseif ($startsWithColon)
          {
              $alignments []= 'left';
          }
          else
          {
              $alignments []= null;
          }
      }

      # header cells, taken from the paragraph text

      $HeaderElements = array();

      foreach (explode('|', trim(trim($Block['element']['text']), '|')) as $index => $headerCell)
      {
          $HeaderElement = array(
              'name' => 'th',
              'text' => trim($headerCell),
              'handler' => 'line',
          );

          if (isset($alignments[$index]))
          {
              $HeaderElement['attributes'] = array(
                  'style' => 'text-align: '.$alignments[$index].';',
              );
          }

          $HeaderElements []= $HeaderElement;
      }

      # the table: a head holding the header row and a body without rows

      $HeaderRow = array(
          'name' => 'tr',
          'handler' => 'elements',
          'text' => $HeaderElements,
      );

      return array(
          'alignments' => $alignments,
          'identified' => true,
          'element' => array(
              'name' => 'table',
              'handler' => 'elements',
              'text' => array(
                  array(
                      'name' => 'thead',
                      'handler' => 'elements',
                      'text' => array($HeaderRow),
                  ),
                  array(
                      'name' => 'tbody',
                      'handler' => 'elements',
                      'text' => array(),
                  ),
              ),
          ),
      );
  }
  #
  # Adds a row to an open table.
  #
  # Returns nothing when the table was interrupted or the line contains
  # no "|". Cells are split on "|", except for escaped pipes and pipes
  # inside backtick code spans, and receive the alignment recorded for
  # their column, if any.
  #
  protected function blockTableContinue($Line, array $Block)
  {
      if (isset($Block['interrupted']))
      {
          return;
      }

      if (strpos($Line['text'], '|') === false)
      {
          return;
      }

      $row = trim(trim($Line['text']), '|');

      preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);

      $Cells = array();

      foreach ($matches[0] as $index => $cell)
      {
          $Cell = array(
              'name' => 'td',
              'handler' => 'line',
              'text' => trim($cell),
          );

          if (isset($Block['alignments'][$index]))
          {
              $Cell['attributes'] = array(
                  'style' => 'text-align: '.$Block['alignments'][$index].';',
              );
          }

          $Cells []= $Cell;
      }

      # rows go into the second child of the table, its body
      $Block['element']['text'][1]['text'] []= array(
          'name' => 'tr',
          'handler' => 'elements',
          'text' => $Cells,
      );

      return $Block;
  }
  711. #
  712. # ~
  713. #
  #
  # Builds a paragraph block whose text is the text of the given line.
  #
  protected function paragraph($Line)
  {
      $Element = array(
          'name' => 'p',
          'text' => $Line['text'],
          'handler' => 'line',
      );

      return array('element' => $Element);
  }
  725. #
  726. # Inline Elements
  727. #
  #
  # Maps a marker character to the inline types that may start with it.
  # line() tries the listed types in order by calling the matching
  # "inline<Type>" method.
  #
  protected $InlineTypes = array(
      '"' => array('SpecialCharacter'),
      '!' => array('Image'),
      '&' => array('SpecialCharacter'),
      '*' => array('Emphasis'),
      ':' => array('Url'),
      '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
      '>' => array('SpecialCharacter'),
      '[' => array('Link'),
      '_' => array('Emphasis'),
      '`' => array('Code'),
      '~' => array('Strikethrough'),
      '\\' => array('EscapeSequence'),
  );

  #
  # The characters line() searches for; each one is a key of $InlineTypes.
  #
  protected $inlineMarkerList = '!"*_&[:<>`~\\';
  744. #
  745. # ~
  746. #
  #
  # Renders the inline elements of a piece of text.
  #
  # The text is scanned for the next marker character. The inline types
  # registered for that marker are tried in order; the first one that
  # returns an inline located at or before the marker is rendered and the
  # text it covers is consumed. When no type applies, the text up to and
  # including the marker is passed on as unmarked text.
  #
  public function line($text)
  {
      $markup = '';

      # $excerpt is the text from the first marker onwards
      while ($excerpt = strpbrk($text, $this->inlineMarkerList))
      {
          $marker = $excerpt[0];
          $markerPosition = strpos($text, $marker);

          $Excerpt = array('text' => $excerpt, 'context' => $text);

          foreach ($this->InlineTypes[$marker] as $inlineType)
          {
              $handler = 'inline'.$inlineType;
              $Inline = $this->$handler($Excerpt);

              if ( ! isset($Inline))
              {
                  continue;
              }

              if (isset($Inline['position']))
              {
                  # an inline that starts after the marker belongs to a
                  # later marker
                  if ($Inline['position'] > $markerPosition)
                  {
                      continue;
                  }
              }
              else
              {
                  # by default an inline starts at its marker
                  $Inline['position'] = $markerPosition;
              }

              # the text in front of the inline, then the inline itself
              $markup .= $this->unmarkedText(substr($text, 0, $Inline['position']));
              $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

              # carry on after the inline
              $text = substr($text, $Inline['position'] + $Inline['extent']);

              continue 2;
          }

          # the marker does not start an inline, so it is plain text
          $consumed = $markerPosition + 1;

          $markup .= $this->unmarkedText(substr($text, 0, $consumed));
          $text = substr($text, $consumed);
      }

      return $markup.$this->unmarkedText($text);
  }
  791. #
  792. # ~
  793. #
  #
  # Matches a code span delimited by equally long runs of the marker
  # character. The content is escaped with htmlspecialchars() and every
  # line break, together with the spaces in front of it, becomes a single
  # space.
  #
  protected function inlineCode($Excerpt)
  {
      $marker = $Excerpt['text'][0];

      $pattern = '/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s';

      if ( ! preg_match($pattern, $Excerpt['text'], $matches))
      {
          return;
      }

      $code = htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8');
      $code = preg_replace("/[ ]*\n/", ' ', $code);

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'code',
              'text' => $code,
          ),
      );
  }
  #
  # Matches an e-mail address in angle brackets, such as <me@example.com>,
  # and turns it into a link. The href gets a "mailto:" prefix unless the
  # address already carries one.
  #
  protected function inlineEmailTag($Excerpt)
  {
      if (strpos($Excerpt['text'], '>') === false)
      {
          return;
      }

      if ( ! preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
      {
          return;
      }

      $address = $matches[1];
      $href = isset($matches[2]) ? $address : 'mailto:' . $address;

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'a',
              'text' => $address,
              'attributes' => array(
                  'href' => $href,
              ),
          ),
      );
  }
  #
  # Matches strong or regular emphasis.
  #
  # The strong pattern for the marker is tried first, but only when the
  # marker is doubled; otherwise, or when it does not match, the emphasis
  # pattern is tried. The first captured group becomes the element text.
  #
  protected function inlineEmphasis($Excerpt)
  {
      if ( ! isset($Excerpt['text'][1]))
      {
          return;
      }

      $marker = $Excerpt['text'][0];
      $name = null;

      if ($Excerpt['text'][1] === $marker && preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
      {
          $name = 'strong';
      }
      elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
      {
          $name = 'em';
      }

      if ($name === null)
      {
          return;
      }

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => $name,
              'handler' => 'line',
              'text' => $matches[1],
          ),
      );
  }
  #
  # Matches a backslash escape: when the character after the marker is
  # one of $this->specialCharacters, both characters are replaced by that
  # character alone.
  #
  protected function inlineEscapeSequence($Excerpt)
  {
      if ( ! isset($Excerpt['text'][1]))
      {
          return;
      }

      $escaped = $Excerpt['text'][1];

      if ( ! in_array($escaped, $this->specialCharacters))
      {
          return;
      }

      return array(
          'markup' => $escaped,
          'extent' => 2,
      );
  }
  #
  # Matches an image, which is a link preceded by "!".
  #
  # The text after the "!" is handed to inlineLink(). The link's href
  # becomes the image's src, its text the alt, and its remaining
  # attributes are carried over.
  #
  protected function inlineImage($Excerpt)
  {
      $followedByBracket = isset($Excerpt['text'][1]) && $Excerpt['text'][1] === '[';

      if ( ! $followedByBracket)
      {
          return;
      }

      # parse everything after the "!" as a link
      $Excerpt['text']= substr($Excerpt['text'], 1);

      $Link = $this->inlineLink($Excerpt);

      if ($Link === null)
      {
          return;
      }

      $attributes = array(
          'src' => $Link['element']['attributes']['href'],
          'alt' => $Link['element']['text'],
      );

      # carry over the other link attributes, but not the href
      $attributes += $Link['element']['attributes'];

      unset($attributes['href']);

      return array(
          # one more than the link, to account for the "!"
          'extent' => $Link['extent'] + 1,
          'element' => array(
              'name' => 'img',
              'attributes' => $attributes,
          ),
      );
  }
  #
  # Matches an inline link, [text](url "title"), or a reference link,
  # [text][id] / [text].
  #
  # $Excerpt['text'] has to start with the opening bracket. Returns an
  # array with the 'extent' of the matched text and the "a" 'element',
  # or nothing when the text is not a link or refers to an unknown
  # definition.
  #
  protected function inlineLink($Excerpt)
  {
      $Element = array(
          'name' => 'a',
          'handler' => 'line',
          'text' => null,
          'attributes' => array(
              'href' => null,
              'title' => null,
          ),
      );

      $extent = 0;

      $remainder = $Excerpt['text'];

      # The balanced bracket expression has to start right at the
      # beginning of the excerpt. Without the anchor the pattern could
      # match a later "[...]" while the extent was still counted from
      # the start, so the wrong part of the text got consumed.
      # Group 1 is the whole bracket expression (recursed into through
      # (?1), which leaves the anchor out); group 2 is the link text.
      if ( ! preg_match('/^(\[((?:[^][]|(?1))*)\])/', $remainder, $matches))
      {
          return;
      }

      $Element['text'] = $matches[2];

      $extent += strlen($matches[0]);

      $remainder = substr($remainder, $extent);

      if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches))
      {
          # inline link: the destination and an optional quoted title

          $Element['attributes']['href'] = $matches[1];

          if (isset($matches[2]))
          {
              $Element['attributes']['title'] = substr($matches[2], 1, - 1);
          }

          $extent += strlen($matches[0]);
      }
      else
      {
          # reference link: an explicit [id], or the link text itself

          if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
          {
              $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
              $definition = strtolower($definition);

              $extent += strlen($matches[0]);
          }
          else
          {
              $definition = strtolower($Element['text']);
          }

          if ( ! isset($this->DefinitionData['Reference'][$definition]))
          {
              return;
          }

          $Definition = $this->DefinitionData['Reference'][$definition];

          $Element['attributes']['href'] = $Definition['url'];
          $Element['attributes']['title'] = $Definition['title'];
      }

      # Escape the characters that would break out of the attribute value.
      # "&" has to come first so that the entities added afterwards are
      # not escaped again.
      # NOTE(review): the double quote is escaped on the assumption that
      # attribute values are emitted without further escaping, which the
      # existing "&" and "<" handling implies - confirm against element().
      $Element['attributes']['href'] = str_replace(
          array('&', '<', '"'),
          array('&amp;', '&lt;', '&quot;'),
          $Element['attributes']['href']
      );

      if (isset($Element['attributes']['title']))
      {
          $Element['attributes']['title'] = str_replace('"', '&quot;', $Element['attributes']['title']);
      }

      return array(
          'extent' => $extent,
          'element' => $Element,
      );
  }
  #
  # Matches a piece of raw inline HTML: a closing tag, a comment, or an
  # opening / self-closing tag. The matched text is passed through as is.
  #
  # Returns nothing when markup is escaped or when the excerpt contains
  # no ">".
  #
  protected function inlineMarkup($Excerpt)
  {
      if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false)
      {
          return;
      }

      $second = $Excerpt['text'][1];

      # the patterns that apply to this excerpt, in the order they are tried
      $patterns = array();

      if ($second === '/')
      {
          $patterns []= '/^<\/\w*[ ]*>/s';
      }

      if ($second === '!')
      {
          $patterns []= '/^<!---?[^>-](?:-?[^-])*-->/s';
      }

      if ($second !== ' ')
      {
          $patterns []= '/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s';
      }

      foreach ($patterns as $pattern)
      {
          if (preg_match($pattern, $Excerpt['text'], $matches))
          {
              return array(
                  'markup' => $matches[0],
                  'extent' => strlen($matches[0]),
              );
          }
      }
  }
  #
  # Replaces a special character with its entity.
  #
  # An "&" that does not start an entity becomes "&amp;"; ">", "<" and
  # '"' become "&gt;", "&lt;" and "&quot;". An "&" that does start an
  # entity is left alone.
  #
  protected function inlineSpecialCharacter($Excerpt)
  {
      $character = $Excerpt['text'][0];

      if ($character === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
      {
          return array(
              'markup' => '&amp;',
              'extent' => 1,
          );
      }

      $entityNames = array('>' => 'gt', '<' => 'lt', '"' => 'quot');

      if ( ! isset($entityNames[$character]))
      {
          return;
      }

      return array(
          'markup' => '&'.$entityNames[$character].';',
          'extent' => 1,
      );
  }
  #
  # Matches ~~struck text~~ at the start of the excerpt. The content may not
  # begin or end with whitespace.
  #
  # Returns the consumed byte length and a 'del' element whose text is passed
  # on to the 'line' handler, or null when there is no match.
  #
  protected function inlineStrikethrough($Excerpt)
  {
      $text = $Excerpt['text'];

      if ( ! isset($text[1]) or $text[1] !== '~')
      {
          return;
      }

      if ( ! preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $text, $matches))
      {
          return;
      }

      $Element = array(
          'name' => 'del',
          'text' => $matches[1],
          'handler' => 'line',
      );

      return array(
          'extent' => strlen($matches[0]),
          'element' => $Element,
      );
  }
  #
  # Turns a bare http(s) URL found in the excerpt's context into a link.
  #
  # Only runs when URL auto-linking is switched on and the excerpt text has
  # a '/' at offset 2. The URL is searched for in $Excerpt['context'], so the
  # result carries a 'position' (byte offset of the match within the
  # context) alongside the usual 'extent'.
  #
  # Returns null when auto-linking is off or no URL is found.
  #
  protected function inlineUrl($Excerpt)
  {
      if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
      {
          return;
      }

      if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
      {
          $url = $matches[0][0];

          # The pattern lets '"' and '&' through and element() writes
          # attribute values between double quotes without escaping them, so
          # the href has to be made attribute-safe here. '&' is replaced
          # first so that the entities added afterwards are left alone.

          $href = str_replace(array('&', '<', '"'), array('&amp;', '&lt;', '&quot;'), $url);

          # the link text gets the same escaping that inlineUrlTag applies

          $text = str_replace(array('&', '<'), array('&amp;', '&lt;'), $url);

          $Inline = array(
              'extent' => strlen($url), # measured on the raw match, not the escaped copy
              'position' => $matches[0][1],
              'element' => array(
                  'name' => 'a',
                  'text' => $text,
                  'attributes' => array(
                      'href' => $href,
                  ),
              ),
          );

          return $Inline;
      }
  }
  #
  # Matches an autolink of the form <scheme://address> at the start of the
  # excerpt and turns it into a link whose text is the URL itself.
  #
  # Returns the consumed byte length and an 'a' element, or null when the
  # excerpt does not start with such a tag.
  #
  protected function inlineUrlTag($Excerpt)
  {
      if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
      {
          # link text: escape the characters that would be read as markup

          $text = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]);

          # The pattern lets '"' through and element() writes attribute
          # values between double quotes without escaping them, so the quote
          # has to be neutralised as well or it would close the href early.
          # '&' is replaced first so the added entities are left alone.

          $href = str_replace(array('&', '<', '"'), array('&amp;', '&lt;', '&quot;'), $matches[1]);

          return array(
              'extent' => strlen($matches[0]),
              'element' => array(
                  'name' => 'a',
                  'text' => $text,
                  'attributes' => array(
                      'href' => $href,
                  ),
              ),
          );
      }
  }
  1057. # ~
  #
  # Post-processes text that no inline handler claimed by converting line
  # endings into <br /> tags.
  #
  # With breaks enabled every newline (plus the spaces before it) becomes a
  # break. Otherwise only a newline preceded by two or more spaces or by a
  # backslash does, and a single space left before a newline is dropped.
  #
  protected function unmarkedText($text)
  {
      if ($this->breaksEnabled)
      {
          return preg_replace('/[ ]*\n/', "<br />\n", $text);
      }

      $withBreaks = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);

      return str_replace(" \n", "\n", $withBreaks);
  }
  1071. #
  1072. # Handlers
  1073. #
  #
  # Serialises one element description into markup.
  #
  # $Element keys read here:
  #   'name'       - tag name
  #   'attributes' - optional name => value map; null values are skipped and
  #                  values are written as given, without escaping
  #   'text'       - optional content; when absent the tag is self-closed
  #   'handler'    - optional name of a method on this object that turns
  #                  'text' into markup; without it 'text' is used as is
  #
  protected function element(array $Element)
  {
      $tag = $Element['name'];

      $attributes = '';

      if (isset($Element['attributes']))
      {
          foreach ($Element['attributes'] as $attribute => $value)
          {
              if ($value !== null)
              {
                  $attributes .= ' '.$attribute.'="'.$value.'"';
              }
          }
      }

      $opening = '<'.$tag.$attributes;

      if ( ! isset($Element['text']))
      {
          return $opening.' />';
      }

      $content = isset($Element['handler'])
          ? $this->{$Element['handler']}($Element['text'])
          : $Element['text'];

      return $opening.'>'.$content.'</'.$tag.'>';
  }
  #
  # Serialises a list of element descriptions. Each element's markup is
  # preceded by a newline and the result ends with one more newline, so an
  # empty list yields a single newline.
  #
  protected function elements(array $Elements)
  {
      $pieces = array();

      foreach ($Elements as $Element)
      {
          $pieces[] = "\n" . $this->element($Element);
      }

      $pieces[] = "\n";

      return implode('', $pieces);
  }
  1117. # ~
  #
  # Renders the lines of a list item.
  #
  # When none of the lines is empty and the trimmed markup starts with a
  # paragraph, the leading <p> and the first </p> are removed; otherwise the
  # rendered markup is returned untouched.
  #
  protected function li($lines)
  {
      $rendered = $this->lines($lines);

      $trimmed = trim($rendered);

      if (in_array('', $lines) or substr($trimmed, 0, 3) !== '<p>')
      {
          return $rendered;
      }

      # drop the opening tag, then cut out the first closing tag

      $unwrapped = substr($trimmed, 3);

      $closingAt = strpos($unwrapped, "</p>");

      return substr_replace($unwrapped, '', $closingAt, 4);
  }
  1131. #
  1132. # Deprecated Methods
  1133. #
  #
  # Deprecated alias kept for older callers: forwards to text() and returns
  # its result.
  #
  function parse($text)
  {
      return $this->text($text);
  }
  1139. #
  1140. # Static Methods
  1141. #
  #
  # Returns the parser registered under $name, creating it on first use.
  #
  # The object is created with `new static()`, i.e. as an instance of the
  # class the method was called on, while the registry below is keyed by
  # name only - a later call with the same name returns the object that was
  # stored first.
  #
  static function instance($name = 'default')
  {
      if ( ! isset(self::$instances[$name]))
      {
          self::$instances[$name] = new static();
      }

      return self::$instances[$name];
  }

  # registry of named instances handed out by instance()

  private static $instances = array();
  1153. #
  1154. # Fields
  1155. #
  # Definitions collected while parsing. inlineLink() looks reference links
  # up in $DefinitionData['Reference'][<lower-cased id>] and reads the 'url'
  # and 'title' entries of what it finds.

  protected $DefinitionData;

  #
  # Read-Only

  # Single characters, listed as literal strings.
  # NOTE(review): presumably the set that may be backslash-escaped - the
  # code that consumes this list is outside this section, confirm there.

  protected $specialCharacters = array(
      '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|',
  );

  # Patterns for doubled-marker emphasis (**text** / __text__), keyed by the
  # marker character. Group 1 captures the text between the markers.

  protected $StrongRegex = array(
      '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
      '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
  );

  # Patterns for single-marker emphasis (*text* / _text_), keyed by the
  # marker character. Group 1 captures the text between the markers.

  protected $EmRegex = array(
      '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
      '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
  );

  # Pattern fragment (no delimiters) for one HTML attribute: a name with an
  # optional unquoted, double-quoted or single-quoted value. inlineMarkup()
  # splices it into its opening-tag pattern.

  protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

  # Tag names of elements that take no content.
  # NOTE(review): consumer is outside this section - confirm usage there.

  protected $voidElements = array(
      'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
  );

  # Tag names treated as text-level (inline) elements.
  # NOTE(review): consumer is outside this section - confirm usage there.

  protected $textLevelElements = array(
      'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
      'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
      'i', 'rp', 'del', 'code', 'strike', 'marquee',
      'q', 'rt', 'ins', 'font', 'strong',
      's', 'tt', 'sub', 'mark',
      'u', 'xm', 'sup', 'nobr',
      'var', 'ruby',
      'wbr', 'span',
      'time',
  );
  1185. }