250 lines
6.7KB

  1. <?php
  2. namespace cebe\markdown;
  3. use cebe\markdown\block\TableTrait;
  4. // work around https://github.com/facebook/hhvm/issues/1120
  5. defined('ENT_HTML401') || define('ENT_HTML401', 0);
  6. /**
  7. * Markdown parser for the [markdown extra](http://michelf.ca/projects/php-markdown/extra/) flavor.
  8. *
  9. * @author Carsten Brandt <mail@cebe.cc>
  10. * @license https://github.com/cebe/markdown/blob/master/LICENSE
  11. * @link https://github.com/cebe/markdown#readme
  12. */
  13. class MarkdownExtra extends Markdown
  14. {
  15. // include block element parsing using traits
  16. use block\TableTrait;
  17. use block\FencedCodeTrait;
  18. // include inline element parsing using traits
  19. // TODO
  20. /**
  21. * @var bool whether special attributes on code blocks should be applied on the `<pre>` element.
  22. * The default behavior is to put them on the `<code>` element.
  23. */
  24. public $codeAttributesOnPre = false;
  25. /**
  26. * @inheritDoc
  27. */
  28. protected $escapeCharacters = [
  29. // from Markdown
  30. '\\', // backslash
  31. '`', // backtick
  32. '*', // asterisk
  33. '_', // underscore
  34. '{', '}', // curly braces
  35. '[', ']', // square brackets
  36. '(', ')', // parentheses
  37. '#', // hash mark
  38. '+', // plus sign
  39. '-', // minus sign (hyphen)
  40. '.', // dot
  41. '!', // exclamation mark
  42. '<', '>',
  43. // added by MarkdownExtra
  44. ':', // colon
  45. '|', // pipe
  46. ];
  /**
   * @var string regular expression fragment (without delimiters) that matches a special
   * attribute block: curly braces around one or more `#name` or `.name` tokens, each
   * optionally followed by whitespace, e.g. `{#intro .note}`.
   * Capture group 1 holds the text between the braces.
   *
   * Names are limited to ASCII letters, digits, `_` and `-`. The letter ranges are spelled
   * `A-Za-z` on purpose: the single range `A-z` would also admit the characters that sit
   * between `Z` and `a` in ASCII (`[`, `\`, `]`, `^`, `_` and the backtick).
   */
  private $_specialAttributesRegex = '\{(([#\.][A-Za-z0-9_-]+\s*)+)\}';
  48. // TODO allow HTML indented 3 spaces
  49. // TODO add markdown inside HTML blocks
  50. // TODO implement definition lists
  51. // TODO implement footnotes
  52. // TODO implement Abbreviations
  53. // block parsing
  /**
   * Checks whether a line is a link reference definition.
   *
   * The accepted shape is: up to three leading spaces, `[label]:`, a URL that contains no
   * whitespace, an optional title and an optional special attribute block.
   * The title may be wrapped in double quotes, single quotes or parentheses, which are the
   * same delimiters consumeReference() accepts. Every line accepted here is therefore also
   * matched by consumeReference().
   *
   * @param string $line the line to inspect.
   * @return bool whether the line is a reference definition.
   */
  protected function identifyReference($line)
  {
      // cheap pre-check on the first character; isset() keeps an empty line from
      // triggering an "uninitialized string offset" error
      if (!isset($line[0]) || ($line[0] !== ' ' && $line[0] !== '[')) {
          return false;
      }

      $pattern = '/^ {0,3}\[(.+?)\]:\s*([^\s]+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*('
          . $this->_specialAttributesRegex . ')?\s*$/';

      return preg_match($pattern, $line) === 1;
  }
  /**
   * Consume link references
   *
   * Reads consecutive reference definition lines starting at $current and stores each one
   * in $this->references under its lower-cased label. An entry always has a `url` key and
   * may have `title` and `attributes` keys.
   *
   * @param array $lines all lines of the document.
   * @param int $current index of the first line to look at.
   * @return array `[false, $index]` where `$index` is the index of the last line that
   * was consumed.
   */
  protected function consumeReference($lines, $current)
  {
      $definitionPattern = '/^ {0,3}\[(.+?)\]:\s*(.+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*('
          . $this->_specialAttributesRegex . ')?\s*$/';

      for (; isset($lines[$current]); $current++) {
          if (!preg_match($definitionPattern, $lines[$current], $matches)) {
              break;
          }

          $label = strtolower($matches[1]);
          $this->references[$label] = [
              'url' => $this->replaceEscape($matches[2]),
          ];

          if (isset($matches[3])) {
              $this->references[$label]['title'] = $matches[3];
          } elseif (
              isset($lines[$current + 1])
              && preg_match('/^\s+[\(\'"](.+?)[\)\'"]\s*$/', $lines[$current + 1], $titleMatch)
          ) {
              // the title sits on the following line; that line is consumed as well
              $this->references[$label]['title'] = $titleMatch[1];
              $current++;
          }

          // group 5 is the text between the braces of the special attribute block
          if (isset($matches[5])) {
              $this->references[$label]['attributes'] = $matches[5];
          }
      }

      // $current now points at the first line that is not part of the references
      return [false, $current - 1];
  }
  /**
   * Consume lines for a fenced code block
   *
   * The fence is everything on the opening line up to and including the last backtick
   * (or, if the line has no backtick, the last tilde). Lines are collected until one
   * equals the fence after right-trimming, or until the input ends.
   *
   * @param array $lines all lines of the document.
   * @param int $current index of the opening fence line.
   * @return array `[$block, $index]`: `$block` is `['code', 'attributes' => ..., 'content' => ...]`
   * and `$index` is the index of the closing fence line, or `count($lines)` when no
   * closing fence exists.
   */
  protected function consumeFencedCode($lines, $current)
  {
      $opener = rtrim($lines[$current]);

      $fenceEnd = strrpos($opener, '`');
      if ($fenceEnd === false) {
          $fenceEnd = strrpos($opener, '~');
      }

      $fence = substr($opener, 0, $fenceEnd + 1);
      // the attribute string deliberately starts AT the last fence character, not after it
      $attributes = substr($opener, $fenceEnd);

      $body = [];
      $total = count($lines);
      $index = $current + 1;
      while ($index < $total && rtrim($lines[$index]) !== $fence) {
          $body[] = $lines[$index];
          $index++;
      }

      $block = [
          'code',
          'attributes' => $attributes,
          'content' => implode("\n", $body),
      ];

      return [$block, $index];
  }
  /**
   * Renders a code block as `<pre><code>...</code></pre>`.
   *
   * The attributes produced by renderAttributes() go on the `<pre>` element when
   * $codeAttributesOnPre is true and on the `<code>` element otherwise.
   *
   * @param array $block the code block; `content` holds the raw code.
   * @return string the HTML, terminated by a newline.
   */
  protected function renderCode($block)
  {
      $attributes = $this->renderAttributes($block);

      if ($this->codeAttributesOnPre) {
          $openingTags = "<pre$attributes><code>";
      } else {
          $openingTags = "<pre><code$attributes>";
      }

      // a newline is appended to the content before it is escaped
      $escapedCode = htmlspecialchars($block['content'] . "\n", ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');

      return $openingTags . $escapedCode . "</code></pre>\n";
  }
  /**
   * Renders a headline
   *
   * Any `specialAttributes` element found in the headline content is removed from the
   * content and used as the attributes of the heading tag instead. If there are several,
   * the last one is used.
   *
   * @param array $block the headline block with `level` and `content`.
   * @return string the `<hN>` element, terminated by a newline.
   */
  protected function renderHeadline($block)
  {
      foreach ($block['content'] as $key => $node) {
          if ($node[0] !== 'specialAttributes') {
              continue;
          }
          $block['attributes'] = $node[1];
          unset($block['content'][$key]);
      }

      $level = $block['level'];
      $attributes = $this->renderAttributes($block);
      // trailing `#`, space and tab characters are stripped from the rendered content
      $inner = rtrim($this->renderAbsy($block['content']), "# \t");

      return "<h{$level}{$attributes}>" . $inner . "</h{$level}>\n";
  }
  /**
   * Renders the special attributes of a block as HTML attributes.
   *
   * `$block['attributes']` is split on whitespace. A token starting with `#` sets the
   * `id` (the last one wins); any other token contributes a class name. In both cases
   * the first character of the token is dropped.
   *
   * Values are HTML-escaped because the attribute string is not always pre-validated:
   * consumeFencedCode() passes on the raw text of the opening fence line.
   *
   * @param array $block the block; the `attributes` key is optional.
   * @return string the attributes, each preceded by a space, or an empty string.
   */
  protected function renderAttributes($block)
  {
      $html = [];
      if (isset($block['attributes'])) {
          $tokens = preg_split('/\s+/', $block['attributes'], -1, PREG_SPLIT_NO_EMPTY);
          foreach ($tokens as $token) {
              if ($token[0] === '#') {
                  $html['id'] = substr($token, 1);
              } else {
                  $html['class'][] = substr($token, 1);
              }
          }
      }

      $result = '';
      foreach ($html as $attr => $value) {
          if (is_array($value)) {
              // drop empty names (a token that was a single character) so they
              // leave no stray spaces in the class list
              $names = [];
              foreach ($value as $name) {
                  if ($name !== '' && $name !== false) {
                      $names[] = $name;
                  }
              }
              $value = implode(' ', $names);
          }
          // compare strictly rather than with empty(), which would discard the name "0";
          // substr() yields false instead of '' for a one-character token on PHP < 8
          if ($value === '' || $value === false) {
              continue;
          }
          $result .= ' ' . $attr . '="'
              . htmlspecialchars($value, ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8')
              . '"';
      }

      return $result;
  }
  156. // inline parsing
  /**
   * Parses a special attribute block such as `{#id .class}` located at the very
   * beginning of the given text.
   *
   * NOTE(review): the marker tag below is presumably read by the parent parser to
   * register `{` as the trigger character for this method - keep it intact.
   *
   * @marker {
   * @param string $text the remaining text, starting with `{`.
   * @return array `[$element, $consumedLength]`. On a match the element is
   * `['specialAttributes', <text between the braces>]`; otherwise the single `{` is
   * returned as a plain text element of length 1.
   */
  protected function parseSpecialAttributes($text)
  {
      // The returned length is counted from the start of $text, so the match has to be
      // anchored there. Without `^` an attribute block further along in the text would
      // match and its length would be consumed from the wrong position.
      if (preg_match('~^' . $this->_specialAttributesRegex . '~', $text, $matches)) {
          return [['specialAttributes', $matches[1]], strlen($matches[0])];
      }

      return [['text', '{'], 1];
  }
  /**
   * Renders a special attributes element back into its textual form by wrapping its
   * stored text (index 1) in curly braces.
   *
   * @param array $block the element as produced by parseSpecialAttributes().
   * @return string the attribute text enclosed in `{` and `}`.
   */
  protected function renderSpecialAttributes($block)
  {
      return sprintf('{%s}', $block[1]);
  }
  /**
   * Parses inline elements and attaches special attributes to links and images.
   *
   * A `specialAttributes` element that directly follows a `link` or `image` element is
   * removed from the result and stored as that element's `attributes` value. Special
   * attributes in any other position are left in place.
   *
   * @param string $text the inline text to parse.
   * @return array the parsed elements; keys of removed elements are not re-indexed.
   */
  protected function parseInline($text)
  {
      $elements = parent::parseInline($text);

      // key of the link/image seen immediately before the current element, if any
      $targetKey = null;
      foreach ($elements as $key => $element) {
          $type = $element[0];

          if ($type === 'link' || $type === 'image') {
              $targetKey = $key;
              continue;
          }

          if ($type === 'specialAttributes' && $targetKey !== null) {
              $elements[$targetKey]['attributes'] = $element[1];
              unset($elements[$key]);
          }

          // anything other than a link or image breaks the adjacency
          $targetKey = null;
      }

      return $elements;
  }
  /**
   * Renders a link element as an `<a>` tag, including its special attributes.
   *
   * For a reference-style link (`refkey` is set) the reference data is merged into the
   * block. If the reference cannot be found, `$block['orig']` is returned unchanged.
   *
   * @param array $block the link element.
   * @return string the rendered HTML, or `$block['orig']` for an unknown reference.
   */
  protected function renderLink($block)
  {
      if (isset($block['refkey'])) {
          $reference = $this->lookupReference($block['refkey']);
          if ($reference === false) {
              return $block['orig'];
          }
          $block = array_merge($block, $reference);
      }

      $attributes = $this->renderAttributes($block);
      $href = htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8');

      $title = '';
      if (!empty($block['title'])) {
          $title = ' title="'
              . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8')
              . '"';
      }

      return '<a href="' . $href . '"' . $title . $attributes . '>'
          . $this->renderAbsy($block['text'])
          . '</a>';
  }
  /**
   * Renders an image element as an `<img>` tag, including its special attributes.
   *
   * For a reference-style image (`refkey` is set) the reference data is merged into the
   * block. If the reference cannot be found, `$block['orig']` is returned unchanged.
   * The tag is closed with `>` when $this->html5 is truthy and with ` />` otherwise.
   *
   * @param array $block the image element.
   * @return string the rendered HTML, or `$block['orig']` for an unknown reference.
   */
  protected function renderImage($block)
  {
      if (isset($block['refkey'])) {
          $reference = $this->lookupReference($block['refkey']);
          if ($reference === false) {
              return $block['orig'];
          }
          $block = array_merge($block, $reference);
      }

      $attributes = $this->renderAttributes($block);
      $src = htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8');
      $alt = htmlspecialchars($block['text'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8');

      $title = '';
      if (!empty($block['title'])) {
          $title = ' title="'
              . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8')
              . '"';
      }

      $tagEnd = $this->html5 ? '>' : ' />';

      return '<img src="' . $src . '"' . ' alt="' . $alt . '"' . $title . $attributes . $tagEnd;
  }
  220. }