390 lines
9.4KB

  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2014 Carsten Brandt
  4. * @license https://github.com/cebe/markdown/blob/master/LICENSE
  5. * @link https://github.com/cebe/markdown#readme
  6. */
  7. namespace cebe\markdown;
  8. use ReflectionMethod;
  9. /**
  10. * A generic parser for markdown-like languages.
  11. *
  12. * @author Carsten Brandt <mail@cebe.cc>
  13. */
  14. abstract class Parser
  15. {
  16. /**
  17. * @var integer the maximum nesting level for language elements.
  18. */
  19. public $maximumNestingLevel = 32;
  20. /**
  21. * @var string the current context the parser is in.
  22. * TODO remove in favor of absy
  23. */
  24. protected $context = [];
  25. /**
  26. * @var array these are "escapeable" characters. When using one of these prefixed with a
  27. * backslash, the character will be outputted without the backslash and is not interpreted
  28. * as markdown.
  29. */
  30. protected $escapeCharacters = [
  31. '\\', // backslash
  32. ];
  33. private $_depth = 0;
  34. /**
  35. * Parses the given text considering the full language.
  36. *
  37. * This includes parsing block elements as well as inline elements.
  38. *
  39. * @param string $text the text to parse
  40. * @return string parsed markup
  41. */
  42. public function parse($text)
  43. {
  44. $this->prepare();
  45. if (ltrim($text) === '') {
  46. return '';
  47. }
  48. $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);
  49. $this->prepareMarkers($text);
  50. $absy = $this->parseBlocks(explode("\n", $text));
  51. $markup = $this->renderAbsy($absy);
  52. $this->cleanup();
  53. return $markup;
  54. }
  55. /**
  56. * Parses a paragraph without block elements (block elements are ignored).
  57. *
  58. * @param string $text the text to parse
  59. * @return string parsed markup
  60. */
  61. public function parseParagraph($text)
  62. {
  63. $this->prepare();
  64. if (ltrim($text) === '') {
  65. return '';
  66. }
  67. $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);
  68. $this->prepareMarkers($text);
  69. $absy = $this->parseInline($text);
  70. $markup = $this->renderAbsy($absy);
  71. $this->cleanup();
  72. return $markup;
  73. }
  74. /**
  75. * This method will be called before `parse()` and `parseParagraph()`.
  76. * You can override it to do some initialization work.
  77. */
  78. protected function prepare()
  79. {
  80. }
  81. /**
  82. * This method will be called after `parse()` and `parseParagraph()`.
  83. * You can override it to do cleanup.
  84. */
  85. protected function cleanup()
  86. {
  87. }
  88. // block parsing
  89. private $_blockTypes;
  90. /**
  91. * @return array a list of block element types available.
  92. */
  93. protected function blockTypes()
  94. {
  95. if ($this->_blockTypes === null) {
  96. // detect block types via "identify" functions
  97. $reflection = new \ReflectionClass($this);
  98. $this->_blockTypes = array_filter(array_map(function($method) {
  99. $name = $method->getName();
  100. return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
  101. }, $reflection->getMethods(ReflectionMethod::IS_PROTECTED)));
  102. sort($this->_blockTypes);
  103. }
  104. return $this->_blockTypes;
  105. }
  106. /**
  107. * Given a set of lines and an index of a current line it uses the registed block types to
  108. * detect the type of this line.
  109. * @param array $lines
  110. * @param integer $current
  111. * @return string name of the block type in lower case
  112. */
  113. protected function detectLineType($lines, $current)
  114. {
  115. $line = $lines[$current];
  116. $blockTypes = $this->blockTypes();
  117. foreach($blockTypes as $blockType) {
  118. if ($this->{'identify' . $blockType}($line, $lines, $current)) {
  119. return $blockType;
  120. }
  121. }
  122. // consider the line a normal paragraph if no other block type matches
  123. return 'paragraph';
  124. }
  125. /**
  126. * Parse block elements by calling `detectLineType()` to identify them
  127. * and call consume function afterwards.
  128. */
  129. protected function parseBlocks($lines)
  130. {
  131. if ($this->_depth >= $this->maximumNestingLevel) {
  132. // maximum depth is reached, do not parse input
  133. return [['text', implode("\n", $lines)]];
  134. }
  135. $this->_depth++;
  136. $blocks = [];
  137. // convert lines to blocks
  138. for ($i = 0, $count = count($lines); $i < $count; $i++) {
  139. $line = $lines[$i];
  140. if ($line !== '' && rtrim($line) !== '') { // skip empty lines
  141. // identify a blocks beginning and parse the content
  142. list($block, $i) = $this->parseBlock($lines, $i);
  143. if ($block !== false) {
  144. $blocks[] = $block;
  145. }
  146. }
  147. }
  148. $this->_depth--;
  149. return $blocks;
  150. }
  151. /**
  152. * Parses the block at current line by identifying the block type and parsing the content
  153. * @param $lines
  154. * @param $current
  155. * @return array Array of two elements, the first element contains the block,
  156. * the second contains the next line index to be parsed.
  157. */
  158. protected function parseBlock($lines, $current)
  159. {
  160. // identify block type for this line
  161. $blockType = $this->detectLineType($lines, $current);
  162. // call consume method for the detected block type to consume further lines
  163. return $this->{'consume' . $blockType}($lines, $current);
  164. }
  165. protected function renderAbsy($blocks)
  166. {
  167. $output = '';
  168. foreach ($blocks as $block) {
  169. array_unshift($this->context, $block[0]);
  170. $output .= $this->{'render' . $block[0]}($block);
  171. array_shift($this->context);
  172. }
  173. return $output;
  174. }
  175. /**
  176. * Consume lines for a paragraph
  177. *
  178. * @param $lines
  179. * @param $current
  180. * @return array
  181. */
  182. protected function consumeParagraph($lines, $current)
  183. {
  184. // consume until newline
  185. $content = [];
  186. for ($i = $current, $count = count($lines); $i < $count; $i++) {
  187. if (ltrim($lines[$i]) !== '') {
  188. $content[] = $lines[$i];
  189. } else {
  190. break;
  191. }
  192. }
  193. $block = [
  194. 'paragraph',
  195. 'content' => $this->parseInline(implode("\n", $content)),
  196. ];
  197. return [$block, --$i];
  198. }
  199. /**
  200. * Render a paragraph block
  201. *
  202. * @param $block
  203. * @return string
  204. */
  205. protected function renderParagraph($block)
  206. {
  207. return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
  208. }
  209. // inline parsing
  210. /**
  211. * @var array the set of inline markers to use in different contexts.
  212. */
  213. private $_inlineMarkers = [];
  214. /**
  215. * Returns a map of inline markers to the corresponding parser methods.
  216. *
  217. * This array defines handler methods for inline markdown markers.
  218. * When a marker is found in the text, the handler method is called with the text
  219. * starting at the position of the marker.
  220. *
  221. * Note that markers starting with whitespace may slow down the parser,
  222. * you may want to use [[renderText]] to deal with them.
  223. *
  224. * You may override this method to define a set of markers and parsing methods.
  225. * The default implementation looks for protected methods starting with `parse` that
  226. * also have an `@marker` annotation in PHPDoc.
  227. *
  228. * @return array a map of markers to parser methods
  229. */
  230. protected function inlineMarkers()
  231. {
  232. $markers = [];
  233. // detect "parse" functions
  234. $reflection = new \ReflectionClass($this);
  235. foreach($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
  236. $methodName = $method->getName();
  237. if (strncmp($methodName, 'parse', 5) === 0) {
  238. preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
  239. foreach($matches[1] as $match) {
  240. $markers[$match] = $methodName;
  241. }
  242. }
  243. }
  244. return $markers;
  245. }
  246. /**
  247. * Prepare markers that are used in the text to parse
  248. *
  249. * Add all markers that are present in markdown.
  250. * Check is done to avoid iterations in parseInline(), good for huge markdown files
  251. * @param string $text
  252. */
  253. protected function prepareMarkers($text)
  254. {
  255. $this->_inlineMarkers = [];
  256. foreach ($this->inlineMarkers() as $marker => $method) {
  257. if (strpos($text, $marker) !== false) {
  258. $m = $marker[0];
  259. // put the longest marker first
  260. if (isset($this->_inlineMarkers[$m])) {
  261. reset($this->_inlineMarkers[$m]);
  262. if (strlen($marker) > strlen(key($this->_inlineMarkers[$m]))) {
  263. $this->_inlineMarkers[$m] = array_merge([$marker => $method], $this->_inlineMarkers[$m]);
  264. continue;
  265. }
  266. }
  267. $this->_inlineMarkers[$m][$marker] = $method;
  268. }
  269. }
  270. }
  271. /**
  272. * Parses inline elements of the language.
  273. *
  274. * @param string $text the inline text to parse.
  275. * @return array
  276. */
  277. protected function parseInline($text)
  278. {
  279. if ($this->_depth >= $this->maximumNestingLevel) {
  280. // maximum depth is reached, do not parse input
  281. return [['text', $text]];
  282. }
  283. $this->_depth++;
  284. $markers = implode('', array_keys($this->_inlineMarkers));
  285. $paragraph = [];
  286. while (!empty($markers) && ($found = strpbrk($text, $markers)) !== false) {
  287. $pos = strpos($text, $found);
  288. // add the text up to next marker to the paragraph
  289. if ($pos !== 0) {
  290. $paragraph[] = ['text', substr($text, 0, $pos)];
  291. }
  292. $text = $found;
  293. $parsed = false;
  294. foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
  295. if (strncmp($text, $marker, strlen($marker)) === 0) {
  296. // parse the marker
  297. array_unshift($this->context, $method);
  298. list($output, $offset) = $this->$method($text);
  299. array_shift($this->context);
  300. $paragraph[] = $output;
  301. $text = substr($text, $offset);
  302. $parsed = true;
  303. break;
  304. }
  305. }
  306. if (!$parsed) {
  307. $paragraph[] = ['text', substr($text, 0, 1)];
  308. $text = substr($text, 1);
  309. }
  310. }
  311. $paragraph[] = ['text', $text];
  312. $this->_depth--;
  313. return $paragraph;
  314. }
  315. /**
  316. * Parses escaped special characters.
  317. * @marker \
  318. */
  319. protected function parseEscape($text)
  320. {
  321. if (isset($text[1]) && in_array($text[1], $this->escapeCharacters)) {
  322. return [['text', $text[1]], 2];
  323. }
  324. return [['text', $text[0]], 1];
  325. }
  326. /**
  327. * This function renders plain text sections in the markdown text.
  328. * It can be used to work on normal text sections for example to highlight keywords or
  329. * do special escaping.
  330. */
  331. protected function renderText($block)
  332. {
  333. return $block[1];
  334. }
  335. }