*/
abstract class Parser
{
    /**
     * @var integer the maximum nesting level for language elements.
     */
    public $maximumNestingLevel = 32;

    /**
     * @var array stack of the element types / inline parser method names that are currently
     * being processed, innermost first.
     * TODO remove in favor of absy
     */
    protected $context = [];

    /**
     * @var array these are "escapeable" characters. When using one of these prefixed with a
     * backslash, the character will be outputted without the backslash and is not interpreted
     * as markdown.
     */
    protected $escapeCharacters = [
        '\\', // backslash
    ];

    /**
     * @var integer current nesting depth of `parseBlocks()` / `parseInline()` calls,
     * compared against [[maximumNestingLevel]].
     */
    private $_depth = 0;


    /**
     * Parses the given text considering the full language.
     *
     * This includes parsing block elements as well as inline elements.
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parse($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // keep prepare() and cleanup() paired even when there is nothing to parse
            $this->cleanup();
            return '';
        }

        // normalize all line endings to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseBlocks(explode("\n", $text));
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * Parses a paragraph without block elements (block elements are ignored).
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parseParagraph($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // keep prepare() and cleanup() paired even when there is nothing to parse
            $this->cleanup();
            return '';
        }

        // normalize all line endings to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseInline($text);
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * This method will be called before `parse()` and `parseParagraph()`.
     * You can override it to do some initialization work.
     */
    protected function prepare()
    {
    }

    /**
     * This method will be called after `parse()` and `parseParagraph()`.
     * You can override it to do cleanup.
     */
    protected function cleanup()
    {
    }


    // block parsing

    /**
     * @var array|null cached result of `blockTypes()`, `null` until first requested.
     */
    private $_blockTypes;

    /**
     * Collects the block types from the protected `identify*` methods of this object.
     *
     * The result is computed once via reflection and cached afterwards.
     *
     * @return array a list of block element types available.
     */
    protected function blockTypes()
    {
        if ($this->_blockTypes === null) {
            // detect block types via "identify" functions
            $reflection = new \ReflectionClass($this);
            $this->_blockTypes = array_filter(array_map(function ($method) {
                $name = $method->getName();
                return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
            }, $reflection->getMethods(\ReflectionMethod::IS_PROTECTED)));

            // sorting also re-indexes the filtered list
            sort($this->_blockTypes);
        }
        return $this->_blockTypes;
    }

    /**
     * Given a set of lines and an index of a current line it uses the registered block types to
     * detect the type of this line.
     *
     * The first block type whose `identify*` method returns a truthy value wins.
     *
     * @param array $lines
     * @param integer $current
     * @return string name of the block type in lower case
     */
    protected function detectLineType($lines, $current)
    {
        $line = $lines[$current];
        $blockTypes = $this->blockTypes();
        foreach ($blockTypes as $blockType) {
            if ($this->{'identify' . $blockType}($line, $lines, $current)) {
                return $blockType;
            }
        }
        // consider the line a normal paragraph if no other block type matches
        return 'paragraph';
    }

    /**
     * Parse block elements by calling `detectLineType()` to identify them
     * and call consume function afterwards.
     *
     * @param array $lines the lines to convert into blocks
     * @return array list of blocks. When the maximum nesting level is reached, a single
     * `text` element holding the unparsed input is returned instead.
     */
    protected function parseBlocks($lines)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', implode("\n", $lines)]];
        }
        $this->_depth++;

        $blocks = [];

        // convert lines to blocks
        for ($i = 0, $count = count($lines); $i < $count; $i++) {
            $line = $lines[$i];
            if ($line !== '' && rtrim($line) !== '') { // skip empty lines
                // identify a blocks beginning and parse the content;
                // the consume method reports the last line it used via $i
                list($block, $i) = $this->parseBlock($lines, $i);
                if ($block !== false) {
                    $blocks[] = $block;
                }
            }
        }

        $this->_depth--;

        return $blocks;
    }

    /**
     * Parses the block at current line by identifying the block type and parsing the content
     *
     * @param $lines
     * @param $current
     * @return array Array of two elements, the first element contains the block,
     * the second contains the next line index to be parsed.
     */
    protected function parseBlock($lines, $current)
    {
        // identify block type for this line
        $blockType = $this->detectLineType($lines, $current);

        // call consume method for the detected block type to consume further lines
        return $this->{'consume' . $blockType}($lines, $current);
    }

    /**
     * Renders a list of elements by dispatching each one to its `render*` method.
     *
     * While an element is rendered its type is the first entry of [[context]].
     *
     * @param array $blocks list of elements, each one having its type at index 0
     * @return string the concatenated output of all render methods
     */
    protected function renderAbsy($blocks)
    {
        $output = '';
        foreach ($blocks as $block) {
            array_unshift($this->context, $block[0]);
            $output .= $this->{'render' . $block[0]}($block);
            array_shift($this->context);
        }
        return $output;
    }

    /**
     * Consume lines for a paragraph
     *
     * Lines are collected until the first blank line or the end of input.
     *
     * @param $lines
     * @param $current
     * @return array the paragraph block and the index of the last consumed line
     */
    protected function consumeParagraph($lines, $current)
    {
        // consume until newline
        $content = [];
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            if (ltrim($lines[$i]) !== '') {
                $content[] = $lines[$i];
            } else {
                break;
            }
        }
        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];
        // $i points one past the last consumed line, step back to it
        return [$block, --$i];
    }

    /**
     * Render a paragraph block
     *
     * The rendered content is preceded by one newline and followed by two.
     *
     * @param $block
     * @return string
     */
    protected function renderParagraph($block)
    {
        return "\n" . $this->renderAbsy($block['content']) . "\n\n";
    }


    // inline parsing

    /**
     * @var array the set of inline markers to use in different contexts.
     * Indexed by the first character of the marker, each entry maps markers to parser methods.
     */
    private $_inlineMarkers = [];

    /**
     * Returns a map of inline markers to the corresponding parser methods.
     *
     * This array defines handler methods for inline markdown markers.
     * When a marker is found in the text, the handler method is called with the text
     * starting at the position of the marker.
     *
     * Note that markers starting with whitespace may slow down the parser,
     * you may want to use [[renderText]] to deal with them.
     *
     * You may override this method to define a set of markers and parsing methods.
     * The default implementation looks for protected methods starting with `parse` that
     * also have an `@marker` annotation in PHPDoc.
     *
     * @return array a map of markers to parser methods
     */
    protected function inlineMarkers()
    {
        $markers = [];
        // detect "parse" functions
        $reflection = new \ReflectionClass($this);
        foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
            $methodName = $method->getName();
            if (strncmp($methodName, 'parse', 5) === 0) {
                preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
                foreach ($matches[1] as $match) {
                    $markers[$match] = $methodName;
                }
            }
        }
        return $markers;
    }

    /**
     * Prepare markers that are used in the text to parse
     *
     * Add all markers that are present in markdown.
     * Check is done to avoid iterations in parseInline(), good for huge markdown files
     *
     * @param string $text
     */
    protected function prepareMarkers($text)
    {
        $this->_inlineMarkers = [];
        foreach ($this->inlineMarkers() as $marker => $method) {
            // numeric-string array keys come back as integers, restore the string form
            $marker = (string) $marker;
            if (strpos($text, $marker) !== false) {
                $m = $marker[0];
                // put the longest marker first
                if (isset($this->_inlineMarkers[$m])) {
                    reset($this->_inlineMarkers[$m]);
                    if (strlen($marker) > strlen((string) key($this->_inlineMarkers[$m]))) {
                        $this->_inlineMarkers[$m] = array_merge([$marker => $method], $this->_inlineMarkers[$m]);
                        continue;
                    }
                }
                $this->_inlineMarkers[$m][$marker] = $method;
            }
        }
    }

    /**
     * Parses inline elements of the language.
     *
     * @param string $text the inline text to parse.
     * @return array list of inline elements. When the maximum nesting level is reached,
     * a single `text` element holding the unparsed input is returned instead.
     */
    protected function parseInline($text)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', $text]];
        }
        $this->_depth++;

        // all characters that may start a marker
        $markers = implode('', array_keys($this->_inlineMarkers));

        $paragraph = [];

        // compare against '' explicitly, `empty()` would reject the marker set "0"
        while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

            // $found is the tail of $text, so its offset follows from the lengths
            $pos = strlen($text) - strlen($found);

            // add the text up to next marker to the paragraph
            if ($pos !== 0) {
                $paragraph[] = ['text', substr($text, 0, $pos)];
            }
            $text = $found;

            $parsed = false;
            foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
                // numeric-string array keys come back as integers, restore the string form
                $marker = (string) $marker;
                if (strncmp($text, $marker, strlen($marker)) === 0) {
                    // parse the marker
                    array_unshift($this->context, $method);
                    list($output, $offset) = $this->$method($text);
                    array_shift($this->context);

                    $paragraph[] = $output;
                    $text = substr($text, $offset);
                    $parsed = true;
                    break;
                }
            }
            if (!$parsed) {
                // no marker matched here, emit the character as text and move on
                $paragraph[] = ['text', substr($text, 0, 1)];
                $text = substr($text, 1);
            }
        }

        $paragraph[] = ['text', $text];

        $this->_depth--;

        return $paragraph;
    }

    /**
     * Parses escaped special characters.
     * @marker \
     */
    protected function parseEscape($text)
    {
        if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
            // drop the backslash and keep the escaped character
            return [['text', $text[1]], 2];
        }
        // not an escape sequence, keep the backslash itself
        return [['text', $text[0]], 1];
    }

    /**
     * This function renders plain text sections in the markdown text.
     * It can be used to work on normal text sections for example to highlight keywords or
     * do special escaping.
     *
     * @param array $block the text element, the text is stored at index 1
     * @return string
     */
    protected function renderText($block)
    {
        return $block[1];
    }
}