|
-
-
- @@ -65,7 +65,7 @@
-
- public function __construct($data) {
- $data = str_replace("\r\n", "\n", $data);
- - $date = str_replace("\r", null, $data);
- + $data = str_replace("\r", null, $data);
-
- $this->data = $data;
- $this->char = -1;
- @@ -211,7 +211,10 @@
- // If nothing is returned, emit a U+0026 AMPERSAND character token.
- // Otherwise, emit the character token that was returned.
- $char = (!$entity) ? '&' : $entity;
- - $this->emitToken($char);
- + $this->emitToken(array(
- + 'type' => self::CHARACTR,
- + 'data' => $char
- + ));
-
- // Finally, switch to the data state.
- $this->state = 'data';
- @@ -708,7 +711,7 @@
- } elseif($char
- /* U+0026 AMPERSAND (&)
- Switch to the entity in attribute value state. */
- - $this->entityInAttributeValueState('non');
- + $this->entityInAttributeValueState();
-
- } elseif($char
- /* U+003E GREATER-THAN SIGN (>)
- @@ -738,7 +741,8 @@
- ? '&'
- : $entity;
-
- - $this->emitToken($char);
- + $last = count($this->token['attr']) - 1;
- + $this->token['attr'][$last]['value'] .= $char;
- }
-
- private function bogusCommentState() {
- @@ -1066,6 +1070,11 @@
- $this->char++;
-
- if(in_array($id, $this->entities)) {
- + if ($e_name[$c-1] !== ';') {
- + if ($c < $len && $e_name[$c] == ';') {
- + $this->char++; // consume extra semicolon
- + }
- + }
- $entity = $id;
- break;
- }
- @@ -2084,7 +2093,7 @@
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- - $this->insertElement($token);
- + $this->insertElement($token, true, true);
- break;
- }
- break;
- @@ -3465,7 +3474,18 @@
- }
- }
-
- - private function insertElement($token, $append = true) {
- + private function insertElement($token, $append = true, $check = false) {
- + // Proprietary workaround for libxml2's limitations with tag names
- + if ($check) {
- + // Slightly modified HTML5 tag-name modification,
- + // removing anything that's not an ASCII letter, digit, or hyphen
- + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
- + // Remove leading hyphens and numbers
- + $token['name'] = ltrim($token['name'], '-0..9');
- + // In theory, this should never be needed, but just in case
- + if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
- + }
- +
- $el = $this->dom->createElement($token['name']);
-
- foreach($token['attr'] as $attr) {
- @@ -3659,7 +3679,7 @@
- }
- }
-
- - private function generateImpliedEndTags(array $exclude = array()) {
- + private function generateImpliedEndTags($exclude = array()) {
- /* When the steps below require the UA to generate implied end tags,
- then, if the current node is a dd element, a dt element, an li element,
- a p element, a td element, a th element, or a tr element, the UA must
- @@ -3673,7 +3693,8 @@
- }
- }
-
- - private function getElementCategory($name) {
- + private function getElementCategory($node) {
- + $name = $node->tagName;
- if(in_array($name, $this->special))
- return self::SPECIAL;
-
|