103 lines
3.8KB

  1. --- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2008-07-07 09:12:12.000000000 -0400
  2. +++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-12-06 02:29:34.988800000 -0500
  3. @@ -65,7 +65,7 @@
  4. public function __construct($data) {
  5. $data = str_replace("\r\n", "\n", $data);
  6. - $date = str_replace("\r", null, $data);
  7. + $data = str_replace("\r", null, $data);
  8. $this->data = $data;
  9. $this->char = -1;
  10. @@ -211,7 +211,10 @@
  11. // If nothing is returned, emit a U+0026 AMPERSAND character token.
  12. // Otherwise, emit the character token that was returned.
  13. $char = (!$entity) ? '&' : $entity;
  14. - $this->emitToken($char);
  15. + $this->emitToken(array(
  16. + 'type' => self::CHARACTR,
  17. + 'data' => $char
  18. + ));
  19. // Finally, switch to the data state.
  20. $this->state = 'data';
  21. @@ -708,7 +711,7 @@
  22. } elseif($char === '&') {
  23. /* U+0026 AMPERSAND (&)
  24. Switch to the entity in attribute value state. */
  25. - $this->entityInAttributeValueState('non');
  26. + $this->entityInAttributeValueState();
  27. } elseif($char === '>') {
  28. /* U+003E GREATER-THAN SIGN (>)
  29. @@ -738,7 +741,8 @@
  30. ? '&'
  31. : $entity;
  32. - $this->emitToken($char);
  33. + $last = count($this->token['attr']) - 1;
  34. + $this->token['attr'][$last]['value'] .= $char;
  35. }
  36. private function bogusCommentState() {
  37. @@ -1066,6 +1070,11 @@
  38. $this->char++;
  39. if(in_array($id, $this->entities)) {
  40. + if ($e_name[$c-1] !== ';') {
  41. + if ($c < $len && $e_name[$c] == ';') {
  42. + $this->char++; // consume extra semicolon
  43. + }
  44. + }
  45. $entity = $id;
  46. break;
  47. }
  48. @@ -2084,7 +2093,7 @@
  49. /* Reconstruct the active formatting elements, if any. */
  50. $this->reconstructActiveFormattingElements();
  51. - $this->insertElement($token);
  52. + $this->insertElement($token, true, true);
  53. break;
  54. }
  55. break;
  56. @@ -3465,7 +3474,18 @@
  57. }
  58. }
  59. - private function insertElement($token, $append = true) {
  60. + private function insertElement($token, $append = true, $check = false) {
  61. + // Proprietary workaround for libxml2's limitations with tag names
  62. + if ($check) {
  63. + // Slightly modified HTML5 tag-name modification,
  64. + // removing anything that's not an ASCII letter, digit, or hyphen
  65. + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
  66. + // Remove leading hyphens and numbers
  67. + $token['name'] = ltrim($token['name'], '-0..9');
  68. + // In theory, this should never be needed, but just in case
  69. + if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
  70. + }
  71. +
  72. $el = $this->dom->createElement($token['name']);
  73. foreach($token['attr'] as $attr) {
  74. @@ -3659,7 +3679,7 @@
  75. }
  76. }
  77. - private function generateImpliedEndTags(array $exclude = array()) {
  78. + private function generateImpliedEndTags($exclude = array()) {
  79. /* When the steps below require the UA to generate implied end tags,
  80. then, if the current node is a dd element, a dt element, an li element,
  81. a p element, a td element, a th element, or a tr element, the UA must
  82. @@ -3673,7 +3693,8 @@
  83. }
  84. }
  85. - private function getElementCategory($name) {
  86. + private function getElementCategory($node) {
  87. + $name = $node->tagName;
  88. if(in_array($name, $this->special))
  89. return self::SPECIAL;