You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

139 lines
4.9KB

  1. <?php
  /**
   * Base class for all validating attribute definitions.
   *
   * This family of classes forms the core for not only HTML attribute validation,
   * but also any sort of string that needs to be validated or cleaned (which
   * means CSS properties and composite definitions are defined here too).
   * Besides defining (through code) what precisely makes the string valid,
   * subclasses are also responsible for cleaning the code if possible.
   */
  abstract class HTMLPurifier_AttrDef
  {

      /**
       * Tells us whether or not an HTML attribute is minimized.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $minimized = false;

      /**
       * Tells us whether or not an HTML attribute is required.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $required = false;

      /**
       * Validates and cleans passed string according to a definition.
       *
       * @param string $string String to be validated and cleaned.
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
       */
      abstract public function validate($string, $config, $context);

      /**
       * Convenience method that parses a string as if it were CDATA.
       *
       * This method processes a string roughly in the manner specified at
       * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
       * whitespace is removed, and every remaining line feed, tab and
       * carriage return is replaced with a single space. While most useful
       * for HTML attributes specified as CDATA, it can also be applied to
       * most CSS values.
       *
       * @note This method is not entirely standards compliant: the spec asks
       *       for line feeds to be ignored, whereas this implementation
       *       replaces them with spaces, and trim() removes more types of
       *       whitespace than specified in the spec. In practice, this is
       *       rarely a problem, as those extra characters usually have
       *       already been removed by HTMLPurifier_Encoder.
       *
       * @warning This processing is inconsistent with XML's whitespace handling
       *          as specified by section 3.3.3 and referenced XHTML 1.0 section
       *          4.7. However, note that we are NOT necessarily
       *          parsing XML, thus, this behavior may still be correct. We
       *          assume that newlines have been normalized.
       *
       * @param string $string String to normalize.
       * @return string The trimmed string with inner LF/TAB/CR turned into spaces.
       */
      public function parseCDATA($string)
      {
          $string = trim($string);
          $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
          return $string;
      }

      /**
       * Factory method for creating this class from a string.
       * @param string $string String construction info
       * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
       */
      public function make($string)
      {
          // default implementation, return a flyweight of this object.
          // If $string has an effect on the returned object (i.e. you
          // need to overload this method), it is best
          // to clone or instantiate new copies. (Instantiation is safer.)
          return $this;
      }

      /**
       * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
       * properly. THIS IS A HACK!
       *
       * Whitespace is tolerated around every component, including directly
       * inside the parentheses, and each component may carry a trailing
       * percent sign, so "rgb( 10% , 20% , 30% )" collapses to
       * "rgb(10%,20%,30%)". Text that does not look like an rgb() triple is
       * returned unchanged.
       *
       * @param string $string a CSS colour definition
       * @return string
       */
      protected function mungeRgb($string)
      {
          return preg_replace(
              '/rgb\(\s*(\d+%?)\s*,\s*(\d+%?)\s*,\s*(\d+%?)\s*\)/',
              'rgb(\1,\2,\3)',
              $string
          );
      }

      /**
       * Parses a possibly escaped CSS string and returns the "pure"
       * version of it.
       *
       * Handled forms:
       *  - backslash followed by one to six hex digits: replaced by the
       *    character HTMLPurifier_Encoder::unichr() builds for that code; a
       *    single whitespace character directly after the digits is consumed
       *    as the escape terminator;
       *  - backslash followed by a line feed: both are dropped;
       *  - backslash followed by any other character: that character is kept
       *    literally;
       *  - a lone backslash at the very end: kept as a backslash.
       *
       * @param string $string CSS string that may contain backslash escapes
       * @return string The string with escapes expanded
       */
      protected function expandCSSEscape($string)
      {
          // flexibly parse it
          $ret = '';
          for ($i = 0, $c = strlen($string); $i < $c; $i++) {
              if ($string[$i] === '\\') {
                  $i++;
                  if ($i >= $c) {
                      // nothing follows the backslash, keep it verbatim
                      $ret .= '\\';
                      break;
                  }
                  if (ctype_xdigit($string[$i])) {
                      // gather at most six hexadecimal digits
                      $code = $string[$i];
                      for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                          if (!ctype_xdigit($string[$i])) {
                              break;
                          }
                          $code .= $string[$i];
                      }
                      // We have to be extremely careful when adding
                      // new characters, to make sure we're not breaking
                      // the encoding: a character that cleanUTF8() strips
                      // completely is not appended.
                      $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                      if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                          $ret .= $char;
                      }
                      // $i now sits on the character right after the hex
                      // digits. Whitespace there terminates the escape and is
                      // swallowed by the loop increment; anything else must be
                      // processed normally, so step back one position. This
                      // has to happen even when the decoded character was
                      // rejected, otherwise the following character is lost.
                      if ($i < $c && trim($string[$i]) !== '') {
                          $i--;
                      }
                      continue;
                  }
                  if ($string[$i] === "\n") {
                      // escaped line feed: line continuation, emit nothing
                      continue;
                  }
              }
              $ret .= $string[$i];
          }
          return $ret;
      }
  }
  128. // vim: et sw=4 sts=4