You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

LangTest.php 3.6KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. <?php
  /**
   * Test case for the language-code attribute definition
   * (HTMLPurifier_AttrDef_Lang).
   */
  class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
  {
      /**
       * Feeds a fixed list of language tags to the definition under test.
       *
       * Every entry in the table below is the argument list of one
       * assertDef() call; the entries are replayed in order. assertDef() comes
       * from the harness, which is not visible in this file. Judging from the
       * cases themselves, a lone argument appears to mean "accepted
       * unchanged", a string second argument is the expected normalized value,
       * and false means the input is rejected -- confirm against the harness.
       */
      public function test()
      {
          $this->def = new HTMLPurifier_AttrDef_Lang();

          $cases = array(
              // --- Basic, well-formed usage -------------------------------
              array('en'),
              array('en-us'),
              array(' en ', 'en'),                        // surrounding whitespace is trimmed
              array('EN', 'en'),                          // matching is case-insensitive
              // (credit to Eugen Pankratz for spotting the typos here)
              array('En-Us-Edison', 'en-us-edison'),      // mixed case across several subtags
              array('fr en', false),                      // more than one language
              array('%', false),                          // illegal character

              // A tag that is too long for the syntax.
              array('thisistoolongsoitgetscut', false),

              // --- Primary subtag -----------------------------------------
              // "x" and "i" on their own are rarely (if ever) seen in
              // practice, but they are theoretically permissible, so the
              // definition is lenient about them. No XML parser is going to
              // complain anyway.
              array('x'),
              array('i'),

              // How they are actually used in the real world.
              array('x-klingon'),
              array('i-mingo'),

              // The syntax allows 1 to 8 characters, but the RFC only defines
              // two- and three-letter primary codes, so four or more letters
              // is invalid. The RFC reserves the rest for future expansion,
              // which means a new RFC will force these cases to be rewritten
              // even if it stays largely backwards-compatible (i.e. this is
              // not forwards-compatible).
              array('four', false),

              // By the same reasoning, any other single-letter language is
              // disallowed.
              array('f', false),

              // --- Second subtag ------------------------------------------
              // Single-letter subtags are prohibited until the RFC is
              // revised; this is less volatile than the primary subtag
              // restrictions. This case also covers the fix-up behavior:
              // subtags are chopped off until a valid code remains.
              array('en-a', 'en'),

              // "x" is the reserved single-letter subtag that IS allowed.
              array('en-x', 'en-x'),

              // 2-8 characters are permitted. They carry special meaning,
              // but checking it would need country code lookup tables (two
              // characters) or special registration tables (anything
              // longer).
              array('en-uk', true),

              // --- Further subtags: syntactic constraints only -------------
              array('en-us-edison'),
              array('en-us-toolonghaha', 'en-us'),
              array('en-us-a-silly-long-one'),

              // RFC 3066 says that when both a three-letter and a two-letter
              // code exist, the two-letter one MUST be used. That cannot be
              // implemented without a language code lookup table.

              // HTML technically lets you omit language tags, but omission
              // implies the parent element's language applies, which is
              // sometimes wrong. "und" is therefore allowed, only slightly
              // defying the RFC's SHOULD NOT designation.
              array('und'),

              // Attributes only allow a single language, so "mul" is
              // allowed, complying with the RFC's SHOULD NOT designation.
              array('mul'),
          );

          foreach ($cases as $arguments) {
              // Replay with the exact argument count of the original call so
              // the harness default for the expected value still applies.
              call_user_func_array(array($this, 'assertDef'), $arguments);
          }
      }
  }
  69. // vim: et sw=4 sts=4