|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- <?php
-
/**
 * Exercises HTMLPurifier_AttrDef_Lang against well-formed, malformed and
 * repairable language codes.
 *
 * Every check goes through assertDef(), inherited from the harness.
 * NOTE(review): judging by the cases below, a lone argument means "accepted
 * unchanged", a string second argument is the expected corrected value and
 * false means "rejected" -- confirm against HTMLPurifier_AttrDefHarness.
 */
class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
{

    /**
     * Single entry point: installs the definition under test, then runs each
     * group of cases in a fixed order.
     */
    public function test()
    {
        $this->def = new HTMLPurifier_AttrDef_Lang();

        $this->checkAcceptanceAndNormalization();
        $this->checkSyntaxRejections();
        $this->checkPrimarySubtag();
        $this->checkSecondSubtag();
        $this->checkFurtherSubtags();
        $this->checkSpecialCodes();
    }

    /**
     * Plain valid codes, plus inputs that only need whitespace or case
     * cleanup.
     */
    private function checkAcceptanceAndNormalization()
    {
        // straightforward, already-canonical codes
        $this->assertDef('en');
        $this->assertDef('en-us');

        // surrounding whitespace is trimmed
        $this->assertDef(' en ', 'en');
        // matching is case-insensitive; output is lowercased
        $this->assertDef('EN', 'en');

        // mixed case across several subtags
        // (thanks Eugen Pankratz for noticing the typos!)
        $this->assertDef('En-Us-Edison', 'en-us-edison');
    }

    /**
     * Inputs that violate the basic syntax and are rejected outright.
     */
    private function checkSyntaxRejections()
    {
        // more than one language in a single value
        $this->assertDef('fr en', false);
        // character outside the permitted set
        $this->assertDef('%', false);

        // a code that is too long according to the syntax
        $this->assertDef('thisistoolongsoitgetscut', false);
    }

    /**
     * Rules for the first (primary) subtag.
     */
    private function checkPrimarySubtag()
    {
        // A bare "x" or "i" is rarely seen in real life, but it is
        // theoretically permissible on its own, so the definition is
        // lenient here. No XML parser is going to complain anyway.
        $this->assertDef('x');
        $this->assertDef('i');

        // how those prefixes actually appear in practice
        $this->assertDef('x-klingon');
        $this->assertDef('i-mingo');

        // The RFC defines only two- and three-letter primary codes, so a
        // length of four or more is invalid even though the syntax allows
        // 1 to 8 characters. The RFC reserves the rest for future
        // expansion, so adopting a newer RFC will mean rewriting these
        // cases: this is deliberately not forwards compatible.
        $this->assertDef('four', false);

        // likewise, every single-letter primary code other than x and i
        $this->assertDef('f', false);
    }

    /**
     * Rules for the second subtag.
     */
    private function checkSecondSubtag()
    {
        // Single-letter second subtags are prohibited until the RFC is
        // revised (a less volatile restriction than the primary-subtag
        // ones). This case also covers the repair behavior: subtags are
        // chopped off until a valid language code remains.
        $this->assertDef('en-a', 'en');

        // "x" is the one reserved single-letter subtag that is allowed
        $this->assertDef('en-x', 'en-x');

        // Two to eight characters are permitted. Their special meaning
        // cannot be verified without country-code lookup tables (two
        // characters) or registration tables (anything longer).
        $this->assertDef('en-uk', true);
    }

    /**
     * Third and later subtags: only syntactic constraints apply.
     */
    private function checkFurtherSubtags()
    {
        $this->assertDef('en-us-edison');
        // an overlong trailing subtag is dropped
        $this->assertDef('en-us-toolonghaha', 'en-us');
        $this->assertDef('en-us-a-silly-long-one');

        // RFC 3066 stipulates that when both a three-letter and a
        // two-letter code exist, the two-letter one MUST be used. That
        // cannot be implemented without a language-code lookup table, so
        // there is no case for it.
    }

    /**
     * The special "und" and "mul" codes.
     */
    private function checkSpecialCodes()
    {
        // HTML technically lets you omit the language tag, but that
        // implicitly applies the parent element's language, which is
        // sometimes wrong. "und" is therefore allowed, only slightly
        // defying the RFC's SHOULD NOT designation.
        $this->assertDef('und');

        // An attribute holds only one language, so "mul" is allowed,
        // complying with the RFC's SHOULD NOT designation.
        $this->assertDef('mul');
    }

}
-
- // vim: et sw=4 sts=4
|