def = new HTMLPurifier_AttrDef_Lang ();
// basic good uses
$this->assertDef ( 'en' );
$this->assertDef ( 'en-us' );
$this->assertDef ( ' en ', 'en' ); // trim
$this->assertDef ( 'EN', 'en' ); // case insensitivity
// (thanks Eugen Pankratz for noticing the typos!)
$this->assertDef ( 'En-Us-Edison', 'en-us-edison' ); // complex ci
$this->assertDef ( 'fr en', false ); // multiple languages
$this->assertDef ( '%', false ); // bad character
// test overlong language according to syntax
$this->assertDef ( 'thisistoolongsoitgetscut', false );
// primary subtag rules
// I'm somewhat hesitant to allow x and i as primary language codes,
// because they are almost never used in real life. However,
// theoretically speaking, having them alone is permissible, so
// I'll be lenient. No XML parser is going to complain anyway.
$this->assertDef ( 'x' );
$this->assertDef ( 'i' );
// real world use-cases
$this->assertDef ( 'x-klingon' );
$this->assertDef ( 'i-mingo' );
// because the RFC only defines two and three letter primary codes,
// anything with a length of four or greater is invalid, despite
// the syntax stipulation of 1 to 8 characters. Because the RFC
// specifically states that this reservation is in order to allow
// for future versions to expand, the adoption of a new RFC will
// require these test cases to be rewritten, even if backwards-
// compatibility is largely retained (i.e. this is not forwards
// compatible)
$this->assertDef ( 'four', false );
// for similar reasons, disallow any other one character language
$this->assertDef ( 'f', false );
// second subtag rules
// one letter subtags prohibited until revision. This is, however,
// less volatile than the restrictions on the primary subtags.
// Also note that this test-case tests fix-behavior: chop
// off subtags until you get a valid language code.
$this->assertDef ( 'en-a', 'en' );
// however, x is a reserved single-letter subtag that is allowed
$this->assertDef ( 'en-x', 'en-x' );
// 2-8 chars are permitted, but have special meaning that cannot
// be checked without maintaining country code lookup tables (for
// two characters) or special registration tables (for all above).
$this->assertDef ( 'en-uk', true );
// further subtag rules: only syntactic constraints
$this->assertDef ( 'en-us-edison' );
$this->assertDef ( 'en-us-toolonghaha', 'en-us' );
$this->assertDef ( 'en-us-a-silly-long-one' );
// RFC 3066 stipulates that if a three letter and a two letter code
// are available, the two letter one MUST be used. Without a language
// code lookup table, we cannot implement this functionality.
// although the HTML protocol, technically speaking, allows you to
// omit language tags, this implicitly means that the parent element's
// language is the one applicable, which, in some cases, is incorrect.
// Thus, we allow und, only slightly defying the RFC's SHOULD NOT
// designation.
$this->assertDef ( 'und' );
// because attributes only allow one language, mul is allowed, complying
// with the RFC's SHOULD NOT designation.
$this->assertDef ( 'mul' );
}
}
// vim: et sw=4 sts=4