<?php
class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness {
	/**
	 * Feeds a table of language tags to HTMLPurifier_AttrDef_Lang.
	 *
	 * Every row of the table is the argument list for a single
	 * assertDef() call: the raw attribute value first, optionally
	 * followed by the expected-result argument. Rows are asserted
	 * strictly in the order they are listed.
	 */
	public function test() {
		$this->def = new HTMLPurifier_AttrDef_Lang();

		$cases = array(
			// --- plain, well-formed tags ---------------------------------
			array('en'),
			array('en-us'),

			// surrounding whitespace is trimmed away
			array(' en ', 'en'),
			// matching is case-insensitive
			array('EN', 'en'),

			// (thanks Eugen Pankratz for noticing the typos!)
			// case-insensitivity across several subtags
			array('En-Us-Edison', 'en-us-edison'),

			// more than one language in a single value
			array('fr en', false),
			// illegal character
			array('%', false),

			// a tag that is longer than the syntax permits
			array('thisistoolongsoitgetscut', false),

			// --- primary subtag ------------------------------------------
			// A bare "x" or "i" as the primary code is almost never seen
			// in practice, but standing alone they are theoretically
			// permissible, so the validator is lenient about them. No XML
			// parser is going to complain anyway.
			array('x'),
			array('i'),
			// the forms that actually show up in the real world
			array('x-klingon'),
			array('i-mingo'),
			// The RFC defines only two- and three-letter primary codes,
			// so four letters or more is invalid even though the syntax
			// nominally allows 1 to 8 characters. The RFC reserves the
			// longer forms explicitly for future expansion, which means a
			// newer RFC will require these cases to be rewritten even if
			// backwards-compatibility is largely kept (i.e. this is not
			// forwards compatible).
			array('four', false),
			// by the same reasoning, every other single-letter primary
			// code is refused
			array('f', false),

			// --- second subtag -------------------------------------------
			// Single-letter subtags are prohibited until the RFC is
			// revised; this is less volatile than the primary-subtag
			// restrictions. This row also exercises the fix-up behaviour:
			// subtags are chopped off until a valid language code remains.
			array('en-a', 'en'),
			// "x" is the reserved single-letter subtag that is allowed
			array('en-x', 'en-x'),
			// Two to eight characters are permitted. They do carry special
			// meaning, but checking it would require country-code lookup
			// tables (two characters) or special registration tables
			// (anything longer).
			array('en-uk', true),

			// --- further subtags: syntactic constraints only -------------
			array('en-us-edison'),
			array('en-us-toolonghaha', 'en-us'),
			array('en-us-a-silly-long-one'),

			// RFC 3066 stipulates that when both a three-letter and a
			// two-letter code are available, the two-letter one MUST be
			// used. That cannot be implemented without a language-code
			// lookup table, so there is no case for it here.

			// The HTML protocol technically lets you omit language tags,
			// but omission implicitly means the parent element's language
			// applies, which is incorrect in some cases. "und" is
			// therefore allowed, only slightly defying the RFC's
			// SHOULD NOT designation.
			array('und'),

			// Attributes only allow one language, so "mul" is allowed,
			// complying with the RFC's SHOULD NOT designation.
			array('mul'),
		);

		// Forward each row untouched so a one-element row stays a
		// one-argument call and a two-element row a two-argument call.
		foreach ($cases as $arguments) {
			call_user_func_array(array($this, 'assertDef'), $arguments);
		}
	}
}

// vim: et sw=4 sts=4