attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name'
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );
    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     *
     * @param mixed $module Either a module name (string) or an already
     *              constructed module object exposing a non-empty public
     *              $name property.
     * @param bool $overload Whether replacing an already registered module
     *              of the same name is intentional. When false and a module
     *              of that name exists, an E_USER_WARNING is raised and the
     *              module is replaced anyway.
     * @return void
     * @note A string is resolved to a class in this order:
     *          - each prefix in $this->prefixes, in the order the prefixes
     *            were added, concatenated with the given name
     *          - the given name taken as a literal class name
     *          - otherwise an E_USER_ERROR is raised and nothing is
     *            registered
     *       If your class name collides with one reachable through a
     *       prefix, pass an instance instead of a name.
     * @note class_exists() is called with its default arguments, so a
     *       registered autoloader may be consulted during resolution;
     *       no file is included explicitly by this method.
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class matched: fall back to the literal name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    // only reached when an error handler swallows the error
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            // raised at trigger_error()'s default level (E_USER_NOTICE)
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            // warn, but still fall through and replace the existing entry
            trigger_error(
                'Overloading ' . $module->name . ' without explicit overload parameter',
                E_USER_WARNING
            );
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the active doctype
     *
     * @param mixed $module Module name or module object, as accepted by
     *              registerModule().
     * @return void
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            // user modules are tracked by name, never by instance
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     *
     * @param string $prefix Prefix prepended verbatim to a module name.
     * @return void
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     *
     * @param HTMLPurifier_Config $config
     * @return void
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // modules listed in HTML.CoreModules are never filtered out
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        // enabled by any value other than an empty array
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    // a non-object entry is a short name; build the class from it
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class();
                }
                $n[$injector->name] = $injector;
            }
            // the list is replaced by a map keyed on injector name
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     *
     * @param mixed $module Module name, or a module object. A name is
     *              registered only when it is not registered yet; an object
     *              is always passed to registerModule().
     * @return void
     */
    public function processModule($module)
    {
        if (is_object($module)) {
            $this->registerModule($module);
            if (empty($module->name)) {
                // registerModule() has already reported the missing name
                // and registered nothing, so there is nothing to activate
                return;
            }
            // an object is not a valid array offset: both collections are
            // keyed by module name, so continue with the name from here on
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     *
     * Modules that are not marked safe are skipped unless trusted mode
     * is enabled.
     *
     * @return Array of HTMLPurifier_ElementDef
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                // each element name is resolved once, however many
                // modules mention it
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;
    }

    /**
     * Retrieves a single merged element definition
     *
     * @param string $name
     *              Name of element
     * @param bool $trusted
     *              Boolean trusted overriding parameter: set to true
     *              if you want the full version of an element; null
     *              (the default) falls back to the manager's own setting
     * @return HTMLPurifier_ElementDef Merged HTMLPurifier_ElementDef, or
     *              false when the element is unknown or no usable
     *              standalone definition could be assembled
     * @note You may notice that modules are getting iterated over twice (once
     *       in getElements() and once here). getElements() only walks the
     *       modules to discover element names; the definitions contributed
     *       by every module that declared the element are merged here.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place. In particular, this might
                // be the case for trusted elements. WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }
}

// vim: et sw=4 sts=4