scheme = is_null ( $scheme ) || ctype_lower ( $scheme ) ? $scheme : strtolower ( $scheme );
$this->userinfo = $userinfo;
$this->host = $host;
$this->port = is_null ( $port ) ? $port : ( int ) $port;
$this->path = $path;
$this->query = $query;
$this->fragment = $fragment;
}
/**
* Retrieves a scheme object corresponding to the URI's scheme/default
*
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return HTMLPurifier_URIScheme|bool Scheme object appropriate for validating this URI, or false if no scheme object could be retrieved
*/
public function getSchemeObj($config, $context) {
    // Resolve the scheme object for this URI: the registry entry for an
    // explicit scheme, or the URI definition's default scheme when this
    // URI has none. Returns false when no scheme object is available.
    $registry = HTMLPurifier_URISchemeRegistry::instance ();

    if ($this->scheme === null) {
        // no scheme on the URI: fall back to the configured default
        $uri_def = $config->getDefinition ( 'URI' );
        $default_obj = $uri_def->getDefaultScheme ( $config, $context );
        if ($default_obj) {
            return $default_obj;
        }
        // the default scheme object could not be obtained; warn and bail
        trigger_error ( 'Default scheme object "' . $uri_def->defaultScheme . '" was not readable', E_USER_WARNING );
        return false;
    }

    // explicit scheme: a falsy lookup result means the scheme is invalid,
    // which is reported to the caller as false
    $found = $registry->getScheme ( $this->scheme, $config, $context );
    return $found ? $found : false;
}
/**
* Generic validation method applicable for all schemes.
* May modify
* this URI in order to get it into a compliant form.
*
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool True if validation/filtering succeeds, false if failure
*/
public function validate($config, $context) {
    // Normalizes each component of this URI in place (host, scheme,
    // userinfo, port, path, query, fragment). Components that fail their
    // check are reset to null rather than aborting; the method itself
    // always returns true.

    // ABNF character sets from RFC 3986
    $chars_sub_delims = '!$&\'()*+,;=';
    $chars_pchar = $chars_sub_delims . ':@';

    // validate host: a false result from the host validator clears it
    if (! is_null ( $this->host )) {
        $host_def = new HTMLPurifier_AttrDef_URI_Host ();
        $this->host = $host_def->validate ( $this->host, $config, $context );
        if ($this->host === false) {
            $this->host = null;
        }
    }

    // validate scheme
    // NOTE: It's not appropriate to check whether or not this
    // scheme is in our registry, since a URIFilter may convert a
    // URI that we don't allow into one we do. So instead, we just
    // check if the scheme can be dropped because there is no host
    // and it is our default scheme.
    //
    // The parentheses matter: && binds tighter than ||, so without
    // them an empty-string host would enter this branch even when no
    // scheme is set at all.
    if (! is_null ( $this->scheme ) && (is_null ( $this->host ) || $this->host === '')) {
        // support for relative paths is pretty abysmal when the
        // scheme is present, so axe it when possible
        $def = $config->getDefinition ( 'URI' );
        if ($def->defaultScheme === $this->scheme) {
            $this->scheme = null;
        }
    }

    // validate username
    // (the encoder is built with sub-delims plus ':' -- presumably the
    // characters it leaves unencoded; confirm against PercentEncoder)
    if (! is_null ( $this->userinfo )) {
        $encoder = new HTMLPurifier_PercentEncoder ( $chars_sub_delims . ':' );
        $this->userinfo = $encoder->encode ( $this->userinfo );
    }

    // validate port: anything outside 1..65535 is discarded
    if (! is_null ( $this->port )) {
        if ($this->port < 1 || $this->port > 65535) {
            $this->port = null;
        }
    }

    // validate path
    $segments_encoder = new HTMLPurifier_PercentEncoder ( $chars_pchar . '/' );
    if (! is_null ( $this->host )) { // this catches $this->host === ''
        // path-abempty (hier and relative)
        // http://www.example.com/my/path
        // //www.example.com/my/path (looks odd, but works, and
        // recognized by most browsers)
        // (this set is valid or invalid on a scheme by scheme
        // basis, so we'll deal with it later)
        // file:///my/path
        // ///my/path
        $this->path = $segments_encoder->encode ( $this->path );
    } elseif ($this->path !== '') {
        if ($this->path [0] === '/') {
            // path-absolute (hier and relative)
            // http:/my/path
            // /my/path
            if (strlen ( $this->path ) >= 2 && $this->path [1] === '/') {
                // This could happen if the host gets stripped out:
                // http://my/path
                // //my/path
                // With no host, a leading '//' would be re-read as an
                // authority, so the path is dropped entirely.
                $this->path = '';
            } else {
                $this->path = $segments_encoder->encode ( $this->path );
            }
        } elseif (! is_null ( $this->scheme )) {
            // path-rootless (hier)
            // http:my/path
            // Short circuit evaluation means we don't need to check nz
            $this->path = $segments_encoder->encode ( $this->path );
        } else {
            // path-noscheme (relative)
            // my/path
            // (once again, not checking nz)
            // The first segment uses an encoder built without ':', so a
            // colon there cannot be mistaken for a scheme delimiter.
            $segment_nc_encoder = new HTMLPurifier_PercentEncoder ( $chars_sub_delims . '@' );
            $c = strpos ( $this->path, '/' );
            if ($c !== false) {
                $this->path = $segment_nc_encoder->encode ( substr ( $this->path, 0, $c ) ) . $segments_encoder->encode ( substr ( $this->path, $c ) );
            } else {
                $this->path = $segment_nc_encoder->encode ( $this->path );
            }
        }
    } else {
        // path-empty (hier and relative)
        $this->path = ''; // just to be safe
    }

    // qf = query and fragment (both share one encoder)
    $qf_encoder = new HTMLPurifier_PercentEncoder ( $chars_pchar . '/?' );
    if (! is_null ( $this->query )) {
        $this->query = $qf_encoder->encode ( $this->query );
    }
    if (! is_null ( $this->fragment )) {
        $this->fragment = $qf_encoder->encode ( $this->fragment );
    }
    return true;
}
/**
* Convert URI back to string
*
* @return string URI appropriate for output
*/
public function toString() {
    // Reassemble the URI from its components. Only components that are
    // non-null contribute their delimiter (':', '//', '?', '#').

    // A null authority (http:foo-bar) renders differently from an
    // empty-string authority (http:///foo-bar), so the authority is
    // only built when a host is present -- even an empty one.
    $authority = null;
    if ($this->host !== null) {
        $user_part = ($this->userinfo !== null) ? $this->userinfo . '@' : '';
        $port_part = ($this->port !== null) ? ':' . $this->port : '';
        $authority = $user_part . $this->host . $port_part;
    }

    // How browsers parse the reconstructed string depends on the scheme
    // in use (file:///foo is handled very differently from http:///foo),
    // so getting that right is deferred to the scheme objects.
    $scheme_part = ($this->scheme !== null) ? $this->scheme . ':' : '';
    $authority_part = ($authority !== null) ? '//' . $authority : '';
    $query_part = ($this->query !== null) ? '?' . $this->query : '';
    $fragment_part = ($this->fragment !== null) ? '#' . $this->fragment : '';

    return $scheme_part . $authority_part . $this->path . $query_part . $fragment_part;
}
/**
* Returns true if this URL might be considered a 'local' URL given
* the current context.
* This is true when the host is null, or
* when it matches the host supplied to the configuration.
*
* Note that this does not do any scheme checking, so it is mostly
* only appropriate for metadata that doesn't care about protocol
* security. isBenign is probably what you actually want.
*
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool
*/
public function isLocal($config, $context) {
    // Local means: no host at all, or a host identical to the one on the
    // URI definition. The || short-circuits, so the definition is only
    // fetched when this URI actually has a host.
    return $this->host === null
        || $config->getDefinition ( 'URI' )->host === $this->host;
}
/**
* Returns true if this URL should be considered a 'benign' URL,
* that is:
*
* - It is a local URL (isLocal), and
* - It has an equal or better level of security
*
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool
*/
public function isBenign($config, $context) {
    // A non-local URI is never benign.
    if (! $this->isLocal ( $config, $context )) {
        return false;
    }

    // No usable scheme object: take the conservative answer.
    $own_scheme = $this->getSchemeObj ( $config, $context );
    if (! $own_scheme) {
        return false;
    }

    // The URI is rejected only when the default scheme is flagged secure
    // and this URI's scheme is not (i.e. it would lower security).
    $default_scheme = $config->getDefinition ( 'URI' )->getDefaultScheme ( $config, $context );
    return ! ($default_scheme->secure && ! $own_scheme->secure);
}
}
// vim: et sw=4 sts=4