From e923bc4ab388d00ec757987003e39918756a7c59 Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Mon, 12 Feb 2007 15:13:41 +0000
Subject: Added it_html::sanitize to make user-contributed html safe to use

---
 html.class | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/html.class b/html.class
index 098e4b3..ccacfea 100644
--- a/html.class
+++ b/html.class
@@ -33,10 +33,11 @@ class it_html
 	'show_content_type' => true,
 	'show_favicon' => true,
 	'show_boot_dom' => true,
+	'staticallycallable' => "q,u,select", # Those methods are statically callable (have same arguments as global stubs) but are a bit slower
+	'notexported' => "sanitize", # Those methods are not exported
 );
 var $tags_seen = array('body' => true); # body always counts as seen
 var $_hasnonewline = array();
-var $_staticallycallable = "q,u,select"; # Those methods are statically callable (have same arguments as global stubs) but are a bit slower
 
 /**
  * Create a HTML object and global functions for all methods (exlcluding
@@ -64,6 +65,7 @@ function it_html($config = array())
 	$this->_htmltype = $this->_oldhtml ? "html" : "xhtml";
 	$this->_oldhtml = $this->_htmltype == "html";
 	$this->_hasnonewline = array_flip(explode(',', "dummy," . $this->_nonewlinetags)); # dummy keeps values >0
+	$notexported = array_flip(explode(',', "dummy," . $this->_notexported)); # dummy keeps values >0
 
 	# Create global functions for _tags
 	foreach (array_merge(explode(',', $this->_tags), explode(',', $this->_moretags)) as $func)
@@ -75,7 +77,7 @@ function it_html($config = array())
 	# Create global functions for it_html methods
 	foreach (get_class_methods(get_class($this)) as $func)
 	{
-		if (!preg_match('/^_/', $func) && !is_a($this, $func) && $func && !function_exists($func))
+		if (!preg_match('/^_/', $func) && !is_a($this, $func) && $func && !function_exists($func) && !$notexported[$func])
 			$code[$func] = "function $func() { \$args = func_get_args(); return \$GLOBALS['$this->_name']->$func(\$args); }";
 	}
 
@@ -223,6 +225,50 @@ function span($args)
 }
 
 
+/**
+ * Return HTML with all evil things stripped. Allowed are a couple of simple
+ * tags like div, p, i, b, br without attributes, a with absolute href,
+ * img with absolute src url. Also ensures that tags are balanced.
+ * @param $html HTML string to be sanitized
+ * @return Sanitized HTML
+ */
+function sanitize($html)
+{
+	$result = "";
+	$html = it::replace(array('[\0\n\r\s]+' => " "), $html); # Collapse NUL bytes and whitespace runs to one space
+	$urlpattern = 'https?://[^">]+'; # Absolute http(s) URL that cannot break out of a quoted attribute
+
+	if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)</\\2>(.*)", $html))
+	{
+		# Simple tags with content, no attributes kept
+		list($head, $tagname, $content, $tail) = $tag;
+		$result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "</$tagname>" . it_html::sanitize($tail);
+	}
+	else if ($tag = it::match('(.*)<a[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
+	{
+		# Link tags, keeps only href attribute
+		list($head, $href, $content, $tail) = $tag;
+		$result .= it_html::sanitize($head) . "<a href=\"$href\">" . it_html::sanitize($content) . "</a>" . it_html::sanitize($tail);
+	}
+	else if ($tag = it::match('(.*)<img[^>]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
+	{
+		# Image tags, keeps only src attribute
+		list($head, $src, $tail) = $tag;
+		$result .= it_html::sanitize($head) . "<img src=\"$src\" />" . it_html::sanitize($tail);
+	}
+	else if ($tag = it::match("(.*)<(br)[^>]*>(.*)", $html))
+	{
+		# Simple tags without content, no attributes kept
+		list($head, $tagname, $tail) = $tag;
+		$result .= it_html::sanitize($head) . "<$tagname />" . it_html::sanitize($tail);
+	}
+	else # No allowed tag matched: drop any remaining markup and re-encode the plain text
+		$result = Q(html_entity_decode(strip_tags($html)));
+
+	return $result;
+}
+
+
 /**
  * Shortcut: return htmlspecialchars($string);
  * @param $string String to encode with htmlspecialchars()
-- 
cgit v1.2.3