From e923bc4ab388d00ec757987003e39918756a7c59 Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Mon, 12 Feb 2007 15:13:41 +0000
Subject: Added it_html::sanitize to make user-contributed html safe to use
---
html.class | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 48 insertions(+), 2 deletions(-)
diff --git a/html.class b/html.class
index 098e4b3..ccacfea 100644
--- a/html.class
+++ b/html.class
@@ -33,10 +33,11 @@ class it_html
'show_content_type' => true,
'show_favicon' => true,
'show_boot_dom' => true,
+ 'staticallycallable' => "q,u,select", # Those methods are statically callable (have same arguments as global stubs) but are a bit slower
+ 'notexported' => "sanitize", # Those methods are not exported
);
var $tags_seen = array('body' => true); # body always counts as seen
var $_hasnonewline = array();
- var $_staticallycallable = "q,u,select"; # Those methods are statically callable (have same arguments as global stubs) but are a bit slower
/**
* Create a HTML object and global functions for all methods (exlcluding
@@ -64,6 +65,7 @@ function it_html($config = array())
$this->_htmltype = $this->_oldhtml ? "html" : "xhtml";
$this->_oldhtml = $this->_htmltype == "html";
$this->_hasnonewline = array_flip(explode(',', "dummy," . $this->_nonewlinetags)); # dummy keeps values >0
+ $notexported = array_flip(explode(',', "dummy," . $this->_notexported)); # dummy keeps values >0
# Create global functions for _tags
foreach (array_merge(explode(',', $this->_tags), explode(',', $this->_moretags)) as $func)
@@ -75,7 +77,7 @@ function it_html($config = array())
# Create global functions for it_html methods
foreach (get_class_methods(get_class($this)) as $func)
{
- if (!preg_match('/^_/', $func) && !is_a($this, $func) && $func && !function_exists($func))
+ if (!preg_match('/^_/', $func) && !is_a($this, $func) && $func && !function_exists($func) && !$notexported[$func])
$code[$func] = "function $func() { \$args = func_get_args(); return \$GLOBALS['$this->_name']->$func(\$args); }";
}
@@ -223,6 +225,50 @@ function span($args)
}
+/**
+ * Return HTML with all evil things stripped. Allowed are a couple of simple
+ * tags like div, p, i, b, br without attributes, a with absolute http(s) href,
+ * img with absolute http(s) src url. Also ensures that tags are balanced.
+ * @param $html HTML string to be sanitized (runs of NUL/whitespace are collapsed to a single space first)
+ * @return Sanitized HTML; text containing none of the allowed tags goes through strip_tags(), html_entity_decode() and Q()
+ */
+function sanitize($html)
+{
+ $result = "";
+ $html = it::replace('[\0\n\r\s]+' => " ", $html);
+ $urlpattern = 'https?://[^">]+';
+
+ if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)\\2>(.*)", $html))
+ {
+ # Simple paired tags with content: attributes are dropped; head, content and tail are each sanitized recursively
+ list($head, $tagname, $content, $tail) = $tag;
+ $result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "$tagname>" . it_html::sanitize($tail);
+ }
+ else if ($tag = it::match('(.*)]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)(.*)', $html))
+ {
+ # Link tags: only the href attribute is captured, and it has to match $urlpattern (http or https URL)
+ list($head, $href, $content, $tail) = $tag;
+ $result .= it_html::sanitize($head) . "" . it_html::sanitize($content) . "" . it_html::sanitize($tail);
+ }
+ else if ($tag = it::match('(.*)]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
+ {
+ # Image tags: only the src attribute is captured, and it has to match $urlpattern (http or https URL)
+ list($head, $src, $tail) = $tag;
+ $result .= it_html::sanitize($head) . "" . it_html::sanitize($tail);
+ }
+ else if ($tag = it::match("(.*)<(br)[^>]*>(.*)", $html))
+ {
+ # Simple tags without content (br): attributes are dropped, head and tail are sanitized recursively
+ list($head, $tagname, $tail) = $tag;
+ $result .= it_html::sanitize($head) . "<$tagname />" . it_html::sanitize($tail);
+ }
+ else
+ $result = Q(html_entity_decode(strip_tags($html))); # No allowed tag matched: strip any remaining tags, decode entities, then pass through Q() (presumably htmlspecialchars encoding - confirm)
+
+ return $result;
+}
+
+
/**
* Shortcut: return htmlspecialchars($string);
* @param $string String to encode with htmlspecialchars()
--
cgit v1.2.3