diff options
author | Denis Demesmaeker | 2008-11-09 13:18:46 +0000 |
---|---|---|
committer | Denis Demesmaeker | 2008-11-09 13:18:46 +0000 |
commit | 9c79c7183997909600ab38936100f1b7e3d30ac4 (patch) | |
tree | f40f383d22cb3922f8bf5de278c0069b1bc8a669 | |
parent | 5ceb7642fac777226ca0462bb74c299dd2e397b9 (diff) | |
download | itools-9c79c7183997909600ab38936100f1b7e3d30ac4.tar.gz itools-9c79c7183997909600ab38936100f1b7e3d30ac4.tar.bz2 itools-9c79c7183997909600ab38936100f1b7e3d30ac4.zip |
Also support different charsets in the sanitize function
-rw-r--r-- | it_html.class | 7 |
1 files changed, 4 insertions, 3 deletions
diff --git a/it_html.class b/it_html.class index 75ebe9b..c4cb187 100644 --- a/it_html.class +++ b/it_html.class @@ -376,6 +376,7 @@ function sanitize($html) $result = ""; $html = it::replace(array('[\0\s]+' => " "), $html); # \s also matches \r and \n $urlpattern = 'https?://[^">]+'; + $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1'; if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)</\\2>(.*)", $html)) { @@ -388,13 +389,13 @@ function sanitize($html) { # Link tags, keeps only href attribute list($head, $href, $content, $tail) = $tag; - $result .= it_html::sanitize($head) . '<a href="' . it_html::Q(html_entity_decode($href)) . '">' . it_html::sanitize($content) . "</a>" . it_html::sanitize($tail); + $result .= it_html::sanitize($head) . '<a href="' . it_html::Q(html_entity_decode($href), ENT_COMPAT, $charset) . '">' . it_html::sanitize($content) . "</a>" . it_html::sanitize($tail); } else if ($tag = it::match('(.*)<img[^>]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html)) { # Image tags, keeps only src attribute list($head, $src, $tail) = $tag; - $result .= it_html::sanitize($head) . '<img src="' . it_html::Q(html_entity_decode($src)) . '" alt="" />' . it_html::sanitize($tail); + $result .= it_html::sanitize($head) . '<img src="' . it_html::Q(html_entity_decode($src, ENT_COMPAT, $charset)) . '" alt="" />' . it_html::sanitize($tail); } else if ($tag = it::match("(.*)<(br)[^>]*>(.*)", $html)) { @@ -404,7 +405,7 @@ function sanitize($html) $result .= it_html::sanitize($head) . "<$tagname />" . it_html::sanitize($tail); } else - $result = it_html::Q(it::replace(array('&#\d+;' => ""), html_entity_decode(strip_tags($html)))); + $result = it_html::Q(it::replace(array('&#\d+;' => ""), html_entity_decode(strip_tags($html), ENT_COMPAT, $charset))); return $result; } |