From 9c79c7183997909600ab38936100f1b7e3d30ac4 Mon Sep 17 00:00:00 2001 From: Denis Demesmaeker Date: Sun, 9 Nov 2008 13:18:46 +0000 Subject: support also different charset in sanitize function --- it_html.class | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'it_html.class') diff --git a/it_html.class b/it_html.class index 75ebe9b..c4cb187 100644 --- a/it_html.class +++ b/it_html.class @@ -376,6 +376,7 @@ function sanitize($html) $result = ""; $html = it::replace(array('[\0\s]+' => " "), $html); # \s also matches \r and \n $urlpattern = 'https?://[^">]+'; + $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1'; if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)(.*)", $html)) { @@ -388,13 +389,13 @@ function sanitize($html) { # Link tags, keeps only href attribute list($head, $href, $content, $tail) = $tag; - $result .= it_html::sanitize($head) . '' . it_html::sanitize($content) . "" . it_html::sanitize($tail); + $result .= it_html::sanitize($head) . '' . it_html::sanitize($content) . "" . it_html::sanitize($tail); } else if ($tag = it::match('(.*)]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html)) { # Image tags, keeps only src attribute list($head, $src, $tail) = $tag; - $result .= it_html::sanitize($head) . '' . it_html::sanitize($tail); + $result .= it_html::sanitize($head) . '' . it_html::sanitize($tail); } else if ($tag = it::match("(.*)<(br)[^>]*>(.*)", $html)) { @@ -404,7 +405,7 @@ function sanitize($html) $result .= it_html::sanitize($head) . "<$tagname />" . it_html::sanitize($tail); } else - $result = it_html::Q(it::replace(array('&#\d+;' => ""), html_entity_decode(strip_tags($html)))); + $result = it_html::Q(it::replace(array('&#\d+;' => ""), html_entity_decode(strip_tags($html), ENT_COMPAT, $charset))); return $result; } -- cgit v1.2.3