From 9c79c7183997909600ab38936100f1b7e3d30ac4 Mon Sep 17 00:00:00 2001
From: Denis Demesmaeker
Date: Sun, 9 Nov 2008 13:18:46 +0000
Subject: also support different charsets in sanitize function
---
it_html.class | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/it_html.class b/it_html.class
index 75ebe9b..c4cb187 100644
--- a/it_html.class
+++ b/it_html.class
@@ -376,6 +376,7 @@ function sanitize($html)
$result = "";
$html = it::replace(array('[\0\s]+' => " "), $html); # \s also matches \r and \n
$urlpattern = 'https?://[^">]+';
+ $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1';
if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)\\2>(.*)", $html))
{
@@ -388,13 +389,13 @@ function sanitize($html)
{
# Link tags, keeps only href attribute
list($head, $href, $content, $tail) = $tag;
- $result .= it_html::sanitize($head) . '' . it_html::sanitize($content) . "" . it_html::sanitize($tail);
+ $result .= it_html::sanitize($head) . '' . it_html::sanitize($content) . "" . it_html::sanitize($tail);
}
else if ($tag = it::match('(.*)]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
{
# Image tags, keeps only src attribute
list($head, $src, $tail) = $tag;
- $result .= it_html::sanitize($head) . '' . it_html::sanitize($tail);
+ $result .= it_html::sanitize($head) . '' . it_html::sanitize($tail);
}
else if ($tag = it::match("(.*)<(br)[^>]*>(.*)", $html))
{
@@ -404,7 +405,7 @@ function sanitize($html)
$result .= it_html::sanitize($head) . "<$tagname />" . it_html::sanitize($tail);
}
else
- $result = it_html::Q(it::replace(array('\d+;' => ""), html_entity_decode(strip_tags($html))));
+ $result = it_html::Q(it::replace(array('\d+;' => ""), html_entity_decode(strip_tags($html), ENT_COMPAT, $charset)));
return $result;
}
--
cgit v1.2.3