summary refs log tree commit diff
path: root/it_html.class
diff options
context:
space:
mode:
author Christian Schneider 2012-03-28 13:00:39 +0000
committer Christian Schneider 2012-03-28 13:00:39 +0000
commit b1c0b4946572027c8de564730a89ec584c830bf3 (patch)
tree 81859c280f43a5cdb31ddede7300641fa530e0f8 /it_html.class
parent 49d2a5ce1b6ad201f051263db7c3a1f5ad6a39ab (diff)
download itools-b1c0b4946572027c8de564730a89ec584c830bf3.tar.gz
itools-b1c0b4946572027c8de564730a89ec584c830bf3.tar.bz2
itools-b1c0b4946572027c8de564730a89ec584c830bf3.zip
Added it::any2utf8, fixed it::replace fast path to add u modifier, added error reporting for invalid utf-8 input to it::match and it::replace
Diffstat (limited to 'it_html.class')
-rw-r--r-- it_html.class 4
1 files changed, 3 insertions, 1 deletions
diff --git a/it_html.class b/it_html.class
index 71ac965..f20c4be 100644
--- a/it_html.class
+++ b/it_html.class
@@ -431,9 +431,11 @@ function _strip_tags($html)
function sanitize($html)
{
$result = "";
+ $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1';
+ if ($charset == "utf-8")
+ $html = it::any2utf8($html);
$html = it::replace(array('[\0\s]+' => " "), $html); # \s also matches \r and \n
$urlpattern = 'https?://[^">]+';
- $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1';
if ($tag = it::match("(.*)<(div|p|i|b)\b[^>]*>(.*?)</\\2>(.*)", $html))
{