summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christian Schneider, 2012-07-02 15:30:50 +0000
committer: Christian Schneider, 2012-07-02 15:30:50 +0000
commit: 57da96485e0671fc34f2ea862b872af186ff043c (patch)
tree: 5a312f9a618f262171d3e9998b83a0293a068b02
parent: 58a0444eabccaba87f3050b7f264e404f0cec6d4 (diff)
download: itools-57da96485e0671fc34f2ea862b872af186ff043c.tar.gz
itools-57da96485e0671fc34f2ea862b872af186ff043c.tar.bz2
itools-57da96485e0671fc34f2ea862b872af186ff043c.zip
Improved cleanup handling for illegal characters in html, now handles utf-8 as well
-rw-r--r--  it_html.class  30
-rw-r--r--  it_xml.class  2
2 files changed, 18 insertions, 14 deletions
diff --git a/it_html.class b/it_html.class
index bf9f8ff..66c751a 100644
--- a/it_html.class
+++ b/it_html.class
@@ -180,9 +180,6 @@ function head($args = array())
header("Content-Type: " . $p['content-type']);
$js = isset($p['jsenv']) ? "var env = " . itjs::serialize($p['jsenv']) . ";\n" : '';
- if ($GLOBALS['it_html']->p['charset'] == "iso-8859-1")
- $js = it_html::latinize($js);
-
$js .= $this->_itjs($p['jsinline'], 'inline');
if ($p['js'])
@@ -194,7 +191,7 @@ function head($args = array())
}
if ($js)
- $data .= $this->js(array($js));
+ $data .= $this->js(array(self::_cleanup($js, $p['charset'])));
return tag('head', $header, $data);
}
@@ -286,6 +283,7 @@ function _tag($name, $args)
if (($levels = intval($GLOBALS['debug_srclines'])) && !it::match('^(head|meta|title|script|style|link)', $name))
$attr = array('title' => it_debug::backtrace(array('levels' => max(3, $levels), 'skipfiles' => "_html\\.class"))) + $attr;
+ $charset = $GLOBALS['it_html']->p['charset'];
$result .= "<$name";
# add attributes. If $value === true, use key only (<td nowrap> instead of <td nowrap=""> for old html, <td nowrap="nowrap"> for xhtml style)
@@ -296,7 +294,7 @@ function _tag($name, $args)
else if (isset($value) && $value !== true) # normal case: value
{
if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\x9f]/', $value)) # WARNING: copy/pasted from Q()
- $result .= " $key=\"" . str_replace("\n", "&#10;", htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($value) : $value, ENT_COMPAT, $GLOBALS['it_html']->p['charset'])) . '"';
+ $result .= " $key=\"" . str_replace("\n", "&#10;", htmlspecialchars(self::_cleanup($value, $charset), ENT_COMPAT, $charset)) . '"';
else
$result .= " $key=\"$value\"";
}
@@ -312,7 +310,7 @@ function _tag($name, $args)
else
$result .= " />$newline";
- if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8")
+ if ($GLOBALS['debug_utf8check'] && $charset == "utf-8")
$result = it::any2utf8($result, "error in $name()");
return $result;
@@ -458,18 +456,23 @@ function sanitize($html)
*/
function entity_decode($string)
{
- $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_COMPAT, $GLOBALS['it_html']->p['charset']));
+ $charset = $GLOBALS['it_html']->p['charset'];
+ $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_COMPAT, $charset));
$string = preg_replace('/&#[^;]*;/i', " ", $string); # remove remaining illegal numeric entities, e.g. 0x80-0x9f
- return $string;
+ return self::_cleanup($string, $charset);
}
/**
- * Replace or remove all illegal characters from a latin-1 string
+ * Replace or remove all illegal characters from a HTML string (knows utf-8 and latin1)
*/
-function latinize($string)
+function _cleanup($string, $charset)
{
- return preg_replace('/[\x00-\x08\x0b-\x0c\x0e-\x1f\x80-\x9f]/', ' ', strtr($string, array("\x80" => "EUR", "\x82" => "'", "\x84" => "\"", "\x85" => "...", "\x8a" => "S", "\x8c" => "OE", "\x8e" => "Z", "\x91" => "'", "\x92" => "'", "\x93" => "\"", "\x94" => "\"", "\x96" => "-", "\x97" => "-", "\x9a" => "s", "\x9e" => "z")));
+ $result = $charset == "utf-8"
+ ? preg_replace('/\xc2[\x80-\x9f]/', ' ', $string)
+ : preg_replace('/[\x80-\x9f]/', ' ', strtr($string, array("\x80" => "EUR", "\x82" => "'", "\x84" => "\"", "\x85" => "...", "\x8a" => "S", "\x8c" => "OE", "\x8e" => "Z", "\x91" => "'", "\x92" => "'", "\x93" => "\"", "\x94" => "\"", "\x96" => "-", "\x97" => "-", "\x9a" => "s", "\x9e" => "z")));
+
+ return preg_replace('/[\x00-\x08\x0b-\x0c\x0e-\x1f]/', ' ', $result);
}
/**
@@ -481,11 +484,12 @@ function Q($string)
{
if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\xff]/', $string)) # WARNING: copy/pasted to _tag()
{
- if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8")
+ $charset = $GLOBALS['it_html']->p['charset'];
+ if ($GLOBALS['debug_utf8check'] && $charset == "utf-8")
$string = it::any2utf8($string, "error in Q()");
$origstring = $string;
- $string = @htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset']);
+ $string = @htmlspecialchars(self::_cleanup($string, $charset), ENT_COMPAT, $charset);
if ($string === "" && $origstring)
it::error("invalid utf-8 '$origstring'");
}
diff --git a/it_xml.class b/it_xml.class
index f854682..c98b6eb 100644
--- a/it_xml.class
+++ b/it_xml.class
@@ -134,7 +134,7 @@ function _sanitize($xmldata, $isutf8 = null)
# If not utf-8, remove characters illegal for latin-1
if (!$isutf8 && preg_match('/[\x00-\x08\x0b-\x0c\x0e-\x1f\x80-\x9f]/', $xmldata))
- $xmldata = it_html::latinize($xmldata);
+ $xmldata = it_html::_cleanup($xmldata, $isutf8 ? "utf-8" : "iso-8859-1");
return array($xmldata, $isutf8);
}