summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Urban Müller 2012-03-26 15:11:39 +0000
committer Urban Müller 2012-03-26 15:11:39 +0000
commit b7200b739ff651a7647d2d666e3674a7fe3cb6e2 (patch)
tree 6362be7d1ea725fc9371839a50c506cfebaa1ef3
parent 5e55c26d6ae3ab321a765fc66b7359a5a9edae8f (diff)
download itools-b7200b739ff651a7647d2d666e3674a7fe3cb6e2.tar.gz
itools-b7200b739ff651a7647d2d666e3674a7fe3cb6e2.tar.bz2
itools-b7200b739ff651a7647d2d666e3674a7fe3cb6e2.zip
fixed it_html::fix_encoding
-rw-r--r-- it_html.class 17
-rwxr-xr-x tests/it_html.t 12
2 files changed, 19 insertions, 10 deletions
diff --git a/it_html.class b/it_html.class
index 41d799f..71ac965 100644
--- a/it_html.class
+++ b/it_html.class
@@ -259,18 +259,15 @@ function _parse_args($args)
# internal
-function fix_encoding($string)
+function fix_encoding($string, $silent = false)
{
if (preg_match('/[\x20-\x7f][\x80-\xff][\x20-\x7f]/', $string))
- {
- it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html"));
- $string = utf8_encode($string);
- }
- else if ($string && preg_match('/[\x80-\xff]/', $string) && htmlspecialchars(utf8_decode($string), ENT_COMPAT, "utf-8") !== "")
- {
- it::error(array('title' => utf8_encode("doubly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html"));
- $string = utf8_decode($string);
- }
+ list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string)));
+ else if ($string && preg_match('/[\x80-\xff]/', $string) && utf8_encode(utf8_decode($string)) === $string && htmlspecialchars(utf8_decode($string), ENT_COMPAT, "utf-8") !== "")
+ list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string)));
+
+ if ($error && !$silent)
+ it::error(array('title' => $error, 'skipfiles' => "it_html"));
return $string;
}
diff --git a/tests/it_html.t b/tests/it_html.t
index c955359..a576b47 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -168,4 +168,16 @@ is(
is(it_html::entity_decode("’"), "'");
is(it_html::entity_decode("࿿"), " ");
is(it_html::entity_decode("ϧ"), " ");
+
+is(it_html::fix_encoding("Meier"), "Meier");
+is(it_html::fix_encoding("Müller"), "Müller");
+is(it_html::fix_encoding("Aslı"), "Aslı");
+is(it_html::fix_encoding("é»"), "é»");
+
+is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "double encoded latin1"); # Double encoded latin1
+is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı"); # Double encoded non-latin1
+is(it_html::fix_encoding(utf8_encode("é»"), true), "é»"); # Double encoded special combination
+
+is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller"); # Incorrectly decoded latin1
+
?>