diff options
-rw-r--r-- | it_html.class | 17 | ||||
-rwxr-xr-x | tests/it_html.t | 12 |
2 files changed, 19 insertions, 10 deletions
diff --git a/it_html.class b/it_html.class index 41d799f..71ac965 100644 --- a/it_html.class +++ b/it_html.class @@ -259,18 +259,15 @@ function _parse_args($args) # internal -function fix_encoding($string) +function fix_encoding($string, $silent = false) { if (preg_match('/[\x20-\x7f][\x80-\xff][\x20-\x7f]/', $string)) - { - it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html")); - $string = utf8_encode($string); - } - else if ($string && preg_match('/[\x80-\xff]/', $string) && htmlspecialchars(utf8_decode($string), ENT_COMPAT, "utf-8") !== "") - { - it::error(array('title' => utf8_encode("doubly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html")); - $string = utf8_decode($string); - } + list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string))); + else if ($string && preg_match('/[\x80-\xff]/', $string) && utf8_encode(utf8_decode($string)) === $string && htmlspecialchars(utf8_decode($string), ENT_COMPAT, "utf-8") !== "") + list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string))); + + if ($error && !$silent) + it::error(array('title' => $error, 'skipfiles' => "it_html")); return $string; } diff --git a/tests/it_html.t b/tests/it_html.t index c955359..a576b47 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -168,4 +168,16 @@ is( is(it_html::entity_decode("’"), "'"); is(it_html::entity_decode("࿿"), " "); is(it_html::entity_decode("ϧ"), " "); + +is(it_html::fix_encoding("Meier"), "Meier"); +is(it_html::fix_encoding("Müller"), "Müller"); +is(it_html::fix_encoding("Aslı"), "Aslı"); +is(it_html::fix_encoding("é»"), "é»"); + +is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "double encoded latin1"); # Double encoded latin1 +is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı"); # Double encoded non-latin1 +is(it_html::fix_encoding(utf8_encode("é»"), true), "é»"); # Double encoded special combination + +is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller"); # Incorrectly decoded latin1 + ?> |