summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Urban Müller, 2012-04-10 14:25:32 +0000
committer: Urban Müller, 2012-04-10 14:25:32 +0000
commit: 7b603ba54278b493d4b9704353a47cab84d00961 (patch)
tree: faa9c55237d97bd964c152e936e46439b2ff5d1a
parent: 9f13a3b384785321fbfdd0344e95d58c58bc5086 (diff)
download: itools-7b603ba54278b493d4b9704353a47cab84d00961.tar.gz
itools-7b603ba54278b493d4b9704353a47cab84d00961.tar.bz2
itools-7b603ba54278b493d4b9704353a47cab84d00961.zip
simpler double encoding test avoids false positives
-rw-r--r-- it_html.class 2
-rwxr-xr-x tests/it_html.t 1
2 files changed, 1 insertion, 2 deletions
diff --git a/it_html.class b/it_html.class
index 4c9f3e7..0f18996 100644
--- a/it_html.class
+++ b/it_html.class
@@ -263,7 +263,7 @@ function fix_encoding($string, $silent = false)
{
if (grapheme_strlen($string) === null)
list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string)));
- else if ($string && preg_match('/[\x80-\xff]/', $string) && grapheme_strlen(utf8_decode($string)) !== null && utf8_encode(utf8_decode($string)) === $string)
+ else if (preg_match('/\xc3\x83(\xc2\x84|\xc2\x9c|\xc2\xa4|\xc2\xb6|\xc2\xbc|\xc2\xa9|\xc2\xa0)/', $string)) # Double encoded ÄÖÜäöüéà, UTF8SAFE
list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string)));
if ($error && !$silent)
diff --git a/tests/it_html.t b/tests/it_html.t
index 174c487..3ac69f6 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -176,7 +176,6 @@ is(it_html::fix_encoding("Aslı"), "Aslı", "it_html::fix_encoding utf-8 non-lat
is(it_html::fix_encoding("é»"), "é»", "it_html::fix_encoding utf-8 latin1 special combination");
is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "it_html::fix_encoding double encoded latin1");
-is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı", "it_html::fix_encoding double encoded non-latin1");
is(it_html::fix_encoding(utf8_encode("é»"), true), "é»", "it_html::fix_encoding double encoded latin1 special combination");
is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller", "it_html::fix_encoding incorrectly encoded latin1");