diff options
author | Urban Müller | 2012-04-24 14:18:21 +0000 |
---|---|---|
committer | Urban Müller | 2012-04-24 14:18:21 +0000 |
commit | 9c523156bbc9d34ff2a16ec3b2c345951fc55287 (patch) | |
tree | b03409e32574aaa76f78aef8158030d6a8cea5d8 | |
parent | 177de5290677a71fe95e5264f1f21fad9cc7c469 (diff) | |
download | itools-9c523156bbc9d34ff2a16ec3b2c345951fc55287.tar.gz itools-9c523156bbc9d34ff2a16ec3b2c345951fc55287.tar.bz2 itools-9c523156bbc9d34ff2a16ec3b2c345951fc55287.zip |
merged it_html::fix_encoding into it::any2utf8
-rw-r--r-- | it.class | 14 | ||||
-rw-r--r-- | it_dbi.class | 2 | ||||
-rw-r--r-- | it_html.class | 19 | ||||
-rwxr-xr-x | tests/it.t | 8 | ||||
-rwxr-xr-x | tests/it_html.t | 11 |
5 files changed, 21 insertions, 33 deletions
@@ -406,16 +406,22 @@ static function grep($pattern, $array, $p = array()) } /** - * Convert string to utf8 if it was not already utf-8 before + * Convert string to utf8 if it was not already utf-8 before. Also handles double encoding * @param $value String to convert + * @param $errmsg Error message to output if anything needed to be done * @return Same string in utf-8 encoding */ -function any2utf8($value) +function any2utf8($value, $errmsg = "") { if (grapheme_strlen($value) === null) - $value = utf8_encode($value); + list($value, $error) = array(utf8_encode($value), utf8_encode("$errmsg: incorrect utf8-encoding. input=" . trim($value))); + if (preg_match('/\xc3\x83\xc2([\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f])/', $value)) + list($value, $error) = array(preg_replace('/\xc3\x83\xc2([\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f])/', "\xc3\$1", $value), utf8_encode("$errmsg: double utf8-encoding. input=" . trim($value))); - return preg_replace('/\xc3\x83\xc2([\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f])/', "\xc3\$1", $value); # fix most common double encodings, UTF8SAFE + if ($error && $errmsg) + it::error(array('title' => $error, 'skipfiles' => "it_html")); + + return $value; } /** diff --git a/it_dbi.class b/it_dbi.class index ced925b..512cd4c 100644 --- a/it_dbi.class +++ b/it_dbi.class @@ -249,7 +249,7 @@ function _set($tags, $allfields = false) else if ($allfields || ($value !== $this->_data[$field])) { if ($this->_p['charset'] == "utf8") # NOTE: Mysql charset is simply utf8, not utf-8 - $value = it_html::fix_encoding($value); + $value = it::any2utf8($value, "error in db-field $field"); $r[] = "`$field`=".(isset($value) ? 
$this->escape_string($value) : 'NULL'); } } diff --git a/it_html.class b/it_html.class index ed47abd..ece7070 100644 --- a/it_html.class +++ b/it_html.class @@ -258,21 +258,6 @@ function _parse_args($args) } -# internal -function fix_encoding($string, $silent = false) -{ - if (grapheme_strlen($string) === null) - list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string))); - else if (preg_match('/\xc3\x83\xc2[\x84\x9c\xa4\xb6\xbc\xa9\xa0]/', $string)) # Double encoded ÄÖÜäöüéà, UTF8SAFE - list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string))); - - if ($error && !$silent) - it::error(array('title' => $error, 'skipfiles' => "it_html")); - - return $string; -} - - /** * function div($args...) * Return a <div>...</div> element @@ -328,7 +313,7 @@ function _tag($name, $args) $result .= " />$newline"; if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8") - $result = self::fix_encoding($result); + $result = it::any2utf8($result, "error in $name()"); return $result; } @@ -498,7 +483,7 @@ function Q($string) if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\xff]/', $string)) # WARNING: copy/pasted to _tag() { if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8") - $string = self::fix_encoding($string); + $string = it::any2utf8($string, "error in Q()"); $origstring = $string; $string = @htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? 
it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset']); @@ -371,4 +371,12 @@ is(it::any2utf8( is(it::any2utf8(utf8_encode("ü")), "ü", "it::any2utf8 double encoding"); +is(it::any2utf8("Meier"), "Meier", "it::any2utf8 ascii"); +is(it::any2utf8("Müller"), "Müller", "it::any2utf8 utf-8 latin1"); +is(it::any2utf8("Aslı"), "Aslı", "it::any2utf8 utf-8 non-latin1"); +is(it::any2utf8("é»"), "é»", "it::any2utf8 utf-8 latin1 special combination"); +is(it::any2utf8(utf8_encode("Müller")), "Müller", "it::any2utf8 double encoded latin1"); +is(it::any2utf8(utf8_decode("Müller")), "Müller", "it::any2utf8 incorrectly encoded latin1"); +is(it::any2utf8("a💚b"), "a💚b", "it::any2utf8 correctly handles 4-byte utf-8 character GREEN HEART"); + ?> diff --git a/tests/it_html.t b/tests/it_html.t index 3ac69f6..307bc7f 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -170,16 +170,5 @@ is(it_html::entity_decode("’"), "'", "it_html::entity_decode numeric decim is(it_html::entity_decode("࿿"), " ", "it_html::entity_decode invalid numeric hex entity"); is(it_html::entity_decode("ϧ"), " ", "it_html::entity_decode invalid numeric decimal entity"); -is(it_html::fix_encoding("Meier"), "Meier", "it_html::fix_encoding ascii"); -is(it_html::fix_encoding("Müller"), "Müller", "it_html::fix_encoding utf-8 latin1"); -is(it_html::fix_encoding("Aslı"), "Aslı", "it_html::fix_encoding utf-8 non-latin1"); -is(it_html::fix_encoding("é»"), "é»", "it_html::fix_encoding utf-8 latin1 special combination"); - -is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "it_html::fix_encoding double encoded latin1"); -is(it_html::fix_encoding(utf8_encode("é»"), true), "é»", "it_html::fix_encoding double encoded latin1 special combination"); - -is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller", "it_html::fix_encoding incorrectly encoded latin1"); - -is(it_html::fix_encoding("a💚b"), "a💚b", "it_html::fix_encoding correctly handles 4-byte utf-8 character GREEN HEART"); 
?> |