From 9c523156bbc9d34ff2a16ec3b2c345951fc55287 Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Tue, 24 Apr 2012 14:18:21 +0000 Subject: merged it_html::fix_encoding in it::any2utf8 --- it.class | 14 ++++++++++---- it_dbi.class | 2 +- it_html.class | 19 ++----------------- tests/it.t | 8 ++++++++ tests/it_html.t | 11 ----------- 5 files changed, 21 insertions(+), 33 deletions(-) diff --git a/it.class b/it.class index b84c740..407e200 100644 --- a/it.class +++ b/it.class @@ -406,16 +406,22 @@ static function grep($pattern, $array, $p = array()) } /** - * Convert string to utf8 if it was not already utf-8 before + * Convert string to utf8 if it was not already utf-8 before. Also handles double encoding * @param $value String to convert + * @param $errmsg Error message to output if anything needed to be done * @return Same string in utf-8 encoding */ -function any2utf8($value) +function any2utf8($value, $errmsg = "") { if (grapheme_strlen($value) === null) - $value = utf8_encode($value); + list($value, $error) = array(utf8_encode($value), utf8_encode("$errmsg: incorrect utf8-encoding. input=" . trim($value))); + if (preg_match('/\xc3\x83\xc2([\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f])/', $value)) + list($value, $error) = array(preg_replace('/\xc3\x83\xc2([\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f])/', "\xc3\$1", $value), utf8_encode("$errmsg: double utf8-encoding. input=" . trim($value))); - return preg_replace('/\xc3\x83\xc2([\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f])/', "\xc3\$1", $value); # fix most common double encodings, UTF8SAFE + if ($error && $errmsg) + it::error(array('title' => $error, 'skipfiles' => "it_html")); + + return $value; } /** diff --git a/it_dbi.class b/it_dbi.class index ced925b..512cd4c 100644 --- a/it_dbi.class +++ b/it_dbi.class @@ -249,7 +249,7 @@ function _set($tags, $allfields = false) else if ($allfields || ($value !== $this->_data[$field])) { if ($this->_p['charset'] == "utf8") # NOTE: Mysql charset is simply utf8, not utf-8 - $value = it_html::fix_encoding($value); + $value = it::any2utf8($value, "error in db-field $field"); $r[] = "`$field`=".(isset($value) ? $this->escape_string($value) : 'NULL'); } } diff --git a/it_html.class b/it_html.class index ed47abd..ece7070 100644 --- a/it_html.class +++ b/it_html.class @@ -258,21 +258,6 @@ function _parse_args($args) } -# internal -function fix_encoding($string, $silent = false) -{ - if (grapheme_strlen($string) === null) - list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string))); - else if (preg_match('/\xc3\x83\xc2[\x84\x9c\xa4\xb6\xbc\xa9\xa0]/', $string)) # Double encoded ÄÖÜäöüéà, UTF8SAFE - list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string))); - - if ($error && !$silent) - it::error(array('title' => $error, 'skipfiles' => "it_html")); - - return $string; -} - - /** * function div($args...) * Return a