diff options
author | Urban Müller | 2013-09-19 13:05:58 +0000 |
---|---|---|
committer | Urban Müller | 2013-09-19 13:05:58 +0000 |
commit | d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1 (patch) | |
tree | 1670a9b8c99ce8addd3b5fca2db6d80109bf2319 /it.class | |
parent | d4e9c671d73026d7a85acd0304fca2cc7f09ac5e (diff) | |
download | itools-d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1.tar.gz itools-d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1.tar.bz2 itools-d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1.zip |
handle illegal utf-8 characters, small cleanup
Diffstat (limited to 'it.class')
-rw-r--r-- | it.class | 10 |
1 file changed, 6 insertions, 4 deletions
```diff
@@ -406,7 +406,7 @@ static function grep($pattern, $array, $p = array())
 }
 
 /**
- * Convert string to utf8 if it was not already utf-8 before. Also handles double encoding
+ * Convert string or array to utf8 if it was not already utf-8 before. Also handles double encoding
  * @param $value String or array to convert
  * @param $errprefix Error message to output if anything needed to be done
  * @return Same string in utf-8 encoding
@@ -422,11 +422,13 @@ static function any2utf8($value, $errprefix = "")
 	else if (is_string($value))
 	{
 		if (grapheme_strlen($value) === null)
-			list($value, $error) = array(utf8_encode($value), utf8_encode("$errprefix: incorrect utf8-encoding. input=" . trim($value)));
+			list($value, $error) = array(utf8_encode($value), utf8_encode("incorrect utf8-encoding. input=$value"));
 		if (preg_match('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', $value))
-			list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), "$errprefix: double utf8-encoding. input=" . trim($value));
+			list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), "double utf8-encoding. input=$value");
+		if (preg_match('/\xef\xb7|\xef\xbf/', $value))
+			list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe-\xbf]/', " ", $value), "forbidden utf-8 character. input=$value");
 		if ($error && $errprefix)
-			it::error(array('title' => $error));
+			it::error(array('title' => "$errprefix: " . trim($error)));
 	}
 
 	return $value;
```