From d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1 Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Thu, 19 Sep 2013 13:05:58 +0000 Subject: handle illegal utf-8 characters, small cleanup --- it.class | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'it.class') diff --git a/it.class b/it.class index d84f266..0c2429e 100644 --- a/it.class +++ b/it.class @@ -406,7 +406,7 @@ static function grep($pattern, $array, $p = array()) } /** - * Convert string to utf8 if it was not already utf-8 before. Also handles double encoding + * Convert string or array to utf8 if it was not already utf-8 before. Also handles double encoding * @param $value String or array to convert * @param $errprefix Error message to output if anything needed to be done * @return Same string in utf-8 encoding @@ -422,11 +422,13 @@ static function any2utf8($value, $errprefix = "") else if (is_string($value)) { if (grapheme_strlen($value) === null) - list($value, $error) = array(utf8_encode($value), utf8_encode("$errprefix: incorrect utf8-encoding. input=" . trim($value))); + list($value, $error) = array(utf8_encode($value), utf8_encode("incorrect utf8-encoding. input=$value")); if (preg_match('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', $value)) - list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), "$errprefix: double utf8-encoding. input=" . trim($value)); + list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), "double utf8-encoding. input=$value"); + if (preg_match('/\xef\xb7|\xef\xbf/', $value)) + list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe-\xbf]/', " ", $value), "forbidden utf-8 character. input=$value"); if ($error && $errprefix) - it::error(array('title' => $error)); + it::error(array('title' => "$errprefix: " . trim($error))); } return $value; -- cgit v1.2.3