summary refs log tree commit diff
diff options
context:
space:
mode:
author Urban Müller 2013-09-19 13:05:58 +0000
committer Urban Müller 2013-09-19 13:05:58 +0000
commit d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1 (patch)
tree 1670a9b8c99ce8addd3b5fca2db6d80109bf2319
parent d4e9c671d73026d7a85acd0304fca2cc7f09ac5e (diff)
download itools-d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1.tar.gz
itools-d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1.tar.bz2
itools-d61d1b2faa02fb7852c13ae9dd1bedc8b60f48e1.zip
handle illegal utf-8 characters, small cleanup
-rw-r--r-- it.class 10
1 files changed, 6 insertions, 4 deletions
diff --git a/it.class b/it.class
index d84f266..0c2429e 100644
--- a/it.class
+++ b/it.class
@@ -406,7 +406,7 @@ static function grep($pattern, $array, $p = array())
}
/**
- * Convert string to utf8 if it was not already utf-8 before. Also handles double encoding
+ * Convert string or array to utf8 if it was not already utf-8 before. Also handles double encoding
* @param $value String or array to convert
* @param $errprefix Error message to output if anything needed to be done
* @return Same string in utf-8 encoding
@@ -422,11 +422,13 @@ static function any2utf8($value, $errprefix = "")
else if (is_string($value))
{
if (grapheme_strlen($value) === null)
- list($value, $error) = array(utf8_encode($value), utf8_encode("$errprefix: incorrect utf8-encoding. input=" . trim($value)));
+ list($value, $error) = array(utf8_encode($value), utf8_encode("incorrect utf8-encoding. input=$value"));
if (preg_match('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', $value))
- list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), "$errprefix: double utf8-encoding. input=" . trim($value));
+ list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), "double utf8-encoding. input=$value");
+ if (preg_match('/\xef\xb7|\xef\xbf/', $value))
+ list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe-\xbf]/', " ", $value), "forbidden utf-8 character. input=$value");
if ($error && $errprefix)
- it::error(array('title' => $error));
+ it::error(array('title' => "$errprefix: " . trim($error)));
}
return $value;