diff options
author | Christian Schneider | 2018-01-18 18:43:40 +0100 |
---|---|---|
committer | Christian Schneider | 2018-01-18 18:43:40 +0100 |
commit | 960b387c66c0126f862e21d5c2b56fa17e399b75 (patch) | |
tree | 71338d0939dfb4bc742bfcef98e63136636689a9 /it.class | |
parent | 50485bf9d9243693514ace67e9931a6c24947333 (diff) | |
download | itools-960b387c66c0126f862e21d5c2b56fa17e399b75.tar.gz itools-960b387c66c0126f862e21d5c2b56fa17e399b75.tar.bz2 itools-960b387c66c0126f862e21d5c2b56fa17e399b75.zip |
Normalize combining diaeresis to umlaut in it::any2utf8()
Diffstat (limited to 'it.class')
-rw-r--r-- | it.class | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
@@ -519,7 +519,9 @@ static function any2utf8($value, $errprefix = "")
   list($value, $error) = array(it::any2utf8(preg_replace_callback('/\xc3[\x82\x83]\xc2[\x82\x83\xbc\xa9\xa4\xb6\xa8\xa2\xa0\xb4\xaa\xa7\x84\xab\xae\x9c\xaf\x96\xb2\xbb\xb9\x9f]/', function($m) {return utf8_decode($m[0]);}, $value)), $errprefix ? "$errprefix: double utf8-encoding. input=$value" : "");
 if (preg_match('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', $value))
   list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', " ", $value), "forbidden utf-8 character. input=$value");
-$value = preg_replace('/\xc2\xad/', '', $value);
+$value = preg_replace('/\xc2\xad/', '', $value); # Kill invisible soft hyphens
+if (preg_match('/\xcc\x88/', $value)) # Normalize combining diaeresis to umlaut
+  $value = strtr($value, [ "a\xcc\x88" => 'ä', "A\xcc\x88" => 'Ä', "e\xcc\x88" => 'ë', "E\xcc\x88" => 'Ë', "i\xcc\x88" => 'ï', "I\xcc\x88" => 'Ï', "o\xcc\x88" => 'ö', "O\xcc\x88" => 'Ö', "u\xcc\x88" => 'ü', "U\xcc\x88" => 'Ü' ]);
 if ($error && $errprefix)
   it::error(array('title' => "$errprefix: " . trim($error)));
 }
```