diff options
author | Christian Schneider | 2019-03-12 17:57:48 +0100 |
---|---|---|
committer | Christian Schneider | 2019-03-12 17:57:48 +0100 |
commit | 4d1b92f1d4e9f2f80d04b452246399cb40bb543d (patch) | |
tree | ee37188099433141809bd98760194a07d689d546 /it.class | |
parent | 7cabc0bf2f7e3b3bf87f8f0247a7a2eae4ebfc7f (diff) | |
download | itools-4d1b92f1d4e9f2f80d04b452246399cb40bb543d.tar.gz itools-4d1b92f1d4e9f2f80d04b452246399cb40bb543d.tar.bz2 itools-4d1b92f1d4e9f2f80d04b452246399cb40bb543d.zip |
Normalize to FORM_C in any2utf8
Diffstat (limited to 'it.class')
-rw-r--r-- | it.class | 3 |
1 file changed, 1 insertion, 2 deletions
@@ -533,8 +533,7 @@ static function any2utf8($value, $errprefix = "") if (preg_match('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', $value)) list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', " ", $value), "forbidden utf-8 character. input=$value"); $value = preg_replace('/\xc2\xad/', '', $value); # Kill invisible soft hyphens - if (preg_match('/\xcc\x88/', $value)) # Normalize combining diaeresis to umlaut - $value = strtr($value, [ "a\xcc\x88" => 'ä', "A\xcc\x88" => 'Ä', "e\xcc\x88" => 'ë', "E\xcc\x88" => 'Ë', "i\xcc\x88" => 'ï', "I\xcc\x88" => 'Ï', "o\xcc\x88" => 'ö', "O\xcc\x88" => 'Ö', "u\xcc\x88" => 'ü', "U\xcc\x88" => 'Ü' ]); + $value = normalizer_normalize($value, Normalizer::FORM_C); if ($error && $errprefix) it::error(array('title' => "$errprefix: " . trim($error))); } |