From 4d1b92f1d4e9f2f80d04b452246399cb40bb543d Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Tue, 12 Mar 2019 17:57:48 +0100
Subject: Normalize to FORM_C in any2utf8

---
 it.class  | 3 +--
 test/it.t | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/it.class b/it.class
index 937cde7..714ffc4 100644
--- a/it.class
+++ b/it.class
@@ -533,8 +533,7 @@ static function any2utf8($value, $errprefix = "")
 		if (preg_match('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', $value))
 			list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', " ", $value), "forbidden utf-8 character. input=$value");
 		$value = preg_replace('/\xc2\xad/', '', $value);	# Kill invisible soft hyphens
-		if (preg_match('/\xcc\x88/', $value))	# Normalize combining diaeresis to umlaut
-			$value = strtr($value, [ "a\xcc\x88" => 'ä', "A\xcc\x88" => 'Ä', "e\xcc\x88" => 'ë', "E\xcc\x88" => 'Ë', "i\xcc\x88" => 'ï', "I\xcc\x88" => 'Ï', "o\xcc\x88" => 'ö', "O\xcc\x88" => 'Ö', "u\xcc\x88" => 'ü', "U\xcc\x88" => 'Ü' ]);
+		$value = normalizer_normalize($value, Normalizer::FORM_C);
 		if ($error && $errprefix)
 			it::error(array('title' => "$errprefix: " . trim($error)));
 	}
diff --git a/test/it.t b/test/it.t
index fdaef6c..b624f96 100755
--- a/test/it.t
+++ b/test/it.t
@@ -424,6 +424,8 @@ foreach ([ 'a' => 'ä', 'e' => 'ë', 'i' => 'ï', 'o' => 'ö', 'u' => 'ü' ] as
 	is(it::any2utf8("$src\xcc\x88"), $dst, "it::any2utf8 normalize combining diaeresis $dst to umlaut code $dst");
 }
 
+is(it::any2utf8("\x65\xcc\x81"), "é", "it::any2utf8 convert to normal form C");
+
 foreach (array($dummy, false, true, null, 1, "a", "Ä", "/", array()) as $var)
 	is(it::json_decode(it::json_encode($var)), $var);
 is(it::json_decode('{"foo":"bar"}')->foo, "bar");
-- 
cgit v1.2.3