summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christian Schneider 2026-03-31 18:47:31 +0200
committer Christian Schneider 2026-03-31 18:47:31 +0200
commit 074fbc730e77c2bfc913ca50e51045f958953e05 (patch)
tree 5ada8740737611b5669cd3b645cb5fc8ef39a915
parent 7211d0baed1e77db85ab0c21062ddeeecac7caaf (diff)
download itools-master.tar.gz
itools-master.tar.bz2
itools-master.zip
Remove combining characters at beginning of string in it::any2utf8() (mail8281) [HEAD, master]
-rw-r--r-- it.class 2
-rwxr-xr-x test/it.t 1
2 files changed, 3 insertions, 0 deletions
diff --git a/it.class b/it.class
index 0c2ec18..ddb95eb 100644
--- a/it.class
+++ b/it.class
@@ -610,6 +610,8 @@ static function any2utf8($value, $errprefix = "")
list($value, $error) = array(preg_replace('/\xef\xb7[\x90-\xaf]|\xef\xbf[\xbe\xbf]/', " ", $value), "forbidden utf-8 character. input=$value");
$value = preg_replace('/\xc2\xad/', '', $value); # Kill invisible soft hyphens
$value = normalizer_normalize($value, Normalizer::FORM_C);
+ if (preg_match('/^\pM/u', $value)) # Remove combining characters (e.g. U+0338 "Combining Long Solidus Overlay") at beginning of string
+ list($value, $error) = array(preg_replace('/^\pM+/u', "", $value), "combining character at beginning of string. input=$value");
if ($error && $errprefix)
it::error(array('title' => "$errprefix: " . trim($error)));
}
diff --git a/test/it.t b/test/it.t
index b475fbc..dc60727 100755
--- a/test/it.t
+++ b/test/it.t
@@ -470,6 +470,7 @@ is(it::any2utf8([1, true, false, null]), [1, true, false, null], "any2utf8 shoul
is(it::any2utf8([it::utf8_decode('Müller') => it::utf8_decode('Müller')]), ['Müller' => 'Müller'], "it::any2utf8 latin1 keys");
is(it::any2utf8("\xc2\xad"), "", "it::any2utf8 remove soft hyphens");
+is(it::any2utf8("\u{0338}\u{0338}a\u{0338}b"), "a\u{0338}b", "it::any2utf8 remove combining characters at beginning");
foreach ([ 'a' => 'ä', 'e' => 'ë', 'i' => 'ï', 'o' => 'ö', 'u' => 'ü' ] as $src => $dst)
{