From 55bd4e0052b830256ad1d1134bbe5c7231d1427b Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Mon, 12 Dec 2022 16:55:47 +0100 Subject: Add and use it::utf8_decode and it::utf8_encode for easier migration to PHP 8.2 --- test/exec.t | 2 +- test/it.t | 34 +++++++++++++++++----------------- test/it_xml.t | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'test') diff --git a/test/exec.t b/test/exec.t index 09e16da..04850fc 100755 --- a/test/exec.t +++ b/test/exec.t @@ -70,7 +70,7 @@ foreach (["", "C", "de_CH", "de_CH.utf8"] as $locale) setlocale(LC_ALL, $locale); $arg = "preüpost"; if (it::match('utf8', $locale)) - $arg = utf8_encode($arg); + $arg = it::any2utf8($arg); is(it::exec("echo " . $arg), $arg . "\n", "exec with umlaut (locale '$locale')"); is(it::exec("echo {arg}", ['arg' => $arg]), $arg . "\n", "exec with argument and umlaut (locale '$locale')"); } diff --git a/test/it.t b/test/it.t index 7733c00..41758da 100755 --- a/test/it.t +++ b/test/it.t @@ -221,15 +221,15 @@ _match( ); _match( - utf8_decode('ö'), utf8_decode('Ö'), - utf8_decode('Ö'), + it::utf8_decode('ö'), it::utf8_decode('Ö'), + it::utf8_decode('Ö'), 'match umlaute in de_CH.latin1 case insensitive', ['utf8' => false] ); _match( - utf8_decode('aöBÜ'), utf8_decode('AÖbü'), - utf8_decode('AÖbü'), + it::utf8_decode('aöBÜ'), it::utf8_decode('AÖbü'), + it::utf8_decode('AÖbü'), "match umlaute with non-utf-8 override in p", ['utf8' => false] ); @@ -419,32 +419,32 @@ is(grapheme_strlen("\xc1"), null, "need grapheme_strlen side effect for any2utf8 is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input"); is(it::any2utf8('Müller'), 'Müller', "it::any2utf8 utf8 input"); is(it::any2utf8('Aslı'), 'Aslı', "it::any2utf8 utf8 non-latin1 input"); -is(it::any2utf8(utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input"); +is(it::any2utf8(it::utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input"); is(it::any2utf8( ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'), # omit soft hyphen cause we filter it ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', "it::any2utf8 utf8 input (exhaustive alphabet)"); is(it::any2utf8( - utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')), + it::utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')), ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', "it::any2utf8 latin1 input (exhaustive alphabet)"); -is(it::any2utf8(utf8_encode("ü")), "ü", "it::any2utf8 double encoding"); +is(it::any2utf8(it::utf8_encode("ü")), "ü", "it::any2utf8 double encoding"); is(it::any2utf8("Meier"), "Meier", "it::any2utf8 ascii"); is(it::any2utf8("Müller"), "Müller", "it::any2utf8 utf-8 latin1"); is(it::any2utf8("Aslı"), "Aslı", "it::any2utf8 utf-8 non-latin1"); is(it::any2utf8("é»"), "é»", "it::any2utf8 utf-8 latin1 special combination"); -is(it::any2utf8(utf8_encode("Müller")), "Müller", "it::any2utf8 doubly encoded utf8"); -is(it::any2utf8(utf8_encode(utf8_encode("Müller"))), "Müller", "it::any2utf8 triply encoded utf8"); -is(it::any2utf8(utf8_decode("Müller")), "Müller", "it::any2utf8 incorrectly encoded latin1"); +is(it::any2utf8(it::utf8_encode("Müller")), "Müller", "it::any2utf8 doubly encoded utf8"); +is(it::any2utf8(it::utf8_encode(it::utf8_encode("Müller"))), "Müller", "it::any2utf8 triply encoded utf8"); +is(it::any2utf8(it::utf8_decode("Müller")), "Müller", "it::any2utf8 incorrectly encoded latin1"); is(it::any2utf8("a💚b"), "a💚b", "it::any2utf8 correctly handles 4-byte utf-8 character GREEN HEART"); -is(it::any2utf8(["foo", utf8_decode("bär")]), ["foo", "bär"], "any2utf8 on arrays"); -is(it::any2utf8(["foo", [utf8_decode("bär")]]), ["foo", ["bär"]], "any2utf8 on recursive arrays"); +is(it::any2utf8(["foo", it::utf8_decode("bär")]), ["foo", "bär"], "any2utf8 on arrays"); +is(it::any2utf8(["foo", [it::utf8_decode("bär")]]), ["foo", ["bär"]], "any2utf8 on recursive arrays"); is(it::any2utf8([1, true, false, null]), [1, true, false, null], "any2utf8 should leave types alone"); -is(it::any2utf8([utf8_decode('Müller') => utf8_decode('Müller')]), ['Müller' => 'Müller'], "it::any2utf8 latin1 keys"); +is(it::any2utf8([it::utf8_decode('Müller') => it::utf8_decode('Müller')]), ['Müller' => 'Müller'], "it::any2utf8 latin1 keys"); is(it::any2utf8("\xc2\xad"), "", "it::any2utf8 remove soft hyphens"); @@ -532,10 +532,10 @@ it::file_put($tmpfile, "bb"); is(it::file_get($tmpfile), "bb"); unlink($tmpfile); -requesturi(utf8_decode("lüönd"), "lüönd"); -requesturi(utf8_decode("ü").utf8_encode("ü"), "üü"); -requesturi(utf8_encode("müller"), "müller"); -requesturi(utf8_encode(utf8_encode("müller")), "müller"); +requesturi(it::utf8_decode("lüönd"), "lüönd"); +requesturi(it::utf8_decode("ü").it::utf8_encode("ü"), "üü"); +requesturi(it::utf8_encode("müller"), "müller"); +requesturi(it::utf8_encode(it::utf8_encode("müller")), "müller"); requesturi("I 💚 Nü York", "I 💚 Nü York"); function requesturi($teststring, $expect) diff --git a/test/it_xml.t b/test/it_xml.t index e21f052..88a5cf4 100755 --- a/test/it_xml.t +++ b/test/it_xml.t @@ -63,7 +63,7 @@ _match( _match( 'x ü y', - utf8_decode('foo Object ( [val] => x ü y ) '), + it::utf8_decode('foo Object ( [val] => x ü y ) '), 'Manual encoding override', "", ['encoding' => "iso-8859-1"] @@ -77,7 +77,7 @@ _match( _match( '&amp; <a> &amp; <b> &amp; <c> ü', - utf8_decode('foo Object ( [val] => & & & ü ) '), + it::utf8_decode('foo Object ( [val] => & & & ü ) '), 'Predecode illegal entities while keeping properly encoded ones (iso-8859-1)', "", ['encoding' => "iso-8859-1"] -- cgit v1.2.3