From 4574035ec43cabf050a7ab036bbba1cd2447a18f Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Thu, 22 Mar 2012 18:48:52 +0000 Subject: check encoding in Q() and _tag(), adapt tests to utf-8 --- tests/it_html.t | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'tests') diff --git a/tests/it_html.t b/tests/it_html.t index 0def431..c955359 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -4,6 +4,7 @@ # Tests for html.class # Traditional html generation +ini_set('default_charset', "utf-8"); new it_html(array('htmltype' => "html")); is( @@ -95,30 +96,22 @@ is(

Wolken

'), - ' swisspics posted < < & yesterday a photo tag missmatch:

', + ' swisspics posted < < ä & yesterday a photo tag missmatch:

', 'it_html::sanitize tag soup' ); is( it_html::sanitize('q←x'), - "q←x", + "q←x", 'it_html::sanitize preserve numeric entities' ); -it_html::configure(array('charset' => "utf-8")); is( it_html::sanitize('qüx'), "q\xc3\xbcx", 'it_html::sanitize with utf-8' ); -it_html::configure(array('charset' => "iso-8859-1")); -is( - it_html::sanitize('qüx'), - "q\xfcx", - 'it_html::sanitize with latin1' -); - is( it_html::sanitize('a
b
'), "a
b
", @@ -126,8 +119,8 @@ is( ); is( - U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "", 'gnp' => "fasel")))), - '/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%FCrich%3E&bar[qux][gn%F6p]=fasel', + U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "", 'gnöp' => "fasel")))), + '/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%C3%BCrich%3E&bar[qux][gn%C3%B6p]=fasel', 'U() with nested arrays' ); @@ -149,11 +142,30 @@ is( 'U() converting of \ to /' ); -is(it_html::entity_decode("ä"), ""); -is(it_html::entity_decode("’"), "'"); +is(it_html::entity_decode("ä"), "ä"); is(it_html::entity_decode("J"), "J"); is(it_html::entity_decode("J"), "J"); -is(it_html::entity_decode("࿿"), " "); is(it_html::entity_decode("A"), "A"); + + +# +# check transliterations in iso-8859-1 +# + +it_html::configure(array('charset' => "iso-8859-1")); + +is( + it_html::sanitize('qüx'), + "q\xfcx", + 'it_html::sanitize with latin1' +); + +is( + it_html::sanitize('q←x'), + "q←x", + 'it_html::sanitize preserve non-decodable numeric entities' +); +is(it_html::entity_decode("’"), "'"); +is(it_html::entity_decode("࿿"), " "); is(it_html::entity_decode("ϧ"), " "); ?> -- cgit v1.2.3 From f5e2d8058ee6d8d014b0303dae231e5ae1ae27df Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Thu, 22 Mar 2012 19:43:52 +0000 Subject: converted to utf-8 --- tests/getopt.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/getopt.t b/tests/getopt.t index 7a84588..d67738d 100755 --- a/tests/getopt.t +++ b/tests/getopt.t @@ -17,7 +17,7 @@ function getopt_ok($argv, $exp, $name) return is($got['argument'], $exp, $name); } -foreach (array("" => "blah gnaber", " (umlaute)" => "pre post") as $variant => $testarg) { +foreach (array("" => "blah gnaber", " (umlaute)" => "pre üäpost") as $variant => $testarg) { getopt_ok(array('-a', $testarg), $testarg, "Short version" . $variant); getopt_ok(array('--argument', $testarg), $testarg, "Long version with space" . $variant); getopt_ok(array("--argument=$testarg"), $testarg, "Long version with equal" . $variant); -- cgit v1.2.3 From 22be3fa7ab6efd48b457413cbd72b1d21d67bfab Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Thu, 22 Mar 2012 20:21:54 +0000 Subject: cleanup, set locale for latin1 case-sensitive tests, adjust tests to file now being encoded in utf-8 --- tests/it.t | 308 ++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 184 insertions(+), 124 deletions(-) (limited to 'tests') diff --git a/tests/it.t b/tests/it.t index 1a308ec..ec95cc4 100755 --- a/tests/it.t +++ b/tests/it.t @@ -3,253 +3,313 @@ # Tests for it.class -function match($regex, $string, $expect, $name) + +# +# tests for it::match() +# +$oldcharset = ini_get('default_charset'); +$oldlocale = setlocale(LC_CTYPE, 0); + +ini_set('default_charset', 'utf-8'); +setlocale(LC_CTYPE, 'de_CH'); # required becuase we're checking German umlauts in latin1 mode + + +function match($regex, $string, $expect, $name, $p = array()) { $GLOBALS['TEST_MORE_LEVEL'] = 1; - $pass = is (it::match($regex, $string), $expect, $name); + $pass = is (it::match($regex, $string, $p), $expect, $name); if (!$pass) { - diag(" regex given: $regex"); + diag(" regex given: $regex" . ($p ? " " .D($p) : "")); diag(" regex converted: " . it::convertregex($regex)); } $GLOBALS['TEST_MORE_LEVEL'] = 0; } + match( 'b', 'aaaabaaaa', 'b', 'simple regex' - ); +); + match( 'a/b', ' a/b ', 'a/b', 'regex with /' ); + match( 'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq', - array( 'bb', 'cc', 'dd' ), + array('bb', 'cc', 'dd'), 'return array of captures' - ); +); + match( '\bblah\b', ' blah ', 'blah', 'match \b at spaces' - ); +); + match( '\bblah\b', 'blah', 'blah', 'match \b at end of string' - ); +); + match( '\bblah\b', 'ablahc', false, 'don\'t match \b at word chars' - ); +); + match( - '\bblah\b', 'blah', + '\bblah\b', 'Üblahä', false, - 'don\'t match \b at umlaute in latin1' - ); + 'don\'t match \b at umlaute' +); + match( '\Bblah\B', ' blah ', false, 'don\'t match \B at spaces' - ); +); + match( '\Bblah\B', 'blah', false, 'don\'t match \B at end of string' - ); +); + match( '\Bblah\B', 'ablahc', 'blah', 'match \B at word chars' - ); +); + match( - '\Bblah\B', 'blah', + '\Bblah\B', 'Üblahä', 'blah', - 'match \B at umlaute in latin1' - ); + 'match \B at umlaute' +); + match( - '\w+', ' |#blah ', - 'blah', + '\w+', ' |#Üblahä ', + 'Üblahä', 'include umlaute in \w' - ); +); + match( - '[[:alpha:]]+', ' |#blah ', - 'blah', + '[[:alpha:]]+', ' |#blahä ', + 'blahä', 'include umlaute in [[:alpha:]]' - ); +); + match( - '\W+', ' |#blah ', + '\W+', ' |#Üblahä ', ' |#', 'don\'t include umlaute in \W' - ); +); + match( - '\ba', 'a', + '\ba', 'äa', '', '\b must know umlauts' - ); +); -eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' ); -$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex ); +match( + 'aaa\\\\w+', ' aaa\www ', + 'aaa\www', + 'don\'t parse \w in \\\\w at beginning (match)' +); + +match( + 'aaa\\\\w+', ' aaa\www ', + 'aaa\www', + 'don\'t parse \w in \\\\w after chars (match)' +); + +eval('$escapedwordregex = "' . it::convertregex('\w') . '";'); +$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex); match( '\\\\w+', $escapedwordregex, false, 'don\'t parse \w in \\\\w at beginning (no match)' - ); -match( - 'aaa\\\\w+', ' aaa\www ', - 'aaa\www', - 'don\'t parse \w in \\\\w at beginning (match)' - ); +); + match( 'aaa\\\\w+', 'aaa' . $escapedwordregex, false, 'don\'t parse \w in \\\\w after chars (no match)' - ); -match( - 'aaa\\\\w+', ' aaa\www ', - 'aaa\www', - 'don\'t parse \w in \\\\w after chars (match)' - ); +); + match( '\\\\\\\\w+', '\\' . $escapedwordregex, false, 'don\'t parse \w in \\\\\\\w (no match)' - ); +); + match( '\\\\\\\\w+', ' \\\\www ', '\\\\www', 'don\'t parse \\\\\\\\w as \w (match)' - ); +); + match( '[\w]+', '[[[]]]---', false, 'replace \w in [\w] correctly (no match)' - ); +); + match( '[\w]+', ' \\\\aword[[[]]] ', 'aword', 'replace \w in [\w] correctly (match)' - ); +); + match( '[\\\\w]+', ' blabergna ', false, 'don\'t parse \w in [\\\\w] (no match)' - ); +); + match( '[\\\\w]+', ' \\\\worda[[[]', '\\\\w', 'don\'t parse \w in [\\\\w] (match)' - ); +); + match( '[a\W]+', 'bbbbbbb a a%$+ accccc', ' a a%$+ a', '\W in []' - ); +); + match( - '\\\\\\w+', ' \blah ', - '\blah', + '\\\\\\w+', ' \Üblahä ', + '\Üblahä', 'parse \w in \\\\\\w at beginning' - ); +); + match( - 'aaa\\\\\\w+', ' aaa\blah ', - 'aaa\blah', + 'aaa\\\\\\w+', ' aaa\Üblahä ', + 'aaa\Üblahä', 'parse \w in \\\\\\w after chars' - ); -is( - it::replace( - array( - 'regex1' => 'repl1', - 'regex2' => 'repl2', - 'regex3' => 'repl3' ), - 'regex2 regex1 regex3' ), - 'repl2 repl1 repl3', - 'test tr regex function' - ); -is( - it::match( '\w+', 'word1 wrd2 word_3', array('all' => true )), - array( 'word1', 'wrd2', 'word_3' ), - "test match_all function" - ); +); + +match( + '\w+', 'word1 wörd2 word_3', + array('word1', 'wörd2', 'word_3'), + "test match_all function", + array('all' => true) +); + match( 'aBcD', ' aBcD ', 'aBcD', "caseinsensitive is default" - ); +); + match( - '', '', - '', - 'match umlaute in latin1 case insensitive' - ); + '\w+', 'Müller', + 'Müller', + '\w matches umlaut in utf-8 mode' +); -is( - it::match(utf8_encode('aB'), utf8_encode("Ab"), array('utf8' => true)), - utf8_encode('Ab'), - "match utf-8 umlaute in case insensitive" +match( + 'M.ller', 'Müller', + 'Müller', + '. matches umlaut in utf-8 mode' ); -$oldcharset = ini_get('default_charset'); -ini_set('default_charset', 'utf-8'); match( - utf8_encode('aB'), utf8_encode('Ab'), - utf8_encode('Ab'), - "match utf-8 umlaute in case insensitive using default_charset" + utf8_decode('ö'), utf8_decode('Ö'), + utf8_decode('Ö'), + 'match umlaute in de_CH.latin1 case insensitive', + array('utf8' => false) ); -is( - it::match('aB', 'Ab', array('utf8' => false)), - 'Ab', - "non-utf-8 override with default_charset=utf-8" + +match( + utf8_decode('aöBÜ'), utf8_decode('AÖbü'), + utf8_decode('AÖbü'), + "match umlaute with non-utf-8 override in p", + array('utf8' => false) ); + + match( - '\w+', utf8_encode('Mller'), - utf8_encode('Mller'), - '\w matches umlaut in utf-8 mode' + 'abc', "aBc", + false, + "set case sensitivity by parameter", + array('casesensitive' => 1), ); + match( - 'M.ller', utf8_encode('Mller'), - utf8_encode('Mller'), - '. matches umlaut in utf-8 mode' + '\w+', 'word1 wörd2 word_3', + array('word1', 'wörd2', 'word_3'), + "test all => 1 without captures", + array('all' => 1) ); -ini_set('default_charset', $oldcharset); -is( - it::match( 'abc', "aBc", array('casesensitive' => 1 )), - false, - "set case sensitivity by parameter" - ); +match( + '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array('12', '3', '4'), + "test all => 1 with one capture", + array('all' => 1) +); +match( + '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')), + "test all => 1 with captures", + array('all' => 1) +); + +match( + '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')), + "test all => 1,pattern_order => 1", + array('all' => 1, 'pattern_order' => 1) +); + +ini_set('default_charset', 'iso-8859-1'); +match( + 'aöBÜ', "AÖbü", + 'AÖbü', + "match utf-8 umlaute in case insensitive mode with utf8 override", + array('utf8' => true) +); +ini_set('default_charset', 'utf-8'); + + +# +# tests for it::replace() +# is( - it::match( '\w+', 'word1 wrd2 word_3', array('all' => 1 )), - array( 'word1', 'wrd2', 'word_3' ), - "test all=>1 without captures" - ); -is( - it::match( '\w+\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1 )), - array( '12', '3', '4' ), - "test all=>1 with one capture" - ); -is( - it::match( '(\w+)\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1 )), - array( array( 'word1', '12' ), array( 'wrd2', '3' ), array( 'word_3', '4' ) ), - "test all=>1 with captures" - ); -is( - it::match( '(\w+)\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )), - array( array( 'word1', 'wrd2', 'word_3' ), array( '12', '3', '4' ) ), - "test all=>1,pattern_order=>1" - ); + it::replace( + array( + 'regex1' => 'repl1', + 'regex2' => 'repl2', + 'regex3' => 'repl3'), + 'regex2 regex1 regex3'), + 'repl2 repl1 repl3', + 'test tr regex function' +); is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12"); is(it::replace(array('!' => "x"), "!"), "x"); -is(it::replace(array('\w' => "x"), "o"), "xx"); -is(it::replace(array('[[:alpha:]]' => "x"), ""), "x"); -is(it::replace(array('\w' => "x", '#' => "!"), "#"), "!x"); -is(it::replace(array('#' => "!", '\w' => "x"), "#"), "!x"); -is(it::replace(array('' => "x"), ""), "x"); +is(it::replace(array('\w' => "x"), "oö"), "xx"); +is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x"); +is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); +is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); +is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); + +setlocale(LC_CTYPE, $oldlocale); +ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset + + # it::filter_keys tests $data = array('a' => 1, 'b' => 2, 'c' => 3); -- cgit v1.2.3 From b7e99acb4b7f53799751c07ac9aae3de5fabd99e Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Thu, 22 Mar 2012 22:45:55 +0000 Subject: adapt to utf-8 --- tests/exec.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/exec.t b/tests/exec.t index 689bd26..9425420 100755 --- a/tests/exec.t +++ b/tests/exec.t @@ -11,7 +11,7 @@ is(it::shell_command("echo {arg}", array('arg' => '')), "echo ''", "quote empty foreach (array("", "C", "de_CH", "de_CH.utf8") as $locale) { setlocale(LC_ALL, $locale); - $arg = "prepost"; + $arg = "preüpost"; if (it::match('utf8', $locale)) $arg = utf8_encode($arg); is(it::exec("echo " . $arg), $arg . "\n", "exec with umlaut (locale '$locale')"); -- cgit v1.2.3 From 6c6828a8f0904110a67fe89031f9d4eaedf29213 Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Fri, 23 Mar 2012 15:28:10 +0000 Subject: it_xml uses correct target encoding by default, adapt tests to utf-8 deafult --- tests/it_xml.t | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'tests') diff --git a/tests/it_xml.t b/tests/it_xml.t index f74c54b..d74fadf 100755 --- a/tests/it_xml.t +++ b/tests/it_xml.t @@ -5,13 +5,14 @@ function match($xmldata, $expected, $name, $prefix = "", $p = array()) { - $classname = $prefix ? ($prefix . "_xml") : "it_xml"; + $classname = ($prefix ?: "it") . "_xml"; $varname = $prefix . "foo"; $xmldata = "$xmldata"; $xml = new $classname($xmldata, $p); + $mod_utf8 = $p['encoding'] != "iso-8859-1" ? "u" : ""; is( - preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)), + preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)), $expected, "$name (string)" ); @@ -24,11 +25,10 @@ function match($xmldata, $expected, $name, $prefix = "", $p = array()) fclose($tmpfile); is( - preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)), + preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)), $expected, "$name (file)" ); - } match( @@ -44,8 +44,8 @@ match( ); match( - 'Stssihofstadt', - 'foo Object ( [attr] => Array ( [title] => Zrich ) [val] => Stssihofstadt ) ', + 'Stüssihofstadt', + 'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ', 'simple tag with latin1 content and attribute' ); @@ -62,26 +62,33 @@ match( ); match( - '&amp; <a> &amp; <b> &amp; <c> ü', - 'foo Object ( [val] => & & & ) ', - 'Predecode illegal entities while keeping properly encoded ones' + 'x ü y', + utf8_decode('foo Object ( [val] => x ü y ) '), + 'Manual encoding override', + "", + array('encoding' => "iso-8859-1") ); match( '&amp; <a> &amp; <b> &amp; <c> ü', - utf8_encode('foo Object ( [val] => & & & ) '), - 'Predecode illegal entities while keeping properly encoded ones (UTF-8)', - "", - array('encoding' => "UTF-8") + 'foo Object ( [val] => & & & ü ) ', + 'Predecode illegal entities while keeping properly encoded ones', ); +match( + '&amp; <a> &amp; <b> &amp; <c> ü', + utf8_decode('foo Object ( [val] => & & & ü ) '), + 'Predecode illegal entities while keeping properly encoded ones (iso-8859-1)', + "", + array('encoding' => "iso-8859-1") +); match( "a\x05b", 'foo Object ( [val] => a b ) ', 'Illegal latin 1 character', "", - array('encoding' => "ISO-8859-1") + array('encoding' => "iso-8859-1") ); # Test inheritance -- cgit v1.2.3 From b7200b739ff651a7647d2d666e3674a7fe3cb6e2 Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Mon, 26 Mar 2012 15:11:39 +0000 Subject: fixed it_html::fix_encoding --- tests/it_html.t | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tests') diff --git a/tests/it_html.t b/tests/it_html.t index c955359..a576b47 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -168,4 +168,16 @@ is( is(it_html::entity_decode("’"), "'"); is(it_html::entity_decode("࿿"), " "); is(it_html::entity_decode("ϧ"), " "); + +is(it_html::fix_encoding("Meier"), "Meier"); +is(it_html::fix_encoding("Müller"), "Müller"); +is(it_html::fix_encoding("Aslı"), "Aslı"); +is(it_html::fix_encoding("é»"), "é»"); + +is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "double encoded latin1"); # Double encoded latin1 +is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı"); # Double encoded non-latin1 +is(it_html::fix_encoding(utf8_encode("é»"), true), "é»"); # Double encoded special combination + +is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller"); # Incorrectly decoded latin1 + ?> -- cgit v1.2.3 From bbcc6615dc5316a73f07b262950ac18d50c80497 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Mon, 26 Mar 2012 15:16:01 +0000 Subject: Improve it_html tests --- tests/it_html.t | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'tests') diff --git a/tests/it_html.t b/tests/it_html.t index a576b47..b1d271e 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -165,19 +165,19 @@ is( "q←x", 'it_html::sanitize preserve non-decodable numeric entities' ); -is(it_html::entity_decode("’"), "'"); -is(it_html::entity_decode("࿿"), " "); -is(it_html::entity_decode("ϧ"), " "); +is(it_html::entity_decode("’"), "'", "it_html::entity_decode numeric decimal entity"); +is(it_html::entity_decode("࿿"), " ", "it_html::entity_decode invalid numeric hex entity"); +is(it_html::entity_decode("ϧ"), " ", "it_html::entity_decode invalid numeric decimal entity"); -is(it_html::fix_encoding("Meier"), "Meier"); -is(it_html::fix_encoding("Müller"), "Müller"); -is(it_html::fix_encoding("Aslı"), "Aslı"); -is(it_html::fix_encoding("é»"), "é»"); +is(it_html::fix_encoding("Meier"), "Meier", "it_html::fix_encoding ascii"); +is(it_html::fix_encoding("Müller"), "Müller", "it_html::fix_encoding utf-8 latin1"); +is(it_html::fix_encoding("Aslı"), "Aslı", "it_html::fix_encoding utf-8 non-latin1"); +is(it_html::fix_encoding("é»"), "é»", "it_html::fix_encoding utf-8 latin1 special combination"); -is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "double encoded latin1"); # Double encoded latin1 -is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı"); # Double encoded non-latin1 -is(it_html::fix_encoding(utf8_encode("é»"), true), "é»"); # Double encoded special combination +is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "it_html::fix_encoding double encoded latin1"); +is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı", "it_html::fix_encoding double encoded non-latin1"); +is(it_html::fix_encoding(utf8_encode("é»"), true), "é»", "it_html::fix_encoding double encoded latin1 special combination"); -is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller"); # Incorrectly decoded latin1 +is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller", "it_html::fix_encoding incorrectly encoded latin1"); ?> -- cgit v1.2.3 From 0147e6e3aea620a54b0c3f6c932c658ee72a45a0 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 26 Mar 2012 15:20:24 +0000 Subject: new utf8 safe functions it::grep and it::substr_replace --- tests/it.t | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests') diff --git a/tests/it.t b/tests/it.t index ec95cc4..317b8a3 100755 --- a/tests/it.t +++ b/tests/it.t @@ -305,6 +305,10 @@ is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); +is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex'); +is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex'); +is(it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), 'casesensitive' => 1), array(0 => 'lower', 3 => 'UPPER'), 'set casesensitive'); +is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys'); setlocale(LC_CTYPE, $oldlocale); ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset @@ -342,4 +346,8 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime'); is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime'); +# it::substr_replace +is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii'); +is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8'); + ?> -- cgit v1.2.3 From 52239c6482d10cbea631befd8f96b74425731b72 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 26 Mar 2012 15:41:08 +0000 Subject: make get and get_multi tests encoding agnostic --- tests/it_url.t | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'tests') diff --git a/tests/it_url.t b/tests/it_url.t index e8507aa..ee683ce 100755 --- a/tests/it_url.t +++ b/tests/it_url.t @@ -87,25 +87,22 @@ is( $url = new it_url('http://www.gna.ch/'); $page = $url->get(); -is( - it::match('()', $page), - '', +ok( + strpos($page, ''), #UTF8SAFE '$url->get with url in constructor' ); $url = new it_url('http://bogus.url'); $page = $url->get('http://www.gna.ch/'); -is( - it::match('()', $page), - '', +ok( + strpos($page, ''), #UTF8SAFE '$url->get(url) with url as string arg' ); $url = new it_url('http://bogus.url'); $page = $url->get(array('url' => 'http://www.gna.ch/')); -is( - it::match('()', $page), - '', +ok( + strpos($page, ''), #UTF8SAFE '$url->get(\'url\' => url) with url as named arg' ); is( @@ -121,15 +118,14 @@ is( unset($url, $page); $page = it_url::get('http://www.gna.ch/'); -is( - it::match('()', $page), - '', +ok( + strpos($page, ''), #UTF8SAFE 'it_url::get() static call' ); $pages = it_url::get_multi('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/')); -ok(it::match('', $pages['a']), 'it_url::get_multi got first url'); -ok(it::match('', $pages['b']), 'it_url::get_multi got second url'); +ok(strpos($pages['a'], ''), 'it_url::get_multi got first url'); #UTF8SAFE +ok(strpos($pages['b'], ''), 'it_url::get_multi got second url'); #UTF8SAFE is(count($pages), 2, 'it_url::get_multi no additional array elements'); ?> -- cgit v1.2.3 From 49d2a5ce1b6ad201f051263db7c3a1f5ad6a39ab Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 26 Mar 2012 16:12:05 +0000 Subject: space after \# --- tests/it_url.t | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/tests/it_url.t b/tests/it_url.t index ee683ce..10fd015 100755 --- a/tests/it_url.t +++ b/tests/it_url.t @@ -88,21 +88,21 @@ is( $url = new it_url('http://www.gna.ch/'); $page = $url->get(); ok( - strpos($page, ''), #UTF8SAFE + strpos($page, ''), # UTF8SAFE '$url->get with url in constructor' ); $url = new it_url('http://bogus.url'); $page = $url->get('http://www.gna.ch/'); ok( - strpos($page, ''), #UTF8SAFE + strpos($page, ''), # UTF8SAFE '$url->get(url) with url as string arg' ); $url = new it_url('http://bogus.url'); $page = $url->get(array('url' => 'http://www.gna.ch/')); ok( - strpos($page, ''), #UTF8SAFE + strpos($page, ''), # UTF8SAFE '$url->get(\'url\' => url) with url as named arg' ); is( @@ -119,13 +119,13 @@ is( unset($url, $page); $page = it_url::get('http://www.gna.ch/'); ok( - strpos($page, ''), #UTF8SAFE + strpos($page, ''), # UTF8SAFE 'it_url::get() static call' ); $pages = it_url::get_multi('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/')); -ok(strpos($pages['a'], ''), 'it_url::get_multi got first url'); #UTF8SAFE -ok(strpos($pages['b'], ''), 'it_url::get_multi got second url'); #UTF8SAFE +ok(strpos($pages['a'], ''), 'it_url::get_multi got first url'); # UTF8SAFE +ok(strpos($pages['b'], ''), 'it_url::get_multi got second url'); # UTF8SAFE is(count($pages), 2, 'it_url::get_multi no additional array elements'); ?> -- cgit v1.2.3 From b1c0b4946572027c8de564730a89ec584c830bf3 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:00:39 +0000 Subject: Added it::any2utf8, fixed it::replace fast path to add u modified, added error reporting for invalid utf-8 input to it::match and it::replace --- tests/it_html.t | 1 + 1 file changed, 1 insertion(+) (limited to 'tests') diff --git a/tests/it_html.t b/tests/it_html.t index b1d271e..770d11a 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -153,6 +153,7 @@ is(it_html::entity_decode("A"), "A"); # it_html::configure(array('charset' => "iso-8859-1")); +ini_set('default_charset', "iso-8859-1"); is( it_html::sanitize('qüx'), -- cgit v1.2.3 From 2c7860119626cdf65fa4d7de38b7e0a1fdb816df Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:05:31 +0000 Subject: Added tests for it::replace with nbsp, it::any2utf8 --- tests/it.t | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tests') diff --git a/tests/it.t b/tests/it.t index 317b8a3..408d84a 100755 --- a/tests/it.t +++ b/tests/it.t @@ -304,6 +304,7 @@ is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); +is(it::replace(array('\s' => "x"), it_html::entity_decode(" ")), "x", "match non-breaking space as white-space character"); is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex'); is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex'); @@ -350,4 +351,18 @@ is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtot is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii'); is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8'); +is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input"); +is(it::any2utf8('Müller'), 'Müller', "it::any2utf8 utf8 input"); +is(it::any2utf8('Aslı'), 'Aslı', "it::any2utf8 utf8 non-latin1 input"); +is(it::any2utf8(utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input"); + +is(it::any2utf8( + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'), + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', + "it::any2utf8 utf8 input (exhaustive alphabet)"); +is(it::any2utf8( + utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')), + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', + "it::any2utf8 latin1 input (exhaustive alphabet)"); + ?> -- cgit v1.2.3 From a7ac24d9f9698ff0a1d42268d79f74cd21f70eb4 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:40:12 +0000 Subject: Fix it::ucwords and added tests for it::ucfirst and it::ucwords --- tests/it.t | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tests') diff --git a/tests/it.t b/tests/it.t index 408d84a..8e4a7e2 100755 --- a/tests/it.t +++ b/tests/it.t @@ -347,6 +347,10 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime'); is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime'); +# it::uc* +is(it::ucfirst('foo bär über'), 'Foo bär über'); +is(it::ucwords('foo bär über'), 'Foo Bär Über'); + # it::substr_replace is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii'); is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8'); -- cgit v1.2.3 From a60fb69ff8e7755ab969298ab85a2b63bbcc8b14 Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Fri, 30 Mar 2012 16:13:15 +0000 Subject: add GREEN HEART unit test for it_html::fix_encoding() --- tests/it_html.t | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tests') diff --git a/tests/it_html.t b/tests/it_html.t index 770d11a..2323e5e 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -181,4 +181,6 @@ is(it_html::fix_encoding(utf8_encode("é»"), true), "é»", "it_html::fix_encod is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller", "it_html::fix_encoding incorrectly encoded latin1"); +is(it_html::fix_encoding("a💚b"), "a💚b", "it_html::fix_encoding incorrectly encoded GREEN HEART"); + ?> -- cgit v1.2.3 From 3242f516a6c3e89fa898d96927746447e15d7750 Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Fri, 30 Mar 2012 16:17:08 +0000 Subject: correct GREEN HEART test description --- tests/it_html.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/it_html.t b/tests/it_html.t index 2323e5e..174c487 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -181,6 +181,6 @@ is(it_html::fix_encoding(utf8_encode("é»"), true), "é»", "it_html::fix_encod is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller", "it_html::fix_encoding incorrectly encoded latin1"); -is(it_html::fix_encoding("a💚b"), "a💚b", "it_html::fix_encoding incorrectly encoded GREEN HEART"); +is(it_html::fix_encoding("a💚b"), "a💚b", "it_html::fix_encoding correctly handles 4-byte utf-8 character GREEN HEART"); ?> -- cgit v1.2.3