From 22be3fa7ab6efd48b457413cbd72b1d21d67bfab Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Thu, 22 Mar 2012 20:21:54 +0000 Subject: cleanup, set locale for latin1 case-sensitive tests, adjust tests to file now being encoded in utf-8 --- tests/it.t | 308 ++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 184 insertions(+), 124 deletions(-) (limited to 'tests/it.t') diff --git a/tests/it.t b/tests/it.t index 1a308ec..ec95cc4 100755 --- a/tests/it.t +++ b/tests/it.t @@ -3,253 +3,313 @@ # Tests for it.class -function match($regex, $string, $expect, $name) + +# +# tests for it::match() +# +$oldcharset = ini_get('default_charset'); +$oldlocale = setlocale(LC_CTYPE, 0); + +ini_set('default_charset', 'utf-8'); +setlocale(LC_CTYPE, 'de_CH'); # required becuase we're checking German umlauts in latin1 mode + + +function match($regex, $string, $expect, $name, $p = array()) { $GLOBALS['TEST_MORE_LEVEL'] = 1; - $pass = is (it::match($regex, $string), $expect, $name); + $pass = is (it::match($regex, $string, $p), $expect, $name); if (!$pass) { - diag(" regex given: $regex"); + diag(" regex given: $regex" . ($p ? " " .D($p) : "")); diag(" regex converted: " . it::convertregex($regex)); } $GLOBALS['TEST_MORE_LEVEL'] = 0; } + match( 'b', 'aaaabaaaa', 'b', 'simple regex' - ); +); + match( 'a/b', ' a/b ', 'a/b', 'regex with /' ); + match( 'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq', - array( 'bb', 'cc', 'dd' ), + array('bb', 'cc', 'dd'), 'return array of captures' - ); +); + match( '\bblah\b', ' blah ', 'blah', 'match \b at spaces' - ); +); + match( '\bblah\b', 'blah', 'blah', 'match \b at end of string' - ); +); + match( '\bblah\b', 'ablahc', false, 'don\'t match \b at word chars' - ); +); + match( - '\bblah\b', 'blah', + '\bblah\b', 'Üblahä', false, - 'don\'t match \b at umlaute in latin1' - ); + 'don\'t match \b at umlaute' +); + match( '\Bblah\B', ' blah ', false, 'don\'t match \B at spaces' - ); +); + match( '\Bblah\B', 'blah', false, 'don\'t match \B at end of string' - ); +); + match( '\Bblah\B', 'ablahc', 'blah', 'match \B at word chars' - ); +); + match( - '\Bblah\B', 'blah', + '\Bblah\B', 'Üblahä', 'blah', - 'match \B at umlaute in latin1' - ); + 'match \B at umlaute' +); + match( - '\w+', ' |#blah ', - 'blah', + '\w+', ' |#Üblahä ', + 'Üblahä', 'include umlaute in \w' - ); +); + match( - '[[:alpha:]]+', ' |#blah ', - 'blah', + '[[:alpha:]]+', ' |#blahä ', + 'blahä', 'include umlaute in [[:alpha:]]' - ); +); + match( - '\W+', ' |#blah ', + '\W+', ' |#Üblahä ', ' |#', 'don\'t include umlaute in \W' - ); +); + match( - '\ba', 'a', + '\ba', 'äa', '', '\b must know umlauts' - ); +); -eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' ); -$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex ); +match( + 'aaa\\\\w+', ' aaa\www ', + 'aaa\www', + 'don\'t parse \w in \\\\w at beginning (match)' +); + +match( + 'aaa\\\\w+', ' aaa\www ', + 'aaa\www', + 'don\'t parse \w in \\\\w after chars (match)' +); + +eval('$escapedwordregex = "' . it::convertregex('\w') . '";'); +$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex); match( '\\\\w+', $escapedwordregex, false, 'don\'t parse \w in \\\\w at beginning (no match)' - ); -match( - 'aaa\\\\w+', ' aaa\www ', - 'aaa\www', - 'don\'t parse \w in \\\\w at beginning (match)' - ); +); + match( 'aaa\\\\w+', 'aaa' . $escapedwordregex, false, 'don\'t parse \w in \\\\w after chars (no match)' - ); -match( - 'aaa\\\\w+', ' aaa\www ', - 'aaa\www', - 'don\'t parse \w in \\\\w after chars (match)' - ); +); + match( '\\\\\\\\w+', '\\' . $escapedwordregex, false, 'don\'t parse \w in \\\\\\\w (no match)' - ); +); + match( '\\\\\\\\w+', ' \\\\www ', '\\\\www', 'don\'t parse \\\\\\\\w as \w (match)' - ); +); + match( '[\w]+', '[[[]]]---', false, 'replace \w in [\w] correctly (no match)' - ); +); + match( '[\w]+', ' \\\\aword[[[]]] ', 'aword', 'replace \w in [\w] correctly (match)' - ); +); + match( '[\\\\w]+', ' blabergna ', false, 'don\'t parse \w in [\\\\w] (no match)' - ); +); + match( '[\\\\w]+', ' \\\\worda[[[]', '\\\\w', 'don\'t parse \w in [\\\\w] (match)' - ); +); + match( '[a\W]+', 'bbbbbbb a a%$+ accccc', ' a a%$+ a', '\W in []' - ); +); + match( - '\\\\\\w+', ' \blah ', - '\blah', + '\\\\\\w+', ' \Üblahä ', + '\Üblahä', 'parse \w in \\\\\\w at beginning' - ); +); + match( - 'aaa\\\\\\w+', ' aaa\blah ', - 'aaa\blah', + 'aaa\\\\\\w+', ' aaa\Üblahä ', + 'aaa\Üblahä', 'parse \w in \\\\\\w after chars' - ); -is( - it::replace( - array( - 'regex1' => 'repl1', - 'regex2' => 'repl2', - 'regex3' => 'repl3' ), - 'regex2 regex1 regex3' ), - 'repl2 repl1 repl3', - 'test tr regex function' - ); -is( - it::match( '\w+', 'word1 wrd2 word_3', array('all' => true )), - array( 'word1', 'wrd2', 'word_3' ), - "test match_all function" - ); +); + +match( + '\w+', 'word1 wörd2 word_3', + array('word1', 'wörd2', 'word_3'), + "test match_all function", + array('all' => true) +); + match( 'aBcD', ' aBcD ', 'aBcD', "caseinsensitive is default" - ); +); + match( - '', '', - '', - 'match umlaute in latin1 case insensitive' - ); + '\w+', 'Müller', + 'Müller', + '\w matches umlaut in utf-8 mode' +); -is( - it::match(utf8_encode('aB'), utf8_encode("Ab"), array('utf8' => true)), - utf8_encode('Ab'), - "match utf-8 umlaute in case insensitive" +match( + 'M.ller', 'Müller', + 'Müller', + '. matches umlaut in utf-8 mode' ); -$oldcharset = ini_get('default_charset'); -ini_set('default_charset', 'utf-8'); match( - utf8_encode('aB'), utf8_encode('Ab'), - utf8_encode('Ab'), - "match utf-8 umlaute in case insensitive using default_charset" + utf8_decode('ö'), utf8_decode('Ö'), + utf8_decode('Ö'), + 'match umlaute in de_CH.latin1 case insensitive', + array('utf8' => false) ); -is( - it::match('aB', 'Ab', array('utf8' => false)), - 'Ab', - "non-utf-8 override with default_charset=utf-8" + +match( + utf8_decode('aöBÜ'), utf8_decode('AÖbü'), + utf8_decode('AÖbü'), + "match umlaute with non-utf-8 override in p", + array('utf8' => false) ); + + match( - '\w+', utf8_encode('Mller'), - utf8_encode('Mller'), - '\w matches umlaut in utf-8 mode' + 'abc', "aBc", + false, + "set case sensitivity by parameter", + array('casesensitive' => 1), ); + match( - 'M.ller', utf8_encode('Mller'), - utf8_encode('Mller'), - '. matches umlaut in utf-8 mode' + '\w+', 'word1 wörd2 word_3', + array('word1', 'wörd2', 'word_3'), + "test all => 1 without captures", + array('all' => 1) ); -ini_set('default_charset', $oldcharset); -is( - it::match( 'abc', "aBc", array('casesensitive' => 1 )), - false, - "set case sensitivity by parameter" - ); +match( + '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array('12', '3', '4'), + "test all => 1 with one capture", + array('all' => 1) +); +match( + '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')), + "test all => 1 with captures", + array('all' => 1) +); + +match( + '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')), + "test all => 1,pattern_order => 1", + array('all' => 1, 'pattern_order' => 1) +); + +ini_set('default_charset', 'iso-8859-1'); +match( + 'aöBÜ', "AÖbü", + 'AÖbü', + "match utf-8 umlaute in case insensitive mode with utf8 override", + array('utf8' => true) +); +ini_set('default_charset', 'utf-8'); + + +# +# tests for it::replace() +# is( - it::match( '\w+', 'word1 wrd2 word_3', array('all' => 1 )), - array( 'word1', 'wrd2', 'word_3' ), - "test all=>1 without captures" - ); -is( - it::match( '\w+\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1 )), - array( '12', '3', '4' ), - "test all=>1 with one capture" - ); -is( - it::match( '(\w+)\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1 )), - array( array( 'word1', '12' ), array( 'wrd2', '3' ), array( 'word_3', '4' ) ), - "test all=>1 with captures" - ); -is( - it::match( '(\w+)\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )), - array( array( 'word1', 'wrd2', 'word_3' ), array( '12', '3', '4' ) ), - "test all=>1,pattern_order=>1" - ); + it::replace( + array( + 'regex1' => 'repl1', + 'regex2' => 'repl2', + 'regex3' => 'repl3'), + 'regex2 regex1 regex3'), + 'repl2 repl1 repl3', + 'test tr regex function' +); is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12"); is(it::replace(array('!' => "x"), "!"), "x"); -is(it::replace(array('\w' => "x"), "o"), "xx"); -is(it::replace(array('[[:alpha:]]' => "x"), ""), "x"); -is(it::replace(array('\w' => "x", '#' => "!"), "#"), "!x"); -is(it::replace(array('#' => "!", '\w' => "x"), "#"), "!x"); -is(it::replace(array('' => "x"), ""), "x"); +is(it::replace(array('\w' => "x"), "oö"), "xx"); +is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x"); +is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); +is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); +is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); + +setlocale(LC_CTYPE, $oldlocale); +ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset + + # it::filter_keys tests $data = array('a' => 1, 'b' => 2, 'c' => 3); -- cgit v1.2.3 From 0147e6e3aea620a54b0c3f6c932c658ee72a45a0 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 26 Mar 2012 15:20:24 +0000 Subject: new utf8 safe functions it::grep and it::substr_replace --- tests/it.t | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests/it.t') diff --git a/tests/it.t b/tests/it.t index ec95cc4..317b8a3 100755 --- a/tests/it.t +++ b/tests/it.t @@ -305,6 +305,10 @@ is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); +is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex'); +is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex'); +is(it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), 'casesensitive' => 1), array(0 => 'lower', 3 => 'UPPER'), 'set casesensitive'); +is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys'); setlocale(LC_CTYPE, $oldlocale); ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset @@ -342,4 +346,8 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime'); is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime'); +# it::substr_replace +is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii'); +is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8'); + ?> -- cgit v1.2.3 From 2c7860119626cdf65fa4d7de38b7e0a1fdb816df Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:05:31 +0000 Subject: Added tests for it::replace with nbsp, it::any2utf8 --- tests/it.t | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tests/it.t') diff --git a/tests/it.t b/tests/it.t index 317b8a3..408d84a 100755 --- a/tests/it.t +++ b/tests/it.t @@ -304,6 +304,7 @@ is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); +is(it::replace(array('\s' => "x"), it_html::entity_decode(" ")), "x", "match non-breaking space as white-space character"); is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex'); is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex'); @@ -350,4 +351,18 @@ is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtot is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii'); is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8'); +is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input"); +is(it::any2utf8('Müller'), 'Müller', "it::any2utf8 utf8 input"); +is(it::any2utf8('Aslı'), 'Aslı', "it::any2utf8 utf8 non-latin1 input"); +is(it::any2utf8(utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input"); + +is(it::any2utf8( + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'), + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', + "it::any2utf8 utf8 input (exhaustive alphabet)"); +is(it::any2utf8( + utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')), + ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', + "it::any2utf8 latin1 input (exhaustive alphabet)"); + ?> -- cgit v1.2.3 From a7ac24d9f9698ff0a1d42268d79f74cd21f70eb4 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:40:12 +0000 Subject: Fix it::ucwords and added tests for it::ucfirst and it::ucwords --- tests/it.t | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tests/it.t') diff --git a/tests/it.t b/tests/it.t index 408d84a..8e4a7e2 100755 --- a/tests/it.t +++ b/tests/it.t @@ -347,6 +347,10 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime'); is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime'); +# it::uc* +is(it::ucfirst('foo bär über'), 'Foo bär über'); +is(it::ucwords('foo bär über'), 'Foo Bär Über'); + # it::substr_replace is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii'); is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8'); -- cgit v1.2.3