From 22be3fa7ab6efd48b457413cbd72b1d21d67bfab Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Thu, 22 Mar 2012 20:21:54 +0000 Subject: cleanup, set locale for latin1 case-sensitive tests, adjust tests to file now being encoded in utf-8 --- tests/it.t | 308 ++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 184 insertions(+), 124 deletions(-) (limited to 'tests') diff --git a/tests/it.t b/tests/it.t index 1a308ec..ec95cc4 100755 --- a/tests/it.t +++ b/tests/it.t @@ -3,253 +3,313 @@ # Tests for it.class -function match($regex, $string, $expect, $name) + +# +# tests for it::match() +# +$oldcharset = ini_get('default_charset'); +$oldlocale = setlocale(LC_CTYPE, 0); + +ini_set('default_charset', 'utf-8'); +setlocale(LC_CTYPE, 'de_CH'); # required becuase we're checking German umlauts in latin1 mode + + +function match($regex, $string, $expect, $name, $p = array()) { $GLOBALS['TEST_MORE_LEVEL'] = 1; - $pass = is (it::match($regex, $string), $expect, $name); + $pass = is (it::match($regex, $string, $p), $expect, $name); if (!$pass) { - diag(" regex given: $regex"); + diag(" regex given: $regex" . ($p ? " " .D($p) : "")); diag(" regex converted: " . it::convertregex($regex)); } $GLOBALS['TEST_MORE_LEVEL'] = 0; } + match( 'b', 'aaaabaaaa', 'b', 'simple regex' - ); +); + match( 'a/b', ' a/b ', 'a/b', 'regex with /' ); + match( 'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq', - array( 'bb', 'cc', 'dd' ), + array('bb', 'cc', 'dd'), 'return array of captures' - ); +); + match( '\bblah\b', ' blah ', 'blah', 'match \b at spaces' - ); +); + match( '\bblah\b', 'blah', 'blah', 'match \b at end of string' - ); +); + match( '\bblah\b', 'ablahc', false, 'don\'t match \b at word chars' - ); +); + match( - '\bblah\b', 'Üblahä', + '\bblah\b', 'Ãœblahä', false, - 'don\'t match \b at umlaute in latin1' - ); + 'don\'t match \b at umlaute' +); + match( '\Bblah\B', ' blah ', false, 'don\'t match \B at spaces' - ); +); + match( '\Bblah\B', 'blah', false, 'don\'t match \B at end of string' - ); +); + match( '\Bblah\B', 'ablahc', 'blah', 'match \B at word chars' - ); +); + match( - '\Bblah\B', 'Üblahä', + '\Bblah\B', 'Ãœblahä', 'blah', - 'match \B at umlaute in latin1' - ); + 'match \B at umlaute' +); + match( - '\w+', ' |#Üblahä ', - 'Üblahä', + '\w+', ' |#Ãœblahä ', + 'Ãœblahä', 'include umlaute in \w' - ); +); + match( - '[[:alpha:]]+', ' |#blahä ', - 'blahä', + '[[:alpha:]]+', ' |#blahä ', + 'blahä', 'include umlaute in [[:alpha:]]' - ); +); + match( - '\W+', ' |#Üblahä ', + '\W+', ' |#Ãœblahä ', ' |#', 'don\'t include umlaute in \W' - ); +); + match( - '\ba', 'äa', + '\ba', 'äa', '', '\b must know umlauts' - ); +); -eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' ); -$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex ); +match( + 'aaa\\\\w+', ' aaa\www ', + 'aaa\www', + 'don\'t parse \w in \\\\w at beginning (match)' +); + +match( + 'aaa\\\\w+', ' aaa\www ', + 'aaa\www', + 'don\'t parse \w in \\\\w after chars (match)' +); + +eval('$escapedwordregex = "' . it::convertregex('\w') . '";'); +$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex); match( '\\\\w+', $escapedwordregex, false, 'don\'t parse \w in \\\\w at beginning (no match)' - ); -match( - 'aaa\\\\w+', ' aaa\www ', - 'aaa\www', - 'don\'t parse \w in \\\\w at beginning (match)' - ); +); + match( 'aaa\\\\w+', 'aaa' . $escapedwordregex, false, 'don\'t parse \w in \\\\w after chars (no match)' - ); -match( - 'aaa\\\\w+', ' aaa\www ', - 'aaa\www', - 'don\'t parse \w in \\\\w after chars (match)' - ); +); + match( '\\\\\\\\w+', '\\' . $escapedwordregex, false, 'don\'t parse \w in \\\\\\\w (no match)' - ); +); + match( '\\\\\\\\w+', ' \\\\www ', '\\\\www', 'don\'t parse \\\\\\\\w as \w (match)' - ); +); + match( '[\w]+', '[[[]]]---', false, 'replace \w in [\w] correctly (no match)' - ); +); + match( '[\w]+', ' \\\\aword[[[]]] ', 'aword', 'replace \w in [\w] correctly (match)' - ); +); + match( '[\\\\w]+', ' blabergna ', false, 'don\'t parse \w in [\\\\w] (no match)' - ); +); + match( '[\\\\w]+', ' \\\\worda[[[]', '\\\\w', 'don\'t parse \w in [\\\\w] (match)' - ); +); + match( '[a\W]+', 'bbbbbbb a a%$+ accccc', ' a a%$+ a', '\W in []' - ); +); + match( - '\\\\\\w+', ' \Üblahä ', - '\Üblahä', + '\\\\\\w+', ' \Ãœblahä ', + '\Ãœblahä', 'parse \w in \\\\\\w at beginning' - ); +); + match( - 'aaa\\\\\\w+', ' aaa\Üblahä ', - 'aaa\Üblahä', + 'aaa\\\\\\w+', ' aaa\Ãœblahä ', + 'aaa\Ãœblahä', 'parse \w in \\\\\\w after chars' - ); -is( - it::replace( - array( - 'regex1' => 'repl1', - 'regex2' => 'repl2', - 'regex3' => 'repl3' ), - 'regex2 regex1 regex3' ), - 'repl2 repl1 repl3', - 'test tr regex function' - ); -is( - it::match( '\w+', 'word1 wörd2 word_3', array('all' => true )), - array( 'word1', 'wörd2', 'word_3' ), - "test match_all function" - ); +); + +match( + '\w+', 'word1 wörd2 word_3', + array('word1', 'wörd2', 'word_3'), + "test match_all function", + array('all' => true) +); + match( 'aBcD', ' aBcD ', 'aBcD', "caseinsensitive is default" - ); +); + match( - 'ö', 'Ö', - 'Ö', - 'match umlaute in latin1 case insensitive' - ); + '\w+', 'Müller', + 'Müller', + '\w matches umlaut in utf-8 mode' +); -is( - it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)), - utf8_encode('AÖbü'), - "match utf-8 umlaute in case insensitive" +match( + 'M.ller', 'Müller', + 'Müller', + '. matches umlaut in utf-8 mode' ); -$oldcharset = ini_get('default_charset'); -ini_set('default_charset', 'utf-8'); match( - utf8_encode('aöBÜ'), utf8_encode('AÖbü'), - utf8_encode('AÖbü'), - "match utf-8 umlaute in case insensitive using default_charset" + utf8_decode('ö'), utf8_decode('Ö'), + utf8_decode('Ö'), + 'match umlaute in de_CH.latin1 case insensitive', + array('utf8' => false) ); -is( - it::match('aöBÜ', 'AÖbü', array('utf8' => false)), - 'AÖbü', - "non-utf-8 override with default_charset=utf-8" + +match( + utf8_decode('aöBÃœ'), utf8_decode('AÖbü'), + utf8_decode('AÖbü'), + "match umlaute with non-utf-8 override in p", + array('utf8' => false) ); + + match( - '\w+', utf8_encode('Müller'), - utf8_encode('Müller'), - '\w matches umlaut in utf-8 mode' + 'abc', "aBc", + false, + "set case sensitivity by parameter", + array('casesensitive' => 1), ); + match( - 'M.ller', utf8_encode('Müller'), - utf8_encode('Müller'), - '. matches umlaut in utf-8 mode' + '\w+', 'word1 wörd2 word_3', + array('word1', 'wörd2', 'word_3'), + "test all => 1 without captures", + array('all' => 1) ); -ini_set('default_charset', $oldcharset); -is( - it::match( 'abc', "aBc", array('casesensitive' => 1 )), - false, - "set case sensitivity by parameter" - ); +match( + '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array('12', '3', '4'), + "test all => 1 with one capture", + array('all' => 1) +); +match( + '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')), + "test all => 1 with captures", + array('all' => 1) +); + +match( + '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', + array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')), + "test all => 1,pattern_order => 1", + array('all' => 1, 'pattern_order' => 1) +); + +ini_set('default_charset', 'iso-8859-1'); +match( + 'aöBÃœ', "AÖbü", + 'AÖbü', + "match utf-8 umlaute in case insensitive mode with utf8 override", + array('utf8' => true) +); +ini_set('default_charset', 'utf-8'); + + +# +# tests for it::replace() +# is( - it::match( '\w+', 'word1 wörd2 word_3', array('all' => 1 )), - array( 'word1', 'wörd2', 'word_3' ), - "test all=>1 without captures" - ); -is( - it::match( '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )), - array( '12', '3', '4' ), - "test all=>1 with one capture" - ); -is( - it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )), - array( array( 'word1', '12' ), array( 'wörd2', '3' ), array( 'word_3', '4' ) ), - "test all=>1 with captures" - ); -is( - it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )), - array( array( 'word1', 'wörd2', 'word_3' ), array( '12', '3', '4' ) ), - "test all=>1,pattern_order=>1" - ); + it::replace( + array( + 'regex1' => 'repl1', + 'regex2' => 'repl2', + 'regex3' => 'repl3'), + 'regex2 regex1 regex3'), + 'repl2 repl1 repl3', + 'test tr regex function' +); is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12"); is(it::replace(array('!' => "x"), "!"), "x"); -is(it::replace(array('\w' => "x"), "oö"), "xx"); -is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x"); -is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); -is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); -is(it::replace(array('ö' => "x"), "Ö"), "x"); +is(it::replace(array('\w' => "x"), "oö"), "xx"); +is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x"); +is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); +is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); +is(it::replace(array('ö' => "x"), "Ö"), "x"); is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); + +setlocale(LC_CTYPE, $oldlocale); +ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset + + # it::filter_keys tests $data = array('a' => 1, 'b' => 2, 'c' => 3); -- cgit v1.2.3