diff options
| author | Christian Weber | 2012-03-22 20:21:54 +0000 | 
|---|---|---|
| committer | Christian Weber | 2012-03-22 20:21:54 +0000 | 
| commit | 22be3fa7ab6efd48b457413cbd72b1d21d67bfab (patch) | |
| tree | c28b197f2480217a3b4d2048b547a432a6a9fd38 /tests/it.t | |
| parent | f5e2d8058ee6d8d014b0303dae231e5ae1ae27df (diff) | |
| download | itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.tar.gz itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.tar.bz2 itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.zip | |
Clean up, set locale for latin1 case-sensitive tests, and adjust tests now that the file is encoded in UTF-8
Diffstat (limited to 'tests/it.t')
| -rwxr-xr-x | tests/it.t | 308 | 
1 file changed, 184 insertions, 124 deletions
| @@ -3,253 +3,313 @@  # Tests for it.class -function match($regex, $string, $expect, $name) + +# +# tests for it::match() +# +$oldcharset = ini_get('default_charset'); +$oldlocale = setlocale(LC_CTYPE, 0); + +ini_set('default_charset', 'utf-8'); +setlocale(LC_CTYPE, 'de_CH');		# required becuase we're checking German umlauts in latin1 mode + + +function match($regex, $string, $expect, $name, $p = array())  {  	$GLOBALS['TEST_MORE_LEVEL'] = 1; -	$pass = is (it::match($regex, $string), $expect, $name); +	$pass = is (it::match($regex, $string, $p), $expect, $name);  	if (!$pass) { -		diag("        regex given: $regex"); +		diag("        regex given: $regex" . ($p ? " " .D($p) : ""));  		diag("    regex converted: " . it::convertregex($regex));  	}   	$GLOBALS['TEST_MORE_LEVEL'] = 0;  } +  match(  	'b', 'aaaabaaaa',  	'b',  	'simple regex' -	); +); +  match(  	'a/b', '   a/b   ',  	'a/b',  	'regex with /'  ); +  match(  	'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq', -	array( 'bb', 'cc', 'dd' ), +	array('bb', 'cc', 'dd'),  	'return array of captures' -	); +); +  match(  	'\bblah\b', ' blah ',  	'blah',  	'match \b at spaces' -	); +); +  match(  	'\bblah\b', 'blah',  	'blah',  	'match \b at end of string' -	); +); +  match(  	'\bblah\b', 'ablahc',  	false,  	'don\'t match \b at word chars' -	); +); +  match( -	'\bblah\b', 'Üblahä', +	'\bblah\b', 'Üblahä',  	false, -	'don\'t match \b at umlaute in latin1' -	); +	'don\'t match \b at umlaute' +); +  match(  	'\Bblah\B', ' blah ',  	false,  	'don\'t match \B at spaces' -	); +); +  match(  	'\Bblah\B', 'blah',  	false,  	'don\'t match \B at end of string' -	); +); +  match(  	'\Bblah\B', 'ablahc',  	'blah',  	'match \B at word chars' -	); +); +  match( -	'\Bblah\B', 'Üblahä', +	'\Bblah\B', 'Üblahä',  	'blah', -	'match \B at umlaute in latin1' -	); +	'match \B at umlaute' +); +  match( -	'\w+', '  |#Üblahä   ', -	'Üblahä', +	'\w+', '  |#Üblahä   ', +	'Üblahä',  	'include umlaute in \w' -	); +); +  match( -	'[[:alpha:]]+', '  
|#blahä   ', -	'blahä', +	'[[:alpha:]]+', '  |#blahä   ', +	'blahä',  	'include umlaute in [[:alpha:]]' -	); +); +  match( -	'\W+', '  |#Üblahä  ', +	'\W+', '  |#Üblahä  ',  	'  |#',  	'don\'t include umlaute in \W' -	); +); +  match( -	'\ba', 'äa', +	'\ba', 'äa',  	'',  	'\b must know umlauts' -	); +); -eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' ); -$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex ); +match( +	'aaa\\\\w+', '   aaa\www  ', +	'aaa\www', +	'don\'t parse \w in \\\\w at beginning (match)' +); + +match( +	'aaa\\\\w+', '   aaa\www  ', +	'aaa\www', +	'don\'t parse \w in \\\\w after chars (match)' +); + +eval('$escapedwordregex = "' . it::convertregex('\w') . '";'); +$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex);  match(  	'\\\\w+',  $escapedwordregex,  	false,  	'don\'t parse \w in \\\\w at beginning (no match)' -	); -match( -	'aaa\\\\w+', '   aaa\www  ', -	'aaa\www', -	'don\'t parse \w in \\\\w at beginning (match)' -	); +); +  match(  	'aaa\\\\w+', 'aaa' . $escapedwordregex,  	false,  	'don\'t parse \w in \\\\w after chars (no match)' -	); -match( -	'aaa\\\\w+', '   aaa\www  ', -	'aaa\www', -	'don\'t parse \w in \\\\w after chars (match)' -	); +); +  match(  	'\\\\\\\\w+', '\\' . 
$escapedwordregex,  	false,  	'don\'t parse \w in \\\\\\\w (no match)' -	); +); +  match(  	'\\\\\\\\w+', '  \\\\www  ',  	'\\\\www',  	'don\'t parse \\\\\\\\w as \w (match)' -	); +); +  match(  	'[\w]+', '[[[]]]---',  	false,  	'replace \w in [\w] correctly (no match)' -	); +); +  match(  	'[\w]+', '  \\\\aword[[[]]]   ',  	'aword',  	'replace \w in [\w] correctly (match)' -	); +); +  match(  	'[\\\\w]+', ' blabergna ',  	false,  	'don\'t parse \w in [\\\\w] (no match)' -	); +); +  match(  	'[\\\\w]+', '  \\\\worda[[[]',  	'\\\\w',  	'don\'t parse \w in [\\\\w] (match)' -	); +); +  match(  	'[a\W]+', 'bbbbbbb a a%$+ accccc',  	' a a%$+ a',  	'\W in []' -	); +); +  match( -	'\\\\\\w+', '  \Üblahä  ', -	'\Üblahä', +	'\\\\\\w+', '  \Üblahä  ', +	'\Üblahä',  	'parse \w in \\\\\\w at beginning' -	); +); +  match( -	'aaa\\\\\\w+', '  aaa\Üblahä  ', -	'aaa\Üblahä', +	'aaa\\\\\\w+', '  aaa\Üblahä  ', +	'aaa\Üblahä',  	'parse \w in \\\\\\w after chars' -	); -is( -	it::replace( -		array( -			'regex1' => 'repl1', -			'regex2' => 'repl2', -			'regex3' => 'repl3' ), -		'regex2 regex1 regex3' ), -	'repl2 repl1 repl3', -	'test tr regex function' -	); -is( -	it::match( '\w+', 'word1 wörd2 word_3', array('all' => true )), -	array( 'word1', 'wörd2', 'word_3' ), -	"test match_all function" -	); +); + +match( +	'\w+', 'word1 wörd2 word_3', +	array('word1', 'wörd2', 'word_3'), +	"test match_all function", +	array('all' => true) +); +  match(  	'aBcD', '  aBcD  ',  	'aBcD',  	"caseinsensitive is default" -	); +); +  match( -	'ö', 'Ö', -	'Ö', -	'match umlaute in latin1 case insensitive' -	); +	'\w+', 'Müller', +	'Müller', +	'\w matches umlaut in utf-8 mode' +); -is( -	it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)), -	utf8_encode('AÖbü'), -	"match utf-8 umlaute in case insensitive" +match( +	'M.ller', 'Müller', +	'Müller', +	'. 
matches umlaut in utf-8 mode'  ); -$oldcharset = ini_get('default_charset'); -ini_set('default_charset', 'utf-8');  match( -	utf8_encode('aöBÜ'), utf8_encode('AÖbü'), -	utf8_encode('AÖbü'), -	"match utf-8 umlaute in case insensitive using default_charset" +	utf8_decode('ö'), utf8_decode('Ö'), +	utf8_decode('Ö'), +	'match umlaute in de_CH.latin1 case insensitive', +	array('utf8' => false)  ); -is( -	it::match('aöBÜ', 'AÖbü', array('utf8' => false)), -	'AÖbü', -	"non-utf-8 override with default_charset=utf-8" + +match( +	utf8_decode('aöBÜ'), utf8_decode('AÖbü'), +	utf8_decode('AÖbü'), +	"match umlaute with non-utf-8 override in p", +	array('utf8' => false)  ); + +  match( -	'\w+', utf8_encode('Müller'), -	utf8_encode('Müller'), -	'\w matches umlaut in utf-8 mode' +	'abc', "aBc", +	false, +	"set case sensitivity by parameter", +	array('casesensitive' => 1),  ); +  match( -	'M.ller', utf8_encode('Müller'), -	utf8_encode('Müller'), -	'. matches umlaut in utf-8 mode' +	'\w+', 'word1 wörd2 word_3', +	array('word1', 'wörd2', 'word_3'), +	"test all => 1 without captures", +	array('all' => 1)  ); -ini_set('default_charset', $oldcharset); -is( -	it::match( 'abc', "aBc", array('casesensitive' => 1 )), -	false, -	"set case sensitivity by parameter" -	); +match( +	'\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', +	array('12', '3', '4'), +	"test all => 1 with one capture", +	array('all' => 1) +); +match( +	'(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', +	array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')), +	"test all => 1 with captures", +	array('all' => 1) +); + +match( +	'(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', +	array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')), +	"test all => 1,pattern_order => 1", +	array('all' => 1, 'pattern_order' => 1) +); + +ini_set('default_charset', 'iso-8859-1'); +match( +	'aöBÜ', "AÖbü", +	'AÖbü', +	"match utf-8 umlaute in case insensitive mode with utf8 override", +	array('utf8' => true) +); 
+ini_set('default_charset', 'utf-8'); + + +# +# tests for it::replace() +#  is( -	it::match( '\w+', 'word1 wörd2 word_3', array('all' => 1 )), -	array( 'word1', 'wörd2', 'word_3' ), -	"test all=>1 without captures" -	); -is( -	it::match( '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )), -	array( '12', '3', '4' ), -	"test all=>1 with one capture" -	); -is( -	it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )), -	array( array( 'word1', '12' ), array( 'wörd2', '3' ), array( 'word_3', '4' ) ), -	"test all=>1 with captures" -	); -is( -	it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )), -	array( array( 'word1', 'wörd2', 'word_3' ), array( '12', '3', '4' ) ), -	"test all=>1,pattern_order=>1" -	); +	it::replace( +		array( +			'regex1' => 'repl1', +			'regex2' => 'repl2', +			'regex3' => 'repl3'), +		'regex2 regex1 regex3'), +	'repl2 repl1 repl3', +	'test tr regex function' +);  is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12");  is(it::replace(array('!' => "x"), "!"), "x"); -is(it::replace(array('\w' => "x"), "oö"), "xx"); -is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x"); -is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); -is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); -is(it::replace(array('ö' => "x"), "Ö"), "x"); +is(it::replace(array('\w' => "x"), "oö"), "xx"); +is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x"); +is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x"); +is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x"); +is(it::replace(array('ö' => "x"), "Ö"), "x");  is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa"); + +setlocale(LC_CTYPE, $oldlocale); +ini_set('default_charset', $oldcharset);	# end of tests that must run with specific charset + +  # it::filter_keys tests  $data = array('a' => 1, 'b' => 2, 'c' => 3); |