cleanup, set locale for latin1 case-sensitive tests, adjust tests to file now being encoded in utf-8

author: Christian Weber 2012-03-22 20:21:54 +0000
committer: Christian Weber 2012-03-22 20:21:54 +0000
commit: 22be3fa7ab6efd48b457413cbd72b1d21d67bfab (patch)
tree: c28b197f2480217a3b4d2048b547a432a6a9fd38 /tests
parent: f5e2d8058ee6d8d014b0303dae231e5ae1ae27df (diff)
download: itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.tar.gz
itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.tar.bz2
itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.zip
1 files changed, 184 insertions, 124 deletions
diff --git a/tests/it.t b/tests/it.t
index 1a308ec..ec95cc4 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -3,253 +3,313 @@
 
 # Tests for it.class
 
-function match($regex, $string, $expect, $name)
+
+#
+# tests for it::match()
+#
+$oldcharset = ini_get('default_charset');
+$oldlocale = setlocale(LC_CTYPE, 0);
+
+ini_set('default_charset', 'utf-8');
+setlocale(LC_CTYPE, 'de_CH');		# required because we're checking German umlauts in latin1 mode
+
+
+function match($regex, $string, $expect, $name, $p = array())
 {
 	$GLOBALS['TEST_MORE_LEVEL'] = 1;
-	$pass = is (it::match($regex, $string), $expect, $name);
+	$pass = is (it::match($regex, $string, $p), $expect, $name);
 	if (!$pass) {
-		diag("        regex given: $regex");
+		diag("        regex given: $regex" . ($p ? " " .D($p) : ""));
 		diag("    regex converted: " . it::convertregex($regex));
 	} 
 	$GLOBALS['TEST_MORE_LEVEL'] = 0;
 }
 
+
 match(
 	'b', 'aaaabaaaa',
 	'b',
 	'simple regex'
-	);
+);
+
 match(
 	'a/b', '   a/b   ',
 	'a/b',
 	'regex with /'
 );
+
 match(
 	'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq',
-	array( 'bb', 'cc', 'dd' ),
+	array('bb', 'cc', 'dd'),
 	'return array of captures'
-	);
+);
+
 match(
 	'\bblah\b', ' blah ',
 	'blah',
 	'match \b at spaces'
-	);
+);
+
 match(
 	'\bblah\b', 'blah',
 	'blah',
 	'match \b at end of string'
-	);
+);
+
 match(
 	'\bblah\b', 'ablahc',
 	false,
 	'don\'t match \b at word chars'
-	);
+);
+
 match(
-	'\bblah\b', 'Üblahä',
+	'\bblah\b', 'Üblahä',
 	false,
-	'don\'t match \b at umlaute in latin1'
-	);
+	'don\'t match \b at umlaute'
+);
+
 match(
 	'\Bblah\B', ' blah ',
 	false,
 	'don\'t match \B at spaces'
-	);
+);
+
 match(
 	'\Bblah\B', 'blah',
 	false,
 	'don\'t match \B at end of string'
-	);
+);
+
 match(
 	'\Bblah\B', 'ablahc',
 	'blah',
 	'match \B at word chars'
-	);
+);
+
 match(
-	'\Bblah\B', 'Üblahä',
+	'\Bblah\B', 'Üblahä',
 	'blah',
-	'match \B at umlaute in latin1'
-	);
+	'match \B at umlaute'
+);
+
 match(
-	'\w+', '  |#Üblahä   ',
-	'Üblahä',
+	'\w+', '  |#Üblahä   ',
+	'Üblahä',
 	'include umlaute in \w'
-	);
+);
+
 match(
-	'[[:alpha:]]+', '  |#blahä   ',
-	'blahä',
+	'[[:alpha:]]+', '  |#blahä   ',
+	'blahä',
 	'include umlaute in [[:alpha:]]'
-	);
+);
+
 match(
-	'\W+', '  |#Üblahä  ',
+	'\W+', '  |#Üblahä  ',
 	'  |#',
 	'don\'t include umlaute in \W'
-	);
+);
+
 match(
-	'\ba', 'äa',
+	'\ba', 'äa',
 	'',
 	'\b must know umlauts'
-	);
+);
 
-eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' );
-$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex );
+match(
+	'aaa\\\\w+', '   aaa\www  ',
+	'aaa\www',
+	'don\'t parse \w in \\\\w at beginning (match)'
+);
+
+match(
+	'aaa\\\\w+', '   aaa\www  ',
+	'aaa\www',
+	'don\'t parse \w in \\\\w after chars (match)'
+);
+
+eval('$escapedwordregex = "' . it::convertregex('\w') . '";');
+$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex);
 
 match(
 	'\\\\w+',  $escapedwordregex,
 	false,
 	'don\'t parse \w in \\\\w at beginning (no match)'
-	);
-match(
-	'aaa\\\\w+', '   aaa\www  ',
-	'aaa\www',
-	'don\'t parse \w in \\\\w at beginning (match)'
-	);
+);
+
 match(
 	'aaa\\\\w+', 'aaa' . $escapedwordregex,
 	false,
 	'don\'t parse \w in \\\\w after chars (no match)'
-	);
-match(
-	'aaa\\\\w+', '   aaa\www  ',
-	'aaa\www',
-	'don\'t parse \w in \\\\w after chars (match)'
-	);
+);
+
 match(
 	'\\\\\\\\w+', '\\' . $escapedwordregex,
 	false,
 	'don\'t parse \w in \\\\\\\w (no match)'
-	);
+);
+
 match(
 	'\\\\\\\\w+', '  \\\\www  ',
 	'\\\\www',
 	'don\'t parse \\\\\\\\w as \w (match)'
-	);
+);
+
 match(
 	'[\w]+', '[[[]]]---',
 	false,
 	'replace \w in [\w] correctly (no match)'
-	);
+);
+
 match(
 	'[\w]+', '  \\\\aword[[[]]]   ',
 	'aword',
 	'replace \w in [\w] correctly (match)'
-	);
+);
+
 match(
 	'[\\\\w]+', ' blabergna ',
 	false,
 	'don\'t parse \w in [\\\\w] (no match)'
-	);
+);
+
 match(
 	'[\\\\w]+', '  \\\\worda[[[]',
 	'\\\\w',
 	'don\'t parse \w in [\\\\w] (match)'
-	);
+);
+
 match(
 	'[a\W]+', 'bbbbbbb a a%$+ accccc',
 	' a a%$+ a',
 	'\W in []'
-	);
+);
+
 match(
-	'\\\\\\w+', '  \Üblahä  ',
-	'\Üblahä',
+	'\\\\\\w+', '  \Üblahä  ',
+	'\Üblahä',
 	'parse \w in \\\\\\w at beginning'
-	);
+);
+
 match(
-	'aaa\\\\\\w+', '  aaa\Üblahä  ',
-	'aaa\Üblahä',
+	'aaa\\\\\\w+', '  aaa\Üblahä  ',
+	'aaa\Üblahä',
 	'parse \w in \\\\\\w after chars'
-	);
-is(
-	it::replace(
-		array(
-			'regex1' => 'repl1',
-			'regex2' => 'repl2',
-			'regex3' => 'repl3' ),
-		'regex2 regex1 regex3' ),
-	'repl2 repl1 repl3',
-	'test tr regex function'
-	);
-is(
-	it::match( '\w+', 'word1 wörd2 word_3', array('all' => true )),
-	array( 'word1', 'wörd2', 'word_3' ),
-	"test match_all function"
-	);
+);
+
+match(
+	'\w+', 'word1 wörd2 word_3',
+	array('word1', 'wörd2', 'word_3'),
+	"test match_all function",
+	array('all' => true)
+);
+
 match(
 	'aBcD', '  aBcD  ',
 	'aBcD',
 	"caseinsensitive is default"
-	);
+);
+
 match(
-	'ö', 'Ö',
-	'Ö',
-	'match umlaute in latin1 case insensitive'
-	);
+	'\w+', 'Müller',
+	'Müller',
+	'\w matches umlaut in utf-8 mode'
+);
 
-is(
-	it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)),
-	utf8_encode('AÖbü'),
-	"match utf-8 umlaute in case insensitive"
+match(
+	'M.ller', 'Müller',
+	'Müller',
+	'. matches umlaut in utf-8 mode'
 );
 
-$oldcharset = ini_get('default_charset');
-ini_set('default_charset', 'utf-8');
 match(
-	utf8_encode('aöBÜ'), utf8_encode('AÖbü'),
-	utf8_encode('AÖbü'),
-	"match utf-8 umlaute in case insensitive using default_charset"
+	utf8_decode('ö'), utf8_decode('Ö'),
+	utf8_decode('Ö'),
+	'match umlaute in de_CH.latin1 case insensitive',
+	array('utf8' => false)
 );
-is(
-	it::match('aöBÜ', 'AÖbü', array('utf8' => false)),
-	'AÖbü',
-	"non-utf-8 override with default_charset=utf-8"
+
+match(
+	utf8_decode('aöBÜ'), utf8_decode('AÖbü'),
+	utf8_decode('AÖbü'),
+	"match umlaute with non-utf-8 override in p",
+	array('utf8' => false)
 );
+
+
 match(
-	'\w+', utf8_encode('Müller'),
-	utf8_encode('Müller'),
-	'\w matches umlaut in utf-8 mode'
+	'abc', "aBc",
+	false,
+	"set case sensitivity by parameter",
+	array('casesensitive' => 1),
 );
+
 match(
-	'M.ller', utf8_encode('Müller'),
-	utf8_encode('Müller'),
-	'. matches umlaut in utf-8 mode'
+	'\w+', 'word1 wörd2 word_3',
+	array('word1', 'wörd2', 'word_3'),
+	"test all => 1 without captures",
+	array('all' => 1)
 );
-ini_set('default_charset', $oldcharset);
 
-is(
-	it::match( 'abc', "aBc", array('casesensitive' => 1 )),
-	false,
-	"set case sensitivity by parameter"
-	);
+match(
+	'\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
+	array('12', '3', '4'),
+	"test all => 1 with one capture",
+	array('all' => 1)
+);
 
+match(
+	'(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
+	array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')),
+	"test all => 1 with captures",
+	array('all' => 1)
+);
+
+match(
+	'(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
+	array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')),
+	"test all => 1,pattern_order => 1",
+	array('all' => 1, 'pattern_order' => 1)
+);
+
+ini_set('default_charset', 'iso-8859-1');
+match(
+	'aöBÜ', "AÖbü",
+	'AÖbü',
+	"match utf-8 umlaute in case insensitive mode with utf8 override",
+	array('utf8' => true)
+);
+ini_set('default_charset', 'utf-8');
+
+
+#
+# tests for it::replace()
+#
 is(
-	it::match( '\w+', 'word1 wörd2 word_3', array('all' => 1 )),
-	array( 'word1', 'wörd2', 'word_3' ),
-	"test all=>1 without captures"
-	);
-is(
-	it::match( '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )),
-	array( '12', '3', '4' ),
-	"test all=>1 with one capture"
-	);
-is(
-	it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )),
-	array( array( 'word1', '12' ), array( 'wörd2', '3' ), array( 'word_3', '4' ) ),
-	"test all=>1 with captures"
-	);
-is(
-	it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )),
-	array( array( 'word1', 'wörd2', 'word_3' ), array( '12', '3', '4' ) ),
-	"test all=>1,pattern_order=>1"
-	);
+	it::replace(
+		array(
+			'regex1' => 'repl1',
+			'regex2' => 'repl2',
+			'regex3' => 'repl3'),
+		'regex2 regex1 regex3'),
+	'repl2 repl1 repl3',
+	'test tr regex function'
+);
 
 is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12");
 is(it::replace(array('!' => "x"), "!"), "x");
-is(it::replace(array('\w' => "x"), "oö"), "xx");
-is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x");
-is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x");
-is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
-is(it::replace(array('ö' => "x"), "Ö"), "x");
+is(it::replace(array('\w' => "x"), "oö"), "xx");
+is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x");
+is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x");
+is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
+is(it::replace(array('ö' => "x"), "Ö"), "x");
 is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
 
+
+setlocale(LC_CTYPE, $oldlocale);
+ini_set('default_charset', $oldcharset);	# end of tests that must run with specific charset
+
+
 # it::filter_keys tests
 
 $data = array('a' => 1, 'b' => 2, 'c' => 3);
author	Christian Weber	2012-03-22 20:21:54 +0000
committer	Christian Weber	2012-03-22 20:21:54 +0000
commit	22be3fa7ab6efd48b457413cbd72b1d21d67bfab (patch)
tree	c28b197f2480217a3b4d2048b547a432a6a9fd38 /tests
parent	f5e2d8058ee6d8d014b0303dae231e5ae1ae27df (diff)
download	itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.tar.gz itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.tar.bz2 itools-22be3fa7ab6efd48b457413cbd72b1d21d67bfab.zip