From 4574035ec43cabf050a7ab036bbba1cd2447a18f Mon Sep 17 00:00:00 2001
From: Urban Müller
Date: Thu, 22 Mar 2012 18:48:52 +0000
Subject: check encoding in Q() and _tag(), adapt tests to utf-8

---
 tests/it_html.t | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

(limited to 'tests')
diff --git a/tests/it_html.t b/tests/it_html.t
index 0def431..c955359 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -4,6 +4,7 @@
 #  Tests for html.class
 
 # Traditional html generation
+ini_set('default_charset', "utf-8");
 new it_html(array('htmltype' => "html"));
 
 is(
@@ -95,30 +96,22 @@ is(
 
 <P><a href="javascript:window.close()" title="Wolken"><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" width="240" height="180" alt="Wolken" style="border: 1px solid #ddd;" /></a></p>
 '),
-	 ' <a href="http://www.flickr.com/people/swisspics%25/">swisspics</a> posted &lt; &lt; ä &amp; yesterday a <i>photo</i> <i>tag missmatch</i>:<br /><br /> <p><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" alt="" /></p> ',
+	 ' <a href="http://www.flickr.com/people/swisspics%25/">swisspics</a> posted &lt; &lt; Ã¤ &amp; yesterday a <i>photo</i> <i>tag missmatch</i>:<br /><br /> <p><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" alt="" /></p> ',
 	'it_html::sanitize tag soup'
 );
 
 is(
 	it_html::sanitize('q&#8592;x'),
-	 "q&#8592;x",
+	 "qâ†x",
 	'it_html::sanitize preserve numeric entities'
 );
 
-it_html::configure(array('charset' => "utf-8"));
 is(
 	it_html::sanitize('q&uuml;x'),
 	 "q\xc3\xbcx",
 	'it_html::sanitize with utf-8'
 );
 
-it_html::configure(array('charset' => "iso-8859-1"));
-is(
-	it_html::sanitize('q&uuml;x'),
-	 "q\xfcx",
-	'it_html::sanitize with latin1'
-);
-
 is(
 	it_html::sanitize('<b>a<br>b</b>'),
 	 "<b>a<br />b</b>",
@@ -126,8 +119,8 @@ is(
 );
 
 is(
-	U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<Zürich>", 'gnöp' => "fasel")))),
-	'/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%FCrich%3E&bar[qux][gn%F6p]=fasel',
+	U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<ZÃ¼rich>", 'gnÃ¶p' => "fasel")))),
+	'/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%C3%BCrich%3E&bar[qux][gn%C3%B6p]=fasel',
 	'U() with nested arrays'
 );
 
@@ -149,11 +142,30 @@ is(
 	'U() converting of \ to /'
 );
 
-is(it_html::entity_decode("&auml;"),  "ä");
-is(it_html::entity_decode("&#8217;"), "'");
+is(it_html::entity_decode("&auml;"),  "Ã¤");
 is(it_html::entity_decode("&#x4a;"),  "J");
 is(it_html::entity_decode("&#x4A;"),  "J");
-is(it_html::entity_decode("&#xfff;"), " ");
 is(it_html::entity_decode("&#65;"),   "A");
+
+
+#
+# check transliterations in iso-8859-1
+#
+
+it_html::configure(array('charset' => "iso-8859-1"));
+
+is(
+	it_html::sanitize('q&uuml;x'),
+	 "q\xfcx",
+	'it_html::sanitize with latin1'
+);
+
+is(
+	it_html::sanitize('q&#8592;x'),
+	 "q&#8592;x",
+	'it_html::sanitize preserve non-decodable numeric entities'
+);
+is(it_html::entity_decode("&#8217;"), "'");
+is(it_html::entity_decode("&#xfff;"), " ");
 is(it_html::entity_decode("&#999;"),  " ");
 ?>
-- 
cgit v1.2.3


From f5e2d8058ee6d8d014b0303dae231e5ae1ae27df Mon Sep 17 00:00:00 2001
From: Urban Müller
Date: Thu, 22 Mar 2012 19:43:52 +0000
Subject: converted to utf-8

---
 tests/getopt.t | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/getopt.t b/tests/getopt.t
index 7a84588..d67738d 100755
--- a/tests/getopt.t
+++ b/tests/getopt.t
@@ -17,7 +17,7 @@ function getopt_ok($argv, $exp, $name)
 	return is($got['argument'], $exp, $name);
 }
 
-foreach (array("" => "blah gnaber", " (umlaute)" => "pre üäpost") as $variant => $testarg) {
+foreach (array("" => "blah gnaber", " (umlaute)" => "pre Ã¼Ã¤post") as $variant => $testarg) {
 	getopt_ok(array('-a', $testarg), $testarg, "Short version" . $variant);
 	getopt_ok(array('--argument', $testarg), $testarg, "Long version with space" . $variant);
 	getopt_ok(array("--argument=$testarg"), $testarg, "Long version with equal" . $variant);
-- 
cgit v1.2.3


From 22be3fa7ab6efd48b457413cbd72b1d21d67bfab Mon Sep 17 00:00:00 2001
From: Christian Weber
Date: Thu, 22 Mar 2012 20:21:54 +0000
Subject: cleanup, set locale for latin1 case-sensitive tests, adjust tests to
 file now being encoded in utf-8

---
 tests/it.t | 308 ++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 184 insertions(+), 124 deletions(-)

(limited to 'tests')

diff --git a/tests/it.t b/tests/it.t
index 1a308ec..ec95cc4 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -3,253 +3,313 @@
 
 # Tests for it.class
 
-function match($regex, $string, $expect, $name)
+
+#
+# tests for it::match()
+#
+$oldcharset = ini_get('default_charset');
+$oldlocale = setlocale(LC_CTYPE, 0);
+
+ini_set('default_charset', 'utf-8');
+setlocale(LC_CTYPE, 'de_CH');		# required because we're checking German umlauts in latin1 mode
+
+
+function match($regex, $string, $expect, $name, $p = array())
 {
 	$GLOBALS['TEST_MORE_LEVEL'] = 1;
-	$pass = is (it::match($regex, $string), $expect, $name);
+	$pass = is (it::match($regex, $string, $p), $expect, $name);
 	if (!$pass) {
-		diag("        regex given: $regex");
+		diag("        regex given: $regex" . ($p ? " " .D($p) : ""));
 		diag("    regex converted: " . it::convertregex($regex));
 	} 
 	$GLOBALS['TEST_MORE_LEVEL'] = 0;
 }
 
+
 match(
 	'b', 'aaaabaaaa',
 	'b',
 	'simple regex'
-	);
+);
+
 match(
 	'a/b', '   a/b   ',
 	'a/b',
 	'regex with /'
 );
+
 match(
 	'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq',
-	array( 'bb', 'cc', 'dd' ),
+	array('bb', 'cc', 'dd'),
 	'return array of captures'
-	);
+);
+
 match(
 	'\bblah\b', ' blah ',
 	'blah',
 	'match \b at spaces'
-	);
+);
+
 match(
 	'\bblah\b', 'blah',
 	'blah',
 	'match \b at end of string'
-	);
+);
+
 match(
 	'\bblah\b', 'ablahc',
 	false,
 	'don\'t match \b at word chars'
-	);
+);
+
 match(
-	'\bblah\b', 'Üblahä',
+	'\bblah\b', 'ÃœblahÃ¤',
 	false,
-	'don\'t match \b at umlaute in latin1'
-	);
+	'don\'t match \b at umlaute'
+);
+
 match(
 	'\Bblah\B', ' blah ',
 	false,
 	'don\'t match \B at spaces'
-	);
+);
+
 match(
 	'\Bblah\B', 'blah',
 	false,
 	'don\'t match \B at end of string'
-	);
+);
+
 match(
 	'\Bblah\B', 'ablahc',
 	'blah',
 	'match \B at word chars'
-	);
+);
+
 match(
-	'\Bblah\B', 'Üblahä',
+	'\Bblah\B', 'ÃœblahÃ¤',
 	'blah',
-	'match \B at umlaute in latin1'
-	);
+	'match \B at umlaute'
+);
+
 match(
-	'\w+', '  |#Üblahä   ',
-	'Üblahä',
+	'\w+', '  |#ÃœblahÃ¤   ',
+	'ÃœblahÃ¤',
 	'include umlaute in \w'
-	);
+);
+
 match(
-	'[[:alpha:]]+', '  |#blahä   ',
-	'blahä',
+	'[[:alpha:]]+', '  |#blahÃ¤   ',
+	'blahÃ¤',
 	'include umlaute in [[:alpha:]]'
-	);
+);
+
 match(
-	'\W+', '  |#Üblahä  ',
+	'\W+', '  |#ÃœblahÃ¤  ',
 	'  |#',
 	'don\'t include umlaute in \W'
-	);
+);
+
 match(
-	'\ba', 'äa',
+	'\ba', 'Ã¤a',
 	'',
 	'\b must know umlauts'
-	);
+);
 
-eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' );
-$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex );
+match(
+	'aaa\\\\w+', '   aaa\www  ',
+	'aaa\www',
+	'don\'t parse \w in \\\\w at beginning (match)'
+);
+
+match(
+	'aaa\\\\w+', '   aaa\www  ',
+	'aaa\www',
+	'don\'t parse \w in \\\\w after chars (match)'
+);
+
+eval('$escapedwordregex = "' . it::convertregex('\w') . '";');
+$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex);
 
 match(
 	'\\\\w+',  $escapedwordregex,
 	false,
 	'don\'t parse \w in \\\\w at beginning (no match)'
-	);
-match(
-	'aaa\\\\w+', '   aaa\www  ',
-	'aaa\www',
-	'don\'t parse \w in \\\\w at beginning (match)'
-	);
+);
+
 match(
 	'aaa\\\\w+', 'aaa' . $escapedwordregex,
 	false,
 	'don\'t parse \w in \\\\w after chars (no match)'
-	);
-match(
-	'aaa\\\\w+', '   aaa\www  ',
-	'aaa\www',
-	'don\'t parse \w in \\\\w after chars (match)'
-	);
+);
+
 match(
 	'\\\\\\\\w+', '\\' . $escapedwordregex,
 	false,
 	'don\'t parse \w in \\\\\\\w (no match)'
-	);
+);
+
 match(
 	'\\\\\\\\w+', '  \\\\www  ',
 	'\\\\www',
 	'don\'t parse \\\\\\\\w as \w (match)'
-	);
+);
+
 match(
 	'[\w]+', '[[[]]]---',
 	false,
 	'replace \w in [\w] correctly (no match)'
-	);
+);
+
 match(
 	'[\w]+', '  \\\\aword[[[]]]   ',
 	'aword',
 	'replace \w in [\w] correctly (match)'
-	);
+);
+
 match(
 	'[\\\\w]+', ' blabergna ',
 	false,
 	'don\'t parse \w in [\\\\w] (no match)'
-	);
+);
+
 match(
 	'[\\\\w]+', '  \\\\worda[[[]',
 	'\\\\w',
 	'don\'t parse \w in [\\\\w] (match)'
-	);
+);
+
 match(
 	'[a\W]+', 'bbbbbbb a a%$+ accccc',
 	' a a%$+ a',
 	'\W in []'
-	);
+);
+
 match(
-	'\\\\\\w+', '  \Üblahä  ',
-	'\Üblahä',
+	'\\\\\\w+', '  \ÃœblahÃ¤  ',
+	'\ÃœblahÃ¤',
 	'parse \w in \\\\\\w at beginning'
-	);
+);
+
 match(
-	'aaa\\\\\\w+', '  aaa\Üblahä  ',
-	'aaa\Üblahä',
+	'aaa\\\\\\w+', '  aaa\ÃœblahÃ¤  ',
+	'aaa\ÃœblahÃ¤',
 	'parse \w in \\\\\\w after chars'
-	);
-is(
-	it::replace(
-		array(
-			'regex1' => 'repl1',
-			'regex2' => 'repl2',
-			'regex3' => 'repl3' ),
-		'regex2 regex1 regex3' ),
-	'repl2 repl1 repl3',
-	'test tr regex function'
-	);
-is(
-	it::match( '\w+', 'word1 wörd2 word_3', array('all' => true )),
-	array( 'word1', 'wörd2', 'word_3' ),
-	"test match_all function"
-	);
+);
+
+match(
+	'\w+', 'word1 wÃ¶rd2 word_3',
+	array('word1', 'wÃ¶rd2', 'word_3'),
+	"test match_all function",
+	array('all' => true)
+);
+
 match(
 	'aBcD', '  aBcD  ',
 	'aBcD',
 	"caseinsensitive is default"
-	);
+);
+
 match(
-	'ö', 'Ö',
-	'Ö',
-	'match umlaute in latin1 case insensitive'
-	);
+	'\w+', 'MÃ¼ller',
+	'MÃ¼ller',
+	'\w matches umlaut in utf-8 mode'
+);
 
-is(
-	it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)),
-	utf8_encode('AÖbü'),
-	"match utf-8 umlaute in case insensitive"
+match(
+	'M.ller', 'MÃ¼ller',
+	'MÃ¼ller',
+	'. matches umlaut in utf-8 mode'
 );
 
-$oldcharset = ini_get('default_charset');
-ini_set('default_charset', 'utf-8');
 match(
-	utf8_encode('aöBÜ'), utf8_encode('AÖbü'),
-	utf8_encode('AÖbü'),
-	"match utf-8 umlaute in case insensitive using default_charset"
+	utf8_decode('Ã¶'), utf8_decode('Ã–'),
+	utf8_decode('Ã–'),
+	'match umlaute in de_CH.latin1 case insensitive',
+	array('utf8' => false)
 );
-is(
-	it::match('aöBÜ', 'AÖbü', array('utf8' => false)),
-	'AÖbü',
-	"non-utf-8 override with default_charset=utf-8"
+
+match(
+	utf8_decode('aÃ¶BÃœ'), utf8_decode('AÃ–bÃ¼'),
+	utf8_decode('AÃ–bÃ¼'),
+	"match umlaute with non-utf-8 override in p",
+	array('utf8' => false)
 );
+
+
 match(
-	'\w+', utf8_encode('Müller'),
-	utf8_encode('Müller'),
-	'\w matches umlaut in utf-8 mode'
+	'abc', "aBc",
+	false,
+	"set case sensitivity by parameter",
+	array('casesensitive' => 1),
 );
+
 match(
-	'M.ller', utf8_encode('Müller'),
-	utf8_encode('Müller'),
-	'. matches umlaut in utf-8 mode'
+	'\w+', 'word1 wÃ¶rd2 word_3',
+	array('word1', 'wÃ¶rd2', 'word_3'),
+	"test all => 1 without captures",
+	array('all' => 1)
 );
-ini_set('default_charset', $oldcharset);
 
-is(
-	it::match( 'abc', "aBc", array('casesensitive' => 1 )),
-	false,
-	"set case sensitivity by parameter"
-	);
+match(
+	'\w+\s+(\d+)', 'word1 12 wÃ¶rd2 3 word_3 4',
+	array('12', '3', '4'),
+	"test all => 1 with one capture",
+	array('all' => 1)
+);
 
+match(
+	'(\w+)\s+(\d+)', 'word1 12 wÃ¶rd2 3 word_3 4',
+	array(array('word1', '12'), array('wÃ¶rd2', '3'), array('word_3', '4')),
+	"test all => 1 with captures",
+	array('all' => 1)
+);
+
+match(
+	'(\w+)\s+(\d+)', 'word1 12 wÃ¶rd2 3 word_3 4',
+	array(array('word1', 'wÃ¶rd2', 'word_3'), array('12', '3', '4')),
+	"test all => 1,pattern_order => 1",
+	array('all' => 1, 'pattern_order' => 1)
+);
+
+ini_set('default_charset', 'iso-8859-1');
+match(
+	'aÃ¶BÃœ', "AÃ–bÃ¼",
+	'AÃ–bÃ¼',
+	"match utf-8 umlaute in case insensitive mode with utf8 override",
+	array('utf8' => true)
+);
+ini_set('default_charset', 'utf-8');
+
+
+#
+# tests for it::replace()
+#
 is(
-	it::match( '\w+', 'word1 wörd2 word_3', array('all' => 1 )),
-	array( 'word1', 'wörd2', 'word_3' ),
-	"test all=>1 without captures"
-	);
-is(
-	it::match( '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )),
-	array( '12', '3', '4' ),
-	"test all=>1 with one capture"
-	);
-is(
-	it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1 )),
-	array( array( 'word1', '12' ), array( 'wörd2', '3' ), array( 'word_3', '4' ) ),
-	"test all=>1 with captures"
-	);
-is(
-	it::match( '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )),
-	array( array( 'word1', 'wörd2', 'word_3' ), array( '12', '3', '4' ) ),
-	"test all=>1,pattern_order=>1"
-	);
+	it::replace(
+		array(
+			'regex1' => 'repl1',
+			'regex2' => 'repl2',
+			'regex3' => 'repl3'),
+		'regex2 regex1 regex3'),
+	'repl2 repl1 repl3',
+	'test tr regex function'
+);
 
 is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12");
 is(it::replace(array('!' => "x"), "!"), "x");
-is(it::replace(array('\w' => "x"), "oö"), "xx");
-is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x");
-is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x");
-is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
-is(it::replace(array('ö' => "x"), "Ö"), "x");
+is(it::replace(array('\w' => "x"), "oÃ¶"), "xx");
+is(it::replace(array('[[:alpha:]]' => "x"), "Ã¶"), "x");
+is(it::replace(array('\w' => "x", '#' => "!"), "#Ã¶"), "!x");
+is(it::replace(array('#' => "!", '\w' => "x"), "#Ã¶"), "!x");
+is(it::replace(array('Ã¶' => "x"), "Ã–"), "x");
 is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
 
+
+setlocale(LC_CTYPE, $oldlocale);
+ini_set('default_charset', $oldcharset);	# end of tests that must run with specific charset
+
+
 # it::filter_keys tests
 
 $data = array('a' => 1, 'b' => 2, 'c' => 3);
-- 
cgit v1.2.3


From b7e99acb4b7f53799751c07ac9aae3de5fabd99e Mon Sep 17 00:00:00 2001
From: Urban Müller
Date: Thu, 22 Mar 2012 22:45:55 +0000
Subject: adapt to utf-8

---
 tests/exec.t | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/exec.t b/tests/exec.t
index 689bd26..9425420 100755
--- a/tests/exec.t
+++ b/tests/exec.t
@@ -11,7 +11,7 @@ is(it::shell_command("echo {arg}", array('arg' => '')), "echo ''", "quote empty
 
 foreach (array("", "C", "de_CH", "de_CH.utf8") as $locale) {
 	setlocale(LC_ALL, $locale);
-	$arg = "preüpost";
+	$arg = "preÃ¼post";
 	if (it::match('utf8', $locale))
 		$arg = utf8_encode($arg);
 	is(it::exec("echo " . $arg), $arg . "\n", "exec with umlaut (locale '$locale')");
-- 
cgit v1.2.3


From 6c6828a8f0904110a67fe89031f9d4eaedf29213 Mon Sep 17 00:00:00 2001
From: Christian Weber
Date: Fri, 23 Mar 2012 15:28:10 +0000
Subject: it_xml uses correct target encoding by default, adapt tests to utf-8
 default

---
 tests/it_xml.t | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

(limited to 'tests')

diff --git a/tests/it_xml.t b/tests/it_xml.t
index f74c54b..d74fadf 100755
--- a/tests/it_xml.t
+++ b/tests/it_xml.t
@@ -5,13 +5,14 @@
 
 function match($xmldata, $expected, $name, $prefix = "", $p = array())
 {
-	$classname = $prefix ? ($prefix . "_xml") : "it_xml";
+	$classname = ($prefix ?: "it") . "_xml";
 	$varname  = $prefix . "foo";
 	$xmldata = "<root>$xmldata</root>";
 	$xml = new $classname($xmldata, $p);
+	$mod_utf8 = $p['encoding'] != "iso-8859-1" ? "u" : "";
 
 	is(
-		preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+		preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
 		$expected,
 		"$name (string)"
 	);
@@ -24,11 +25,10 @@ function match($xmldata, $expected, $name, $prefix = "", $p = array())
 	fclose($tmpfile);
 
 	is(
-		preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+		preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
 		$expected,
 		"$name (file)"
 	);
-
 }
 
 match(
@@ -44,8 +44,8 @@ match(
 );
 
 match(
-	'<foo title="Zürich">Stüssihofstadt</foo>',
-	'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ',
+	'<foo title="ZÃ¼rich">StÃ¼ssihofstadt</foo>',
+	'foo Object ( [attr] => Array ( [title] => ZÃ¼rich ) [val] => StÃ¼ssihofstadt ) ',
 	'simple tag with latin1 content and attribute'
 );
 
@@ -62,26 +62,33 @@ match(
 );
 
 match(
-	'<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &uuml;</foo>',
-	'foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) ',
-	'Predecode illegal entities while keeping properly encoded ones'
+	'<foo>x &uuml; y</foo>',
+	utf8_decode('foo Object ( [val] => x Ã¼ y ) '),
+	'Manual encoding override',
+	"",
+	array('encoding' => "iso-8859-1")
 );
 
 match(
 	'<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &uuml;</foo>',
-	utf8_encode('foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) '),
-	'Predecode illegal entities while keeping properly encoded ones (UTF-8)',
-	"",
-	array('encoding' => "UTF-8")
+	'foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> Ã¼ ) ',
+	'Predecode illegal entities while keeping properly encoded ones',
 );
 
+match(
+	'<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &#xFC;</foo>',
+	utf8_decode('foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> Ã¼ ) '),
+	'Predecode illegal entities while keeping properly encoded ones (iso-8859-1)',
+	"",
+	array('encoding' => "iso-8859-1")
+);
 
 match(
 	"<foo>a\x05b</foo>",
 	'foo Object ( [val] => a b ) ',
 	'Illegal latin 1 character',
 	"",
-	array('encoding' => "ISO-8859-1")
+	array('encoding' => "iso-8859-1")
 );
 
 # Test inheritance
-- 
cgit v1.2.3


From b7200b739ff651a7647d2d666e3674a7fe3cb6e2 Mon Sep 17 00:00:00 2001
From: Urban Müller
Date: Mon, 26 Mar 2012 15:11:39 +0000
Subject: fixed it_html::fix_encoding

---
 tests/it_html.t | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tests')

diff --git a/tests/it_html.t b/tests/it_html.t
index c955359..a576b47 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -168,4 +168,16 @@ is(
 is(it_html::entity_decode("&#8217;"), "'");
 is(it_html::entity_decode("&#xfff;"), " ");
 is(it_html::entity_decode("&#999;"),  " ");
+
+is(it_html::fix_encoding("Meier"), "Meier");
+is(it_html::fix_encoding("MÃ¼ller"), "MÃ¼ller");
+is(it_html::fix_encoding("AslÄ±"), "AslÄ±");
+is(it_html::fix_encoding("Ã©Â»"), "Ã©Â»");
+
+is(it_html::fix_encoding(utf8_encode("MÃ¼ller"), true), "MÃ¼ller", "double encoded latin1");	# Double encoded latin1
+is(it_html::fix_encoding(utf8_encode("AslÄ±"), true), "AslÄ±");		# Double encoded non-latin1
+is(it_html::fix_encoding(utf8_encode("Ã©Â»"), true), "Ã©Â»");		# Double encoded special combination
+
+is(it_html::fix_encoding(utf8_decode("MÃ¼ller"), true), "MÃ¼ller");	# Incorrectly decoded latin1
+
 ?>
-- 
cgit v1.2.3


From bbcc6615dc5316a73f07b262950ac18d50c80497 Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Mon, 26 Mar 2012 15:16:01 +0000
Subject: Improve it_html tests

---
 tests/it_html.t | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'tests')

diff --git a/tests/it_html.t b/tests/it_html.t
index a576b47..b1d271e 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -165,19 +165,19 @@ is(
 	 "q&#8592;x",
 	'it_html::sanitize preserve non-decodable numeric entities'
 );
-is(it_html::entity_decode("&#8217;"), "'");
-is(it_html::entity_decode("&#xfff;"), " ");
-is(it_html::entity_decode("&#999;"),  " ");
+is(it_html::entity_decode("&#8217;"), "'", "it_html::entity_decode numeric decimal entity");
+is(it_html::entity_decode("&#xfff;"), " ", "it_html::entity_decode invalid numeric hex entity");
+is(it_html::entity_decode("&#999;"),  " ", "it_html::entity_decode invalid numeric decimal entity");
 
-is(it_html::fix_encoding("Meier"), "Meier");
-is(it_html::fix_encoding("MÃ¼ller"), "MÃ¼ller");
-is(it_html::fix_encoding("AslÄ±"), "AslÄ±");
-is(it_html::fix_encoding("Ã©Â»"), "Ã©Â»");
+is(it_html::fix_encoding("Meier"), "Meier", "it_html::fix_encoding ascii");
+is(it_html::fix_encoding("MÃ¼ller"), "MÃ¼ller", "it_html::fix_encoding utf-8 latin1");
+is(it_html::fix_encoding("AslÄ±"), "AslÄ±", "it_html::fix_encoding utf-8 non-latin1");
+is(it_html::fix_encoding("Ã©Â»"), "Ã©Â»", "it_html::fix_encoding utf-8 latin1 special combination");
 
-is(it_html::fix_encoding(utf8_encode("MÃ¼ller"), true), "MÃ¼ller", "double encoded latin1");	# Double encoded latin1
-is(it_html::fix_encoding(utf8_encode("AslÄ±"), true), "AslÄ±");		# Double encoded non-latin1
-is(it_html::fix_encoding(utf8_encode("Ã©Â»"), true), "Ã©Â»");		# Double encoded special combination
+is(it_html::fix_encoding(utf8_encode("MÃ¼ller"), true), "MÃ¼ller", "it_html::fix_encoding double encoded latin1");
+is(it_html::fix_encoding(utf8_encode("AslÄ±"), true), "AslÄ±", "it_html::fix_encoding double encoded non-latin1");
+is(it_html::fix_encoding(utf8_encode("Ã©Â»"), true), "Ã©Â»", "it_html::fix_encoding double encoded latin1 special combination");
 
-is(it_html::fix_encoding(utf8_decode("MÃ¼ller"), true), "MÃ¼ller");	# Incorrectly decoded latin1
+is(it_html::fix_encoding(utf8_decode("MÃ¼ller"), true), "MÃ¼ller", "it_html::fix_encoding incorrectly encoded latin1");
 
 ?>
-- 
cgit v1.2.3


From 0147e6e3aea620a54b0c3f6c932c658ee72a45a0 Mon Sep 17 00:00:00 2001
From: Nathan Gass
Date: Mon, 26 Mar 2012 15:20:24 +0000
Subject: new utf8 safe functions it::grep and it::substr_replace

---
 tests/it.t | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tests')

diff --git a/tests/it.t b/tests/it.t
index ec95cc4..317b8a3 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -305,6 +305,10 @@ is(it::replace(array('#' => "!", '\w' => "x"), "#Ã¶"), "!x");
 is(it::replace(array('Ã¶' => "x"), "Ã–"), "x");
 is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
 
+is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'),  'grep with simple regex');
+is(it::grep('!', array('ismatch!', 'isnomatch')),      array('ismatch!'), '! in regex');
+is(it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), 'casesensitive' => 1), array(0 => 'lower', 3 => 'UPPER'), 'set casesensitive');
+is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys');
 
 setlocale(LC_CTYPE, $oldlocale);
 ini_set('default_charset', $oldcharset);	# end of tests that must run with specific charset
@@ -342,4 +346,8 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg
 is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime');
 is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime');
 
+# it::substr_replace
+is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii');
+is(it::substr_replace('âœ”â˜¯â™¥', 'â˜ƒâ˜ƒ', 1, 1), 'âœ”â˜ƒâ˜ƒâ™¥', 'it::substr_replace for utf-8');
+
 ?>
-- 
cgit v1.2.3


From 52239c6482d10cbea631befd8f96b74425731b72 Mon Sep 17 00:00:00 2001
From: Nathan Gass
Date: Mon, 26 Mar 2012 15:41:08 +0000
Subject: make get and get_multi tests encoding agnostic

---
 tests/it_url.t | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

(limited to 'tests')

diff --git a/tests/it_url.t b/tests/it_url.t
index e8507aa..ee683ce 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -87,25 +87,22 @@ is(
 
 $url = new it_url('http://www.gna.ch/');
 $page = $url->get();
-is(
-	it::match('(</html>)', $page),
-	'</html>',
+ok(
+	strpos($page, '</html>'), #UTF8SAFE
 	'$url->get with url in constructor'
 );
 
 $url = new it_url('http://bogus.url');
 $page = $url->get('http://www.gna.ch/');
-is(
-	it::match('(</html>)', $page),
-	'</html>',
+ok(
+	strpos($page, '</html>'), #UTF8SAFE
 	'$url->get(url) with url as string arg'
 );
 
 $url = new it_url('http://bogus.url');
 $page = $url->get(array('url' => 'http://www.gna.ch/'));
-is(
-	it::match('(</html>)', $page),
-	'</html>',
+ok(
+	strpos($page, '</html>'), #UTF8SAFE
 	'$url->get(\'url\' => url) with url as named arg'
 );
 is(
@@ -121,15 +118,14 @@ is(
 
 unset($url, $page);
 $page = it_url::get('http://www.gna.ch/');
-is(
-	it::match('(</html>)', $page),
-	'</html>',
+ok(
+	strpos($page, '</html>'), #UTF8SAFE
 	'it_url::get() static call'
 );
 
 $pages = it_url::get_multi('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/'));
-ok(it::match('</html>', $pages['a']), 'it_url::get_multi got first url');
-ok(it::match('</html>', $pages['b']), 'it_url::get_multi got second url');
+ok(strpos($pages['a'], '</html>'), 'it_url::get_multi got first url'); #UTF8SAFE
+ok(strpos($pages['b'], '</html>'), 'it_url::get_multi got second url'); #UTF8SAFE
 is(count($pages), 2, 'it_url::get_multi no additional array elements');
 
 ?>
-- 
cgit v1.2.3


From 49d2a5ce1b6ad201f051263db7c3a1f5ad6a39ab Mon Sep 17 00:00:00 2001
From: Nathan Gass
Date: Mon, 26 Mar 2012 16:12:05 +0000
Subject: space after \#

---
 tests/it_url.t | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tests')

diff --git a/tests/it_url.t b/tests/it_url.t
index ee683ce..10fd015 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -88,21 +88,21 @@ is(
 $url = new it_url('http://www.gna.ch/');
 $page = $url->get();
 ok(
-	strpos($page, '</html>'), #UTF8SAFE
+	strpos($page, '</html>'), # UTF8SAFE
 	'$url->get with url in constructor'
 );
 
 $url = new it_url('http://bogus.url');
 $page = $url->get('http://www.gna.ch/');
 ok(
-	strpos($page, '</html>'), #UTF8SAFE
+	strpos($page, '</html>'), # UTF8SAFE
 	'$url->get(url) with url as string arg'
 );
 
 $url = new it_url('http://bogus.url');
 $page = $url->get(array('url' => 'http://www.gna.ch/'));
 ok(
-	strpos($page, '</html>'), #UTF8SAFE
+	strpos($page, '</html>'), # UTF8SAFE
 	'$url->get(\'url\' => url) with url as named arg'
 );
 is(
@@ -119,13 +119,13 @@ is(
 unset($url, $page);
 $page = it_url::get('http://www.gna.ch/');
 ok(
-	strpos($page, '</html>'), #UTF8SAFE
+	strpos($page, '</html>'), # UTF8SAFE
 	'it_url::get() static call'
 );
 
 $pages = it_url::get_multi('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/'));
-ok(strpos($pages['a'], '</html>'), 'it_url::get_multi got first url'); #UTF8SAFE
-ok(strpos($pages['b'], '</html>'), 'it_url::get_multi got second url'); #UTF8SAFE
+ok(strpos($pages['a'], '</html>'), 'it_url::get_multi got first url'); # UTF8SAFE
+ok(strpos($pages['b'], '</html>'), 'it_url::get_multi got second url'); # UTF8SAFE
 is(count($pages), 2, 'it_url::get_multi no additional array elements');
 
 ?>
-- 
cgit v1.2.3


From b1c0b4946572027c8de564730a89ec584c830bf3 Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Wed, 28 Mar 2012 13:00:39 +0000
Subject: Added it::any2utf8, fixed it::replace fast path to add u modifier,
 added error reporting for invalid utf-8 input to it::match and it::replace

---
 tests/it_html.t | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tests')

diff --git a/tests/it_html.t b/tests/it_html.t
index b1d271e..770d11a 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -153,6 +153,7 @@ is(it_html::entity_decode("&#65;"),   "A");
 #
 
 it_html::configure(array('charset' => "iso-8859-1"));
+ini_set('default_charset', "iso-8859-1");
 
 is(
 	it_html::sanitize('q&uuml;x'),
-- 
cgit v1.2.3


From 2c7860119626cdf65fa4d7de38b7e0a1fdb816df Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Wed, 28 Mar 2012 13:05:31 +0000
Subject: Added tests for it::replace with nbsp, it::any2utf8

---
 tests/it.t | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'tests')

diff --git a/tests/it.t b/tests/it.t
index 317b8a3..408d84a 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -304,6 +304,7 @@ is(it::replace(array('\w' => "x", '#' => "!"), "#Ã¶"), "!x");
 is(it::replace(array('#' => "!", '\w' => "x"), "#Ã¶"), "!x");
 is(it::replace(array('Ã¶' => "x"), "Ã–"), "x");
 is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
+is(it::replace(array('\s' => "x"), it_html::entity_decode("&nbsp;")), "x", "match non-breaking space as white-space character");
 
 is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'),  'grep with simple regex');
 is(it::grep('!', array('ismatch!', 'isnomatch')),      array('ismatch!'), '! in regex');
@@ -350,4 +351,18 @@ is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtot
 is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii');
 is(it::substr_replace('âœ”â˜¯â™¥', 'â˜ƒâ˜ƒ', 1, 1), 'âœ”â˜ƒâ˜ƒâ™¥', 'it::substr_replace for utf-8');
 
+is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input");
+is(it::any2utf8('MÃ¼ller'), 'MÃ¼ller', "it::any2utf8 utf8 input");
+is(it::any2utf8('AslÄ±'), 'AslÄ±', "it::any2utf8 utf8 non-latin1 input");
+is(it::any2utf8(utf8_decode('MÃ¼ller')), 'MÃ¼ller', "it::any2utf8 latin1 input");
+
+is(it::any2utf8(
+	' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~Â Â¡Â¢Â£Â¤Â¥Â¦Â§Â¨Â©ÂªÂ«Â¬Â­Â®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹ÂºÂ»Â¼Â½Â¾Â¿Ã€ÃÃ‚ÃƒÃ„Ã…Ã†Ã‡ÃˆÃ‰ÃŠÃ‹ÃŒÃÃŽÃÃÃ‘Ã’Ã“Ã”Ã•Ã–Ã—Ã˜Ã™ÃšÃ›ÃœÃÃžÃŸÃ Ã¡Ã¢Ã£Ã¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«Ã¬Ã­Ã®Ã¯Ã°Ã±Ã²Ã³Ã´ÃµÃ¶Ã·Ã¸Ã¹ÃºÃ»Ã¼Ã½Ã¾Ã¿'),
+	' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~Â Â¡Â¢Â£Â¤Â¥Â¦Â§Â¨Â©ÂªÂ«Â¬Â­Â®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹ÂºÂ»Â¼Â½Â¾Â¿Ã€ÃÃ‚ÃƒÃ„Ã…Ã†Ã‡ÃˆÃ‰ÃŠÃ‹ÃŒÃÃŽÃÃÃ‘Ã’Ã“Ã”Ã•Ã–Ã—Ã˜Ã™ÃšÃ›ÃœÃÃžÃŸÃ Ã¡Ã¢Ã£Ã¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«Ã¬Ã­Ã®Ã¯Ã°Ã±Ã²Ã³Ã´ÃµÃ¶Ã·Ã¸Ã¹ÃºÃ»Ã¼Ã½Ã¾Ã¿',
+	"it::any2utf8 utf8 input (exhaustive alphabet)"); 
+is(it::any2utf8(
+	utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~Â Â¡Â¢Â£Â¤Â¥Â¦Â§Â¨Â©ÂªÂ«Â¬Â­Â®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹ÂºÂ»Â¼Â½Â¾Â¿Ã€ÃÃ‚ÃƒÃ„Ã…Ã†Ã‡ÃˆÃ‰ÃŠÃ‹ÃŒÃÃŽÃÃÃ‘Ã’Ã“Ã”Ã•Ã–Ã—Ã˜Ã™ÃšÃ›ÃœÃÃžÃŸÃ Ã¡Ã¢Ã£Ã¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«Ã¬Ã­Ã®Ã¯Ã°Ã±Ã²Ã³Ã´ÃµÃ¶Ã·Ã¸Ã¹ÃºÃ»Ã¼Ã½Ã¾Ã¿')),
+	' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~Â Â¡Â¢Â£Â¤Â¥Â¦Â§Â¨Â©ÂªÂ«Â¬Â­Â®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹ÂºÂ»Â¼Â½Â¾Â¿Ã€ÃÃ‚ÃƒÃ„Ã…Ã†Ã‡ÃˆÃ‰ÃŠÃ‹ÃŒÃÃŽÃÃÃ‘Ã’Ã“Ã”Ã•Ã–Ã—Ã˜Ã™ÃšÃ›ÃœÃÃžÃŸÃ Ã¡Ã¢Ã£Ã¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«Ã¬Ã­Ã®Ã¯Ã°Ã±Ã²Ã³Ã´ÃµÃ¶Ã·Ã¸Ã¹ÃºÃ»Ã¼Ã½Ã¾Ã¿',
+	"it::any2utf8 latin1 input (exhaustive alphabet)"); 
+
 ?>
-- 
cgit v1.2.3


From a7ac24d9f9698ff0a1d42268d79f74cd21f70eb4 Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Wed, 28 Mar 2012 13:40:12 +0000
Subject: Fix it::ucwords and added tests for it::ucfirst and it::ucwords

---
 tests/it.t | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tests')

diff --git a/tests/it.t b/tests/it.t
index 408d84a..8e4a7e2 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -347,6 +347,10 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg
 is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime');
 is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime');
 
+# it::uc*
+is(it::ucfirst('foo bÃ¤r Ã¼ber'), 'Foo bÃ¤r Ã¼ber');
+is(it::ucwords('foo bÃ¤r Ã¼ber'), 'Foo BÃ¤r Ãœber');
+
 # it::substr_replace
 is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii');
 is(it::substr_replace('âœ”â˜¯â™¥', 'â˜ƒâ˜ƒ', 1, 1), 'âœ”â˜ƒâ˜ƒâ™¥', 'it::substr_replace for utf-8');
-- 
cgit v1.2.3


From a60fb69ff8e7755ab969298ab85a2b63bbcc8b14 Mon Sep 17 00:00:00 2001
From: Christian Weber
Date: Fri, 30 Mar 2012 16:13:15 +0000
Subject: add GREEN HEART unit test for it_html::fix_encoding()

---
 tests/it_html.t | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tests')

diff --git a/tests/it_html.t b/tests/it_html.t
index 770d11a..2323e5e 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -181,4 +181,6 @@ is(it_html::fix_encoding(utf8_encode("Ã©Â»"), true), "Ã©Â»", "it_html::fix_encod
 
 is(it_html::fix_encoding(utf8_decode("MÃ¼ller"), true), "MÃ¼ller", "it_html::fix_encoding incorrectly encoded latin1");
 
+is(it_html::fix_encoding("aðŸ’šb"), "aðŸ’šb", "it_html::fix_encoding incorrectly encoded GREEN HEART");
+
 ?>
-- 
cgit v1.2.3


From 3242f516a6c3e89fa898d96927746447e15d7750 Mon Sep 17 00:00:00 2001
From: Christian Weber
Date: Fri, 30 Mar 2012 16:17:08 +0000
Subject: correct GREEN HEART test description

---
 tests/it_html.t | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/it_html.t b/tests/it_html.t
index 2323e5e..174c487 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -181,6 +181,6 @@ is(it_html::fix_encoding(utf8_encode("Ã©Â»"), true), "Ã©Â»", "it_html::fix_encod
 
 is(it_html::fix_encoding(utf8_decode("MÃ¼ller"), true), "MÃ¼ller", "it_html::fix_encoding incorrectly encoded latin1");
 
-is(it_html::fix_encoding("aðŸ’šb"), "aðŸ’šb", "it_html::fix_encoding incorrectly encoded GREEN HEART");
+is(it_html::fix_encoding("aðŸ’šb"), "aðŸ’šb", "it_html::fix_encoding correctly handles 4-byte utf-8 character GREEN HEART");
 
 ?>
-- 
cgit v1.2.3