summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Christian Helbling, 2012-04-04 09:25:06 +0000
committer: Christian Helbling, 2012-04-04 09:25:06 +0000
commit: a0231e4df6f8bfa82451c05998bb349ece764610 (patch)
tree: 6c1d9e76e8a6388fdf7f1d2435f0da082bd7de62 /tests
parent: 1336ce6ac3baacd1cecbf776cfafa81f8d025272 (diff)
parent: 14f1c25b43628013507da706544d5c55cb1bc461 (diff)
download: itools-a0231e4df6f8bfa82451c05998bb349ece764610.tar.gz
itools-a0231e4df6f8bfa82451c05998bb349ece764610.tar.bz2
itools-a0231e4df6f8bfa82451c05998bb349ece764610.zip
Merged devel-utf8 back into live
Diffstat (limited to 'tests')
-rwxr-xr-x tests/exec.t 2
-rwxr-xr-x tests/getopt.t 2
-rwxr-xr-x tests/it.t 335
-rwxr-xr-x tests/it_html.t 59
-rwxr-xr-x tests/it_url.t 24
-rwxr-xr-x tests/it_xml.t 35
6 files changed, 287 insertions, 170 deletions
diff --git a/tests/exec.t b/tests/exec.t
index 689bd26..9425420 100755
--- a/tests/exec.t
+++ b/tests/exec.t
@@ -11,7 +11,7 @@ is(it::shell_command("echo {arg}", array('arg' => '')), "echo ''", "quote empty
foreach (array("", "C", "de_CH", "de_CH.utf8") as $locale) {
setlocale(LC_ALL, $locale);
- $arg = "prepost";
+ $arg = "preüpost";
if (it::match('utf8', $locale))
$arg = utf8_encode($arg);
is(it::exec("echo " . $arg), $arg . "\n", "exec with umlaut (locale '$locale')");
diff --git a/tests/getopt.t b/tests/getopt.t
index 7a84588..d67738d 100755
--- a/tests/getopt.t
+++ b/tests/getopt.t
@@ -17,7 +17,7 @@ function getopt_ok($argv, $exp, $name)
return is($got['argument'], $exp, $name);
}
-foreach (array("" => "blah gnaber", " (umlaute)" => "pre post") as $variant => $testarg) {
+foreach (array("" => "blah gnaber", " (umlaute)" => "pre üäpost") as $variant => $testarg) {
getopt_ok(array('-a', $testarg), $testarg, "Short version" . $variant);
getopt_ok(array('--argument', $testarg), $testarg, "Long version with space" . $variant);
getopt_ok(array("--argument=$testarg"), $testarg, "Long version with equal" . $variant);
diff --git a/tests/it.t b/tests/it.t
index 1a308ec..8e4a7e2 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -3,252 +3,317 @@
# Tests for it.class
-function match($regex, $string, $expect, $name)
+
+#
+# tests for it::match()
+#
+$oldcharset = ini_get('default_charset');
+$oldlocale = setlocale(LC_CTYPE, 0);
+
+ini_set('default_charset', 'utf-8');
+setlocale(LC_CTYPE, 'de_CH'); # required because we're checking German umlauts in latin1 mode
+
+
+function match($regex, $string, $expect, $name, $p = array())
{
$GLOBALS['TEST_MORE_LEVEL'] = 1;
- $pass = is (it::match($regex, $string), $expect, $name);
+ $pass = is (it::match($regex, $string, $p), $expect, $name);
if (!$pass) {
- diag(" regex given: $regex");
+ diag(" regex given: $regex" . ($p ? " " .D($p) : ""));
diag(" regex converted: " . it::convertregex($regex));
}
$GLOBALS['TEST_MORE_LEVEL'] = 0;
}
+
match(
'b', 'aaaabaaaa',
'b',
'simple regex'
- );
+);
+
match(
'a/b', ' a/b ',
'a/b',
'regex with /'
);
+
match(
'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq',
- array( 'bb', 'cc', 'dd' ),
+ array('bb', 'cc', 'dd'),
'return array of captures'
- );
+);
+
match(
'\bblah\b', ' blah ',
'blah',
'match \b at spaces'
- );
+);
+
match(
'\bblah\b', 'blah',
'blah',
'match \b at end of string'
- );
+);
+
match(
'\bblah\b', 'ablahc',
false,
'don\'t match \b at word chars'
- );
+);
+
match(
- '\bblah\b', 'blah',
+ '\bblah\b', 'Üblahä',
false,
- 'don\'t match \b at umlaute in latin1'
- );
+ 'don\'t match \b at umlaute'
+);
+
match(
'\Bblah\B', ' blah ',
false,
'don\'t match \B at spaces'
- );
+);
+
match(
'\Bblah\B', 'blah',
false,
'don\'t match \B at end of string'
- );
+);
+
match(
'\Bblah\B', 'ablahc',
'blah',
'match \B at word chars'
- );
+);
+
match(
- '\Bblah\B', 'blah',
+ '\Bblah\B', 'Üblahä',
'blah',
- 'match \B at umlaute in latin1'
- );
+ 'match \B at umlaute'
+);
+
match(
- '\w+', ' |#blah ',
- 'blah',
+ '\w+', ' |#Üblahä ',
+ 'Üblahä',
'include umlaute in \w'
- );
+);
+
match(
- '[[:alpha:]]+', ' |#blah ',
- 'blah',
+ '[[:alpha:]]+', ' |#blahä ',
+ 'blahä',
'include umlaute in [[:alpha:]]'
- );
+);
+
match(
- '\W+', ' |#blah ',
+ '\W+', ' |#Üblahä ',
' |#',
'don\'t include umlaute in \W'
- );
+);
+
match(
- '\ba', 'a',
+ '\ba', 'äa',
'',
'\b must know umlauts'
- );
+);
-eval( '$escapedwordregex = "' . it::convertregex( '\w' ) . '";' );
-$escapedwordregex = preg_replace( '|[\\\\/]|', '', $escapedwordregex );
+match(
+ 'aaa\\\\w+', ' aaa\www ',
+ 'aaa\www',
+ 'don\'t parse \w in \\\\w at beginning (match)'
+);
+
+match(
+ 'aaa\\\\w+', ' aaa\www ',
+ 'aaa\www',
+ 'don\'t parse \w in \\\\w after chars (match)'
+);
+
+eval('$escapedwordregex = "' . it::convertregex('\w') . '";');
+$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex);
match(
'\\\\w+', $escapedwordregex,
false,
'don\'t parse \w in \\\\w at beginning (no match)'
- );
-match(
- 'aaa\\\\w+', ' aaa\www ',
- 'aaa\www',
- 'don\'t parse \w in \\\\w at beginning (match)'
- );
+);
+
match(
'aaa\\\\w+', 'aaa' . $escapedwordregex,
false,
'don\'t parse \w in \\\\w after chars (no match)'
- );
-match(
- 'aaa\\\\w+', ' aaa\www ',
- 'aaa\www',
- 'don\'t parse \w in \\\\w after chars (match)'
- );
+);
+
match(
'\\\\\\\\w+', '\\' . $escapedwordregex,
false,
'don\'t parse \w in \\\\\\\w (no match)'
- );
+);
+
match(
'\\\\\\\\w+', ' \\\\www ',
'\\\\www',
'don\'t parse \\\\\\\\w as \w (match)'
- );
+);
+
match(
'[\w]+', '[[[]]]---',
false,
'replace \w in [\w] correctly (no match)'
- );
+);
+
match(
'[\w]+', ' \\\\aword[[[]]] ',
'aword',
'replace \w in [\w] correctly (match)'
- );
+);
+
match(
'[\\\\w]+', ' blabergna ',
false,
'don\'t parse \w in [\\\\w] (no match)'
- );
+);
+
match(
'[\\\\w]+', ' \\\\worda[[[]',
'\\\\w',
'don\'t parse \w in [\\\\w] (match)'
- );
+);
+
match(
'[a\W]+', 'bbbbbbb a a%$+ accccc',
' a a%$+ a',
'\W in []'
- );
+);
+
match(
- '\\\\\\w+', ' \blah ',
- '\blah',
+ '\\\\\\w+', ' \Üblahä ',
+ '\Üblahä',
'parse \w in \\\\\\w at beginning'
- );
+);
+
match(
- 'aaa\\\\\\w+', ' aaa\blah ',
- 'aaa\blah',
+ 'aaa\\\\\\w+', ' aaa\Üblahä ',
+ 'aaa\Üblahä',
'parse \w in \\\\\\w after chars'
- );
-is(
- it::replace(
- array(
- 'regex1' => 'repl1',
- 'regex2' => 'repl2',
- 'regex3' => 'repl3' ),
- 'regex2 regex1 regex3' ),
- 'repl2 repl1 repl3',
- 'test tr regex function'
- );
-is(
- it::match( '\w+', 'word1 wrd2 word_3', array('all' => true )),
- array( 'word1', 'wrd2', 'word_3' ),
- "test match_all function"
- );
+);
+
+match(
+ '\w+', 'word1 wörd2 word_3',
+ array('word1', 'wörd2', 'word_3'),
+ "test match_all function",
+ array('all' => true)
+);
+
match(
'aBcD', ' aBcD ',
'aBcD',
"caseinsensitive is default"
- );
+);
+
match(
- '', '',
- '',
- 'match umlaute in latin1 case insensitive'
- );
+ '\w+', 'Müller',
+ 'Müller',
+ '\w matches umlaut in utf-8 mode'
+);
-is(
- it::match(utf8_encode('aB'), utf8_encode("Ab"), array('utf8' => true)),
- utf8_encode('Ab'),
- "match utf-8 umlaute in case insensitive"
+match(
+ 'M.ller', 'Müller',
+ 'Müller',
+ '. matches umlaut in utf-8 mode'
);
-$oldcharset = ini_get('default_charset');
-ini_set('default_charset', 'utf-8');
match(
- utf8_encode('aB'), utf8_encode('Ab'),
- utf8_encode('Ab'),
- "match utf-8 umlaute in case insensitive using default_charset"
+ utf8_decode('ö'), utf8_decode('Ö'),
+ utf8_decode('Ö'),
+ 'match umlaute in de_CH.latin1 case insensitive',
+ array('utf8' => false)
);
-is(
- it::match('aB', 'Ab', array('utf8' => false)),
- 'Ab',
- "non-utf-8 override with default_charset=utf-8"
+
+match(
+ utf8_decode('aöBÜ'), utf8_decode('AÖbü'),
+ utf8_decode('AÖbü'),
+ "match umlaute with non-utf-8 override in p",
+ array('utf8' => false)
);
+
+
match(
- '\w+', utf8_encode('Mller'),
- utf8_encode('Mller'),
- '\w matches umlaut in utf-8 mode'
+ 'abc', "aBc",
+ false,
+ "set case sensitivity by parameter",
+ array('casesensitive' => 1),
);
+
match(
- 'M.ller', utf8_encode('Mller'),
- utf8_encode('Mller'),
- '. matches umlaut in utf-8 mode'
+ '\w+', 'word1 wörd2 word_3',
+ array('word1', 'wörd2', 'word_3'),
+ "test all => 1 without captures",
+ array('all' => 1)
);
-ini_set('default_charset', $oldcharset);
-is(
- it::match( 'abc', "aBc", array('casesensitive' => 1 )),
- false,
- "set case sensitivity by parameter"
- );
+match(
+ '\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
+ array('12', '3', '4'),
+ "test all => 1 with one capture",
+ array('all' => 1)
+);
+match(
+ '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
+ array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')),
+ "test all => 1 with captures",
+ array('all' => 1)
+);
+
+match(
+ '(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
+ array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')),
+ "test all => 1,pattern_order => 1",
+ array('all' => 1, 'pattern_order' => 1)
+);
+
+ini_set('default_charset', 'iso-8859-1');
+match(
+ 'aöBÜ', "AÖbü",
+ 'AÖbü',
+ "match utf-8 umlaute in case insensitive mode with utf8 override",
+ array('utf8' => true)
+);
+ini_set('default_charset', 'utf-8');
+
+
+#
+# tests for it::replace()
+#
is(
- it::match( '\w+', 'word1 wrd2 word_3', array('all' => 1 )),
- array( 'word1', 'wrd2', 'word_3' ),
- "test all=>1 without captures"
- );
-is(
- it::match( '\w+\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1 )),
- array( '12', '3', '4' ),
- "test all=>1 with one capture"
- );
-is(
- it::match( '(\w+)\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1 )),
- array( array( 'word1', '12' ), array( 'wrd2', '3' ), array( 'word_3', '4' ) ),
- "test all=>1 with captures"
- );
-is(
- it::match( '(\w+)\s+(\d+)', 'word1 12 wrd2 3 word_3 4', array('all' => 1, 'pattern_order' => 1 )),
- array( array( 'word1', 'wrd2', 'word_3' ), array( '12', '3', '4' ) ),
- "test all=>1,pattern_order=>1"
- );
+ it::replace(
+ array(
+ 'regex1' => 'repl1',
+ 'regex2' => 'repl2',
+ 'regex3' => 'repl3'),
+ 'regex2 regex1 regex3'),
+ 'repl2 repl1 repl3',
+ 'test tr regex function'
+);
is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12");
is(it::replace(array('!' => "x"), "!"), "x");
-is(it::replace(array('\w' => "x"), "o"), "xx");
-is(it::replace(array('[[:alpha:]]' => "x"), ""), "x");
-is(it::replace(array('\w' => "x", '#' => "!"), "#"), "!x");
-is(it::replace(array('#' => "!", '\w' => "x"), "#"), "!x");
-is(it::replace(array('' => "x"), ""), "x");
+is(it::replace(array('\w' => "x"), "oö"), "xx");
+is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x");
+is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x");
+is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
+is(it::replace(array('ö' => "x"), "Ö"), "x");
is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
+is(it::replace(array('\s' => "x"), it_html::entity_decode(" ")), "x", "match non-breaking space as white-space character");
+
+is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex');
+is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex');
+is(it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), 'casesensitive' => 1), array(0 => 'lower', 3 => 'UPPER'), 'set casesensitive');
+is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys');
+
+setlocale(LC_CTYPE, $oldlocale);
+ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset
+
# it::filter_keys tests
@@ -282,4 +347,26 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg
is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime');
is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime');
+# it::uc*
+is(it::ucfirst('foo bär über'), 'Foo bär über');
+is(it::ucwords('foo bär über'), 'Foo Bär Über');
+
+# it::substr_replace
+is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii');
+is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8');
+
+is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input");
+is(it::any2utf8('Müller'), 'Müller', "it::any2utf8 utf8 input");
+is(it::any2utf8('Aslı'), 'Aslı', "it::any2utf8 utf8 non-latin1 input");
+is(it::any2utf8(utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input");
+
+is(it::any2utf8(
+ ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'),
+ ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
+ "it::any2utf8 utf8 input (exhaustive alphabet)");
+is(it::any2utf8(
+ utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')),
+ ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
+ "it::any2utf8 latin1 input (exhaustive alphabet)");
+
?>
diff --git a/tests/it_html.t b/tests/it_html.t
index 0def431..174c487 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -4,6 +4,7 @@
# Tests for html.class
# Traditional html generation
+ini_set('default_charset', "utf-8");
new it_html(array('htmltype' => "html"));
is(
@@ -95,30 +96,22 @@ is(
<P><a href="javascript:window.close()" title="Wolken"><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" width="240" height="180" alt="Wolken" style="border: 1px solid #ddd;" /></a></p>
'),
- ' <a href="http://www.flickr.com/people/swisspics%25/">swisspics</a> posted &lt; &lt; &amp; yesterday a <i>photo</i> <i>tag missmatch</i>:<br /><br /> <p><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" alt="" /></p> ',
+ ' <a href="http://www.flickr.com/people/swisspics%25/">swisspics</a> posted &lt; &lt; ä &amp; yesterday a <i>photo</i> <i>tag missmatch</i>:<br /><br /> <p><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" alt="" /></p> ',
'it_html::sanitize tag soup'
);
is(
it_html::sanitize('q&#8592;x'),
- "q&#8592;x",
+ "q←x",
'it_html::sanitize preserve numeric entities'
);
-it_html::configure(array('charset' => "utf-8"));
is(
it_html::sanitize('q&uuml;x'),
"q\xc3\xbcx",
'it_html::sanitize with utf-8'
);
-it_html::configure(array('charset' => "iso-8859-1"));
-is(
- it_html::sanitize('q&uuml;x'),
- "q\xfcx",
- 'it_html::sanitize with latin1'
-);
-
is(
it_html::sanitize('<b>a<br>b</b>'),
"<b>a<br />b</b>",
@@ -126,8 +119,8 @@ is(
);
is(
- U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<Zrich>", 'gnp' => "fasel")))),
- '/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%FCrich%3E&bar[qux][gn%F6p]=fasel',
+ U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<Zürich>", 'gnöp' => "fasel")))),
+ '/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%C3%BCrich%3E&bar[qux][gn%C3%B6p]=fasel',
'U() with nested arrays'
);
@@ -149,11 +142,45 @@ is(
'U() converting of \ to /'
);
-is(it_html::entity_decode("&auml;"), "");
-is(it_html::entity_decode("&#8217;"), "'");
+is(it_html::entity_decode("&auml;"), "ä");
is(it_html::entity_decode("&#x4a;"), "J");
is(it_html::entity_decode("&#x4A;"), "J");
-is(it_html::entity_decode("&#xfff;"), " ");
is(it_html::entity_decode("&#65;"), "A");
-is(it_html::entity_decode("&#999;"), " ");
+
+
+#
+# check transliterations in iso-8859-1
+#
+
+it_html::configure(array('charset' => "iso-8859-1"));
+ini_set('default_charset', "iso-8859-1");
+
+is(
+ it_html::sanitize('q&uuml;x'),
+ "q\xfcx",
+ 'it_html::sanitize with latin1'
+);
+
+is(
+ it_html::sanitize('q&#8592;x'),
+ "q&#8592;x",
+ 'it_html::sanitize preserve non-decodable numeric entities'
+);
+is(it_html::entity_decode("&#8217;"), "'", "it_html::entity_decode numeric decimal entity");
+is(it_html::entity_decode("&#xfff;"), " ", "it_html::entity_decode invalid numeric hex entity");
+is(it_html::entity_decode("&#999;"), " ", "it_html::entity_decode invalid numeric decimal entity");
+
+is(it_html::fix_encoding("Meier"), "Meier", "it_html::fix_encoding ascii");
+is(it_html::fix_encoding("Müller"), "Müller", "it_html::fix_encoding utf-8 latin1");
+is(it_html::fix_encoding("Aslı"), "Aslı", "it_html::fix_encoding utf-8 non-latin1");
+is(it_html::fix_encoding("é»"), "é»", "it_html::fix_encoding utf-8 latin1 special combination");
+
+is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "it_html::fix_encoding double encoded latin1");
+is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı", "it_html::fix_encoding double encoded non-latin1");
+is(it_html::fix_encoding(utf8_encode("é»"), true), "é»", "it_html::fix_encoding double encoded latin1 special combination");
+
+is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller", "it_html::fix_encoding incorrectly encoded latin1");
+
+is(it_html::fix_encoding("a💚b"), "a💚b", "it_html::fix_encoding correctly handles 4-byte utf-8 character GREEN HEART");
+
?>
diff --git a/tests/it_url.t b/tests/it_url.t
index e8507aa..10fd015 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -87,25 +87,22 @@ is(
$url = new it_url('http://www.gna.ch/');
$page = $url->get();
-is(
- it::match('(</html>)', $page),
- '</html>',
+ok(
+ strpos($page, '</html>'), # UTF8SAFE
'$url->get with url in constructor'
);
$url = new it_url('http://bogus.url');
$page = $url->get('http://www.gna.ch/');
-is(
- it::match('(</html>)', $page),
- '</html>',
+ok(
+ strpos($page, '</html>'), # UTF8SAFE
'$url->get(url) with url as string arg'
);
$url = new it_url('http://bogus.url');
$page = $url->get(array('url' => 'http://www.gna.ch/'));
-is(
- it::match('(</html>)', $page),
- '</html>',
+ok(
+ strpos($page, '</html>'), # UTF8SAFE
'$url->get(\'url\' => url) with url as named arg'
);
is(
@@ -121,15 +118,14 @@ is(
unset($url, $page);
$page = it_url::get('http://www.gna.ch/');
-is(
- it::match('(</html>)', $page),
- '</html>',
+ok(
+ strpos($page, '</html>'), # UTF8SAFE
'it_url::get() static call'
);
$pages = it_url::get_multi('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/'));
-ok(it::match('</html>', $pages['a']), 'it_url::get_multi got first url');
-ok(it::match('</html>', $pages['b']), 'it_url::get_multi got second url');
+ok(strpos($pages['a'], '</html>'), 'it_url::get_multi got first url'); # UTF8SAFE
+ok(strpos($pages['b'], '</html>'), 'it_url::get_multi got second url'); # UTF8SAFE
is(count($pages), 2, 'it_url::get_multi no additional array elements');
?>
diff --git a/tests/it_xml.t b/tests/it_xml.t
index f74c54b..d74fadf 100755
--- a/tests/it_xml.t
+++ b/tests/it_xml.t
@@ -5,13 +5,14 @@
function match($xmldata, $expected, $name, $prefix = "", $p = array())
{
- $classname = $prefix ? ($prefix . "_xml") : "it_xml";
+ $classname = ($prefix ?: "it") . "_xml";
$varname = $prefix . "foo";
$xmldata = "<root>$xmldata</root>";
$xml = new $classname($xmldata, $p);
+ $mod_utf8 = $p['encoding'] != "iso-8859-1" ? "u" : "";
is(
- preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+ preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
$expected,
"$name (string)"
);
@@ -24,11 +25,10 @@ function match($xmldata, $expected, $name, $prefix = "", $p = array())
fclose($tmpfile);
is(
- preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+ preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
$expected,
"$name (file)"
);
-
}
match(
@@ -44,8 +44,8 @@ match(
);
match(
- '<foo title="Zrich">Stssihofstadt</foo>',
- 'foo Object ( [attr] => Array ( [title] => Zrich ) [val] => Stssihofstadt ) ',
+ '<foo title="Zürich">Stüssihofstadt</foo>',
+ 'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ',
'simple tag with latin1 content and attribute'
);
@@ -62,26 +62,33 @@ match(
);
match(
- '<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &uuml;</foo>',
- 'foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ) ',
- 'Predecode illegal entities while keeping properly encoded ones'
+ '<foo>x &uuml; y</foo>',
+ utf8_decode('foo Object ( [val] => x ü y ) '),
+ 'Manual encoding override',
+ "",
+ array('encoding' => "iso-8859-1")
);
match(
'<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &uuml;</foo>',
- utf8_encode('foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ) '),
- 'Predecode illegal entities while keeping properly encoded ones (UTF-8)',
- "",
- array('encoding' => "UTF-8")
+ 'foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) ',
+ 'Predecode illegal entities while keeping properly encoded ones',
);
+match(
+ '<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &#xFC;</foo>',
+ utf8_decode('foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) '),
+ 'Predecode illegal entities while keeping properly encoded ones (iso-8859-1)',
+ "",
+ array('encoding' => "iso-8859-1")
+);
match(
"<foo>a\x05b</foo>",
'foo Object ( [val] => a b ) ',
'Illegal latin 1 character',
"",
- array('encoding' => "ISO-8859-1")
+ array('encoding' => "iso-8859-1")
);
# Test inheritance