#!/www/server/bin/php -qC
<?php

# Tests for it.class
#
# The assertions below rely on is() and diag() from the test library and on the
# it / it_html classes; none of them are defined in this file.

#
# tests for it::match()
#

# Remember charset/locale so they can be restored after the charset-dependent tests.
$oldcharset = ini_get('default_charset');
$oldlocale = setlocale(LC_CTYPE, 0);
ini_set('default_charset', 'utf-8');
setlocale(LC_CTYPE, 'de_CH'); # required because we're checking German umlauts in latin1 mode

# Test helper: runs it::match($regex, $string, $p) and compares the result with
# $expect via is(), labelled $name. On failure it prints the regex as given
# (plus the options, dumped with D()) and the regex as converted by
# it::convertregex() to ease debugging.
#
# $GLOBALS['TEST_MORE_LEVEL'] is raised to 1 for the duration of the check and
# reset to 0 afterwards - presumably so the test library reports the caller's
# line instead of this helper's; confirm against the test library.
#
# NOTE(review): "match" is a reserved keyword as of PHP 8.0, so this declaration
# only parses on PHP 7.x and earlier. The name is kept here because other code
# may call it.
function match($regex, $string, $expect, $name, $p = array())
{
    $GLOBALS['TEST_MORE_LEVEL'] = 1;
    $pass = is(it::match($regex, $string, $p), $expect, $name);

    if (!$pass) {
        diag(" regex given: $regex" . ($p ? " " . D($p) : ""));
        diag(" regex converted: " . it::convertregex($regex));
    }

    $GLOBALS['TEST_MORE_LEVEL'] = 0;
}

# Basic matching and captures
match('b', 'aaaabaaaa', 'b', 'simple regex');
match('a/b', ' a/b ', 'a/b', 'regex with /');
match('aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq', array('bb', 'cc', 'dd'), 'return array of captures');

# Word boundaries and character classes with umlauts
match('\bblah\b', ' blah ', 'blah', 'match \b at spaces');
match('\bblah\b', 'blah', 'blah', 'match \b at end of string');
match('\bblah\b', 'ablahc', false, 'don\'t match \b at word chars');
match('\bblah\b', 'Üblahä', false, 'don\'t match \b at umlaute');
match('\Bblah\B', ' blah ', false, 'don\'t match \B at spaces');
match('\Bblah\B', 'blah', false, 'don\'t match \B at end of string');
match('\Bblah\B', 'ablahc', 'blah', 'match \B at word chars');
match('\Bblah\B', 'Üblahä', 'blah', 'match \B at umlaute');
match('\w+', ' |#Üblahä ', 'Üblahä', 'include umlaute in \w');
match('[[:alpha:]]+', ' |#blahä ', 'blahä', 'include umlaute in [[:alpha:]]');
match('\W+', ' |#Üblahä ', ' |#', 'don\'t include umlaute in \W');
match('\ba', 'äa', '', '\b must know umlauts');

# Escaped backslashes in front of w must not be rewritten as \w.
# The "at beginning" case uses the pattern without a leading literal, mirroring
# the corresponding "(no match)" pair below (it used to duplicate the
# "after chars" case verbatim).
match('\\\\w+', ' \www ', '\www', 'don\'t parse \w in \\\\w at beginning (match)');
match('aaa\\\\w+', ' aaa\www ', 'aaa\www', 'don\'t parse \w in \\\\w after chars (match)');

# Build a subject string from whatever it::convertregex() turns \w into, with
# backslashes and slashes stripped; a literal "\\w" pattern must not match it.
eval('$escapedwordregex = "' . it::convertregex('\w') . '";');
$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex);

match('\\\\w+', $escapedwordregex, false, 'don\'t parse \w in \\\\w at beginning (no match)');
match('aaa\\\\w+', 'aaa' . $escapedwordregex, false, 'don\'t parse \w in \\\\w after chars (no match)');
match('\\\\\\\\w+', '\\' . $escapedwordregex, false, 'don\'t parse \w in \\\\\\\w (no match)');
match('\\\\\\\\w+', ' \\\\www ', '\\\\www', 'don\'t parse \\\\\\\\w as \w (match)');
match('[\w]+', '[[[]]]---', false, 'replace \w in [\w] correctly (no match)');
match('[\w]+', ' \\\\aword[[[]]] ', 'aword', 'replace \w in [\w] correctly (match)');
match('[\\\\w]+', ' blabergna ', false, 'don\'t parse \w in [\\\\w] (no match)');
match('[\\\\w]+', ' \\\\worda[[[]', '\\\\w', 'don\'t parse \w in [\\\\w] (match)');
match('[a\W]+', 'bbbbbbb a a%$+ accccc', ' a a%$+ a', '\W in []');
match('\\\\\\w+', ' \Üblahä ', '\Üblahä', 'parse \w in \\\\\\w at beginning');
match('aaa\\\\\\w+', ' aaa\Üblahä ', 'aaa\Üblahä', 'parse \w in \\\\\\w after chars');

# Options: all, utf8, casesensitive, pattern_order
match(
    '\w+',
    'word1 wörd2 word_3',
    array('word1', 'wörd2', 'word_3'),
    "test match_all function",
    array('all' => true)
);
match('aBcD', ' aBcD ', 'aBcD', "caseinsensitive is default");
match('\w+', 'Müller', 'Müller', '\w matches umlaut in utf-8 mode');
match('M.ller', 'Müller', 'Müller', '. matches umlaut in utf-8 mode');
match(
    utf8_decode('ö'),
    utf8_decode('Ö'),
    utf8_decode('Ö'),
    'match umlaute in de_CH.latin1 case insensitive',
    array('utf8' => false)
);
match(
    utf8_decode('aöBÜ'),
    utf8_decode('AÖbü'),
    utf8_decode('AÖbü'),
    "match umlaute with non-utf-8 override in p",
    array('utf8' => false)
);
# No trailing comma after the last argument: that is a parse error before PHP 7.3.
match('abc', "aBc", false, "set case sensitivity by parameter", array('casesensitive' => 1));
match(
    '\w+',
    'word1 wörd2 word_3',
    array('word1', 'wörd2', 'word_3'),
    "test all => 1 without captures",
    array('all' => 1)
);
match(
    '\w+\s+(\d+)',
    'word1 12 wörd2 3 word_3 4',
    array('12', '3', '4'),
    "test all => 1 with one capture",
    array('all' => 1)
);
match(
    '(\w+)\s+(\d+)',
    'word1 12 wörd2 3 word_3 4',
    array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')),
    "test all => 1 with captures",
    array('all' => 1)
);
match(
    '(\w+)\s+(\d+)',
    'word1 12 wörd2 3 word_3 4',
    array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')),
    "test all => 1,pattern_order => 1",
    array('all' => 1, 'pattern_order' => 1)
);

# With a latin1 default charset, the utf8 option must still force UTF-8 matching.
ini_set('default_charset', 'iso-8859-1');
match(
    'aöBÜ',
    "AÖbü",
    'AÖbü',
    "match utf-8 umlaute in case insensitive mode with utf8 override",
    array('utf8' => true)
);
ini_set('default_charset', 'utf-8');

#
# tests for it::replace()
#

is(
    it::replace(
        array('regex1' => 'repl1', 'regex2' => 'repl2', 'regex3' => 'repl3'),
        'regex2 regex1 regex3'
    ),
    'repl2 repl1 repl3',
    'test tr regex function'
);
is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12");
is(it::replace(array('!' => "x"), "!"), "x");
is(it::replace(array('\w' => "x"), "oö"), "xx");
is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x");
is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x");
is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
is(it::replace(array('ö' => "x"), "Ö"), "x");
is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
# The entity is spelled out so the non-breaking space is visible in the source.
# NOTE(review): assumes it_html::entity_decode() turns "&nbsp;" into the
# non-breaking space character - confirm.
is(
    it::replace(array('\s' => "x"), it_html::entity_decode("&nbsp;")),
    "x",
    "match non-breaking space as white-space character"
);
is(it::replace(array('a' => "b", 'b' => "c"), "a"), "c");

# it::grep tests
is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex');
is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex');
# Options go in an array; a bare 'key' => value argument is a syntax error.
is(
    it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), array('casesensitive' => 1)),
    array(0 => 'lower', 3 => 'UPPER'),
    'set casesensitive'
);
is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys');

setlocale(LC_CTYPE, $oldlocale);
ini_set('default_charset', $oldcharset); # end of tests that must run with specific charset

# it::filter_keys tests

$data = array('a' => 1, 'b' => 2, 'c' => 3);
is(it::filter_keys($data, 'a'), array('a' => 1), "select one key");
is(it::filter_keys($data, array('a', 'b')), array('a' => 1, 'b' => 2), "select two keys with array");
is(it::filter_keys($data, 'a,b'), array('a' => 1, 'b' => 2), "select two keys with string");
is(array_keys(it::filter_keys($data, 'b,a')), array('a', 'b'), "keep order of data array per default");
is(
    array_keys(it::filter_keys($data, 'b,a', array('reorder' => true))),
    array('b', 'a'),
    "reorder with given key order"
);

# it::date tests

# The first assertion below expects it::date() to report this value as "now";
# the global is not reset afterwards.
$GLOBALS['debug_time'] = "2014-01-01";
is(it::date(), "2014-01-01 00:00:00");
is(it::date('date', '2011-10-25'), '25.10.2011', 'parse date string with strtotime');
is(it::date('date', '2011-10-25 + 3 days'), '28.10.2011', 'some date arithmetic');
is(it::date('datetime', it::time()), it::date('datetime'), 'recognize int as timestamp');
is(it::date('datetime', it::time()*1.0), it::date('datetime'), 'recognize float as timestamp');
is(it::date('datetime', it::time() . ''), it::date('datetime'), 'recognize digit string as timestamp');
is(it::date('datetime', '@' . it::time()), it::date('datetime'), 'recognize strtotime timestamp format');
is(it::date('datetime', 10), it::date('datetime', "10"), 'numeric and string give same result');
is(it::date('datetime', 10.0), it::date('datetime', "10"), '... as long as num is properly truncated');
is(it::date('datetime', 10.5), it::date('datetime', "10"), '... with one digit after point');
is(it::date('datetime', 10.56), it::date('datetime', "10"), '... with two digits after point');
is(it::date('datetime', 1000000), it::date('datetime', "1000000"), '... large nummer');
is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... large nummer and point');
is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime');
is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime');

# it::uc*

is(it::ucfirst('foo bär über'), 'Foo bär über');
is(it::ucwords('foo bär über'), 'Foo Bär Über');

# it::substr_replace

is(
    it::substr_replace('abcdefgh', 'xyz', 2, 4),
    substr_replace('abcdefgh', 'xyz', 2, 4),
    'it::substr_replace the same as substr_replace for ascii'
);
is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8');

# it::any2utf8

is(grapheme_strlen("\xc1"), null, "need grapheme_strlen side effect for any2utf8");

is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input");
is(it::any2utf8('Müller'), 'Müller', "it::any2utf8 utf8 input");
is(it::any2utf8('Aslı'), 'Aslı', "it::any2utf8 utf8 non-latin1 input");
is(it::any2utf8(utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input");
is(
    it::any2utf8(
        ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    ),
    ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
    "it::any2utf8 utf8 input (exhaustive alphabet)"
);
is(
    it::any2utf8(
        utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')
    ),
    ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
    "it::any2utf8 latin1 input (exhaustive alphabet)"
);
is(it::any2utf8(utf8_encode("ü")), "ü", "it::any2utf8 double encoding");
is(it::any2utf8("Meier"), "Meier", "it::any2utf8 ascii");
is(it::any2utf8("Müller"), "Müller", "it::any2utf8 utf-8 latin1");
is(it::any2utf8("Aslı"), "Aslı", "it::any2utf8 utf-8 non-latin1");
is(it::any2utf8("é»"), "é»", "it::any2utf8 utf-8 latin1 special combination");
is(it::any2utf8(utf8_encode("Müller")), "Müller", "it::any2utf8 doubly encoded utf8");
is(it::any2utf8(utf8_encode(utf8_encode("Müller"))), "Müller", "it::any2utf8 triply encoded utf8");
is(it::any2utf8(utf8_decode("Müller")), "Müller", "it::any2utf8 incorrectly encoded latin1");
is(it::any2utf8("a💚b"), "a💚b", "it::any2utf8 correctly handles 4-byte utf-8 character GREEN HEART");
is(it::any2utf8(array("foo", utf8_decode("bär"))), array("foo", "bär"), "any2utf8 on arrays");
is(
    it::any2utf8(array("foo", array(utf8_decode("bär")))),
    array("foo", array("bär")),
    "any2utf8 on recursive arrays"
);
is(it::any2utf8(array(1, true, false, null)), array(1, true, false, null), "any2utf8 should leave types alone");

# JSON round trip: decoding the encoded value must give back the original.
# NOTE(review): $dummy is never assigned in the visible code, so it evaluates to
# null (with an undefined-variable notice) - looks deliberate, confirm.
foreach (array($dummy, false, true, null, 1, "a", "Ä", "/", array()) as $var) {
    is(it::json_decode(it::json_encode($var)), $var);
}