#!/www/server/bin/php -qC
<?php

# Tests for it.class


#
# tests for it::match()
#
# Remember the default charset and the LC_CTYPE locale currently in effect
# (setlocale() with 0 only queries the setting) so both can be put back once
# the charset dependent tests further down are done.
$oldcharset = ini_get('default_charset');
$oldlocale = setlocale(LC_CTYPE, 0);

# The tests below run with utf-8 as default charset unless a test overrides it
ini_set('default_charset', 'utf-8');
setlocale(LC_CTYPE, 'de_CH');		# required because we're checking German umlauts in latin1 mode


# Test helper: runs it::match() for one regex/subject pair and reports the
# comparison with the expected value as a single test via is().
#
#   $regex   pattern handed to it::match()
#   $string  subject the pattern is applied to
#   $expect  value it::match() is expected to return
#   $name    test description passed on to is()
#   $p       optional settings array forwarded to it::match()
#
# If the comparison fails, the regex as given (followed by a D() dump of $p
# when $p is non-empty) and the output of it::convertregex() for that regex
# are printed with diag().
#
# NOTE(review): `match` is a reserved keyword as of PHP 8.0, so a global
# function of this name presumably only parses on older PHP versions - confirm
# which PHP version this script is run with.
function match($regex, $string, $expect, $name, $p = array())
{
	# Raised for the duration of the is() call and reset to 0 afterwards;
	# presumably makes failures point at the caller of this helper - TODO confirm
	$GLOBALS['TEST_MORE_LEVEL'] = 1;

	$result = it::match($regex, $string, $p);
	if (!is($result, $expect, $name)) {
		$options = $p ? " " . D($p) : "";
		diag("        regex given: $regex" . $options);
		diag("    regex converted: " . it::convertregex($regex));
	}

	$GLOBALS['TEST_MORE_LEVEL'] = 0;
}


# Basic matching: without captures the matched text is expected back as a string
match(
	'b', 'aaaabaaaa',
	'b',
	'simple regex'
);

# A slash inside the pattern needs no escaping by the caller
match(
	'a/b', '   a/b   ',
	'a/b',
	'regex with /'
);

# Several capture groups: the captures are expected back as an array
match(
	'aa(bb)aa(cc)aa(dd)qq', 'aabbaaccaaddqq',
	array('bb', 'cc', 'dd'),
	'return array of captures'
);

# \b (word boundary): must match next to spaces and at the string ends, but not
# next to ASCII word characters or umlauts
match(
	'\bblah\b', ' blah ',
	'blah',
	'match \b at spaces'
);

match(
	'\bblah\b', 'blah',
	'blah',
	'match \b at end of string'
);

match(
	'\bblah\b', 'ablahc',
	false,
	'don\'t match \b at word chars'
);

match(
	'\bblah\b', 'Üblahä',
	false,
	'don\'t match \b at umlaute'
);

# \B (non-boundary): the exact opposite expectations of the \b tests above
match(
	'\Bblah\B', ' blah ',
	false,
	'don\'t match \B at spaces'
);

match(
	'\Bblah\B', 'blah',
	false,
	'don\'t match \B at end of string'
);

match(
	'\Bblah\B', 'ablahc',
	'blah',
	'match \B at word chars'
);

match(
	'\Bblah\B', 'Üblahä',
	'blah',
	'match \B at umlaute'
);

# \w, [[:alpha:]] and \W: umlauts count as word/alpha characters
match(
	'\w+', '  |#Üblahä   ',
	'Üblahä',
	'include umlaute in \w'
);

match(
	'[[:alpha:]]+', '  |#blahä   ',
	'blahä',
	'include umlaute in [[:alpha:]]'
);

match(
	'\W+', '  |#Üblahä  ',
	'  |#',
	'don\'t include umlaute in \W'
);

# NOTE(review): the expected value here is '' whereas the other tests that
# expect no match use false - confirm that this difference is intended.
match(
	'\ba', 'äa',
	'',
	'\b must know umlauts'
);

# Escaped backslash right at the start of the regex: \\\\ in the single-quoted
# PHP literal is \\ on regex level, i.e. one literal backslash, so the "w+"
# after it is plain text and must not be treated like \w.
# (Pattern and subject deliberately have no leading "aaa"; the variant with
# preceding characters is covered by the next test.)
match(
	'\\\\w+', '   \www  ',
	'\www',
	'don\'t parse \w in \\\\w at beginning (match)'
);

# Literal backslash followed by "w+" after some leading characters
match(
	'aaa\\\\w+', '   aaa\www  ',
	'aaa\www',
	'don\'t parse \w in \\\\w after chars (match)'
);

# Take what it::convertregex() produces for \w, evaluate it as a double-quoted
# PHP string and strip all backslashes and slashes from it. The result is used
# as subject in the three "no match" tests below.
eval('$escapedwordregex = "' . it::convertregex('\w') . '";');
$escapedwordregex = preg_replace('|[\\\\/]|', '', $escapedwordregex);

match(
	'\\\\w+',  $escapedwordregex,
	false,
	'don\'t parse \w in \\\\w at beginning (no match)'
);

match(
	'aaa\\\\w+', 'aaa' . $escapedwordregex,
	false,
	'don\'t parse \w in \\\\w after chars (no match)'
);

match(
	'\\\\\\\\w+', '\\' . $escapedwordregex,
	false,
	'don\'t parse \w in \\\\\\\w (no match)'
);

# Two literal backslashes followed by "w+"
match(
	'\\\\\\\\w+', '  \\\\www  ',
	'\\\\www',
	'don\'t parse \\\\\\\\w as \w (match)'
);

# \w inside a character class
match(
	'[\w]+', '[[[]]]---',
	false,
	'replace \w in [\w] correctly (no match)'
);

match(
	'[\w]+', '  \\\\aword[[[]]]   ',
	'aword',
	'replace \w in [\w] correctly (match)'
);

# Escaped backslash plus a plain "w" inside a character class
match(
	'[\\\\w]+', ' blabergna ',
	false,
	'don\'t parse \w in [\\\\w] (no match)'
);

match(
	'[\\\\w]+', '  \\\\worda[[[]',
	'\\\\w',
	'don\'t parse \w in [\\\\w] (match)'
);

# \W inside a character class
match(
	'[a\W]+', 'bbbbbbb a a%$+ accccc',
	' a a%$+ a',
	'\W in []'
);

# Three backslashes on regex level: an escaped backslash followed by a real \w
match(
	'\\\\\\w+', '  \Üblahä  ',
	'\Üblahä',
	'parse \w in \\\\\\w at beginning'
);

match(
	'aaa\\\\\\w+', '  aaa\Üblahä  ',
	'aaa\Üblahä',
	'parse \w in \\\\\\w after chars'
);

# Option 'all': every match in the subject is expected back, not just the first
match(
	'\w+', 'word1 wörd2 word_3',
	array('word1', 'wörd2', 'word_3'),
	"test match_all function",
	array('all' => true)
);

# Pattern and subject differ in case on purpose: the test can only pass if
# matching is case insensitive without any option being passed.
match(
	'abcd', '  aBcD  ',
	'aBcD',
	"caseinsensitive is default"
);

# utf-8 is the default charset at this point: \w and . must each take a
# multi-byte umlaut as one character
match(
	'\w+', 'Müller',
	'Müller',
	'\w matches umlaut in utf-8 mode'
);

match(
	'M.ller', 'Müller',
	'Müller',
	'. matches umlaut in utf-8 mode'
);

# Pattern, subject and expected value are converted to ISO-8859-1 with
# utf8_decode() and utf-8 handling is switched off through the 'utf8' option
match(
	utf8_decode('ö'), utf8_decode('Ö'),
	utf8_decode('Ö'),
	'match umlaute in de_CH.latin1 case insensitive',
	array('utf8' => false)
);

match(
	utf8_decode('aöBÜ'), utf8_decode('AÖbü'),
	utf8_decode('AÖbü'),
	"match umlaute with non-utf-8 override in p",
	array('utf8' => false)
);


# With the 'casesensitive' option set, a subject that differs from the pattern
# only in case must not match.
# (No trailing comma after the last argument: PHP accepts that only from
# version 7.3 on and no other call in this file uses one.)
match(
	'abc', "aBc",
	false,
	"set case sensitivity by parameter",
	array('casesensitive' => 1)
);

# Shapes expected with 'all' => 1:
#  - no capture group:     flat list of the matched texts
#  - one capture group:    flat list of that capture for every match
#  - several groups:       one array of captures per match
#  - plus 'pattern_order': one array per capture group instead
match(
	'\w+', 'word1 wörd2 word_3',
	array('word1', 'wörd2', 'word_3'),
	"test all => 1 without captures",
	array('all' => 1)
);

match(
	'\w+\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
	array('12', '3', '4'),
	"test all => 1 with one capture",
	array('all' => 1)
);

match(
	'(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
	array(array('word1', '12'), array('wörd2', '3'), array('word_3', '4')),
	"test all => 1 with captures",
	array('all' => 1)
);

match(
	'(\w+)\s+(\d+)', 'word1 12 wörd2 3 word_3 4',
	array(array('word1', 'wörd2', 'word_3'), array('12', '3', '4')),
	"test all => 1,pattern_order => 1",
	array('all' => 1, 'pattern_order' => 1)
);

# Switch the default charset to latin1 for one test to check that the 'utf8'
# option overrides it, then go back to utf-8 for the tests that follow
ini_set('default_charset', 'iso-8859-1');
match(
	'aöBÜ', "AÖbü",
	'AÖbü',
	"match utf-8 umlaute in case insensitive mode with utf8 override",
	array('utf8' => true)
);
ini_set('default_charset', 'utf-8');


#
# tests for it::replace()
#
# it::replace() takes an array of regex => replacement pairs and the subject
is(
	it::replace(
		array(
			'regex1' => 'repl1',
			'regex2' => 'repl2',
			'regex3' => 'repl3'),
		'regex2 regex1 regex3'),
	'repl2 repl1 repl3',
	'test tr regex function'
);

# Several pairs, special characters in the pattern and umlauts in \w / [[:alpha:]]
is(it::replace(array('a' => "1", 'b' => "2"), "ab"), "12");
is(it::replace(array('!' => "x"), "!"), "x");
is(it::replace(array('\w' => "x"), "oö"), "xx");
is(it::replace(array('[[:alpha:]]' => "x"), "ö"), "x");
# Same two pairs in both orders must give the same result
is(it::replace(array('\w' => "x", '#' => "!"), "#ö"), "!x");
is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
# Lower case umlaut in the pattern must replace the upper case one in the subject
is(it::replace(array('ö' => "x"), "Ö"), "x");
# 'limit' option: only the first occurrence is expected to be replaced
is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
is(it::replace(array('\s' => "x"), it_html::entity_decode("&nbsp;")), "x", "match non-breaking space as white-space character");
# The output of the first pair is expected to be picked up by the second pair
is(it::replace(array('a' => "b", 'b' => "c"), "a"), "c");

# it::grep tests: the expected arrays show that matching entries keep the key
# they had in the input array.
# Options are handed over as an array, as for it::match() and it::replace()
# in this file; a bare 'key' => value pair is not a valid PHP call argument.
is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'),  'grep with simple regex');
is(it::grep('!', array('ismatch!', 'isnomatch')),      array('ismatch!'), '! in regex');
is(it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), array('casesensitive' => 1)), array(0 => 'lower', 3 => 'UPPER'), 'set casesensitive');
is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys');

# Put back the locale and default charset that were saved before the first test
setlocale(LC_CTYPE, $oldlocale);
ini_set('default_charset', $oldcharset);	# end of tests that must run with specific charset


# it::filter_keys tests

# Fixture shared by all it::filter_keys checks below
$data = array(
	'a' => 1,
	'b' => 2,
	'c' => 3
);

# The wanted keys can be given as one name, as an array of names or as a
# comma separated string
is(
	it::filter_keys($data, 'a'),
	array('a' => 1),
	"select one key"
);
is(
	it::filter_keys($data, array('a', 'b')),
	array('a' => 1, 'b' => 2),
	"select two keys with array"
);
is(
	it::filter_keys($data, 'a,b'),
	array('a' => 1, 'b' => 2),
	"select two keys with string"
);

# Key order of the result: follows the data array unless 'reorder' is set
is(array_keys(it::filter_keys($data, 'b,a')), array('a', 'b'), "keep order of data array per default");
is(array_keys(it::filter_keys($data, 'b,a', array('reorder' => true))), array('b', 'a'), "reorder with given key order");

# it::date tests

# Set a fixed debug time; the next check expects it::date() without arguments
# to report exactly this moment
$GLOBALS['debug_time'] = "2014-01-01";
is(it::date(), "2014-01-01 00:00:00");

# Date strings, including relative expressions
is(it::date('date', '2011-10-25'), '25.10.2011', 'parse date string with strtotime');
is(it::date('date', '2011-10-25 + 3 days'), '28.10.2011', 'some date arithmetic');
# The same timestamp given as int, float, digit string and "@..." string
is(it::date('datetime', it::time()), it::date('datetime'), 'recognize int as timestamp');
is(it::date('datetime', it::time()*1.0), it::date('datetime'), 'recognize float as timestamp');
is(it::date('datetime', it::time() . ''), it::date('datetime'), 'recognize digit string as timestamp');
is(it::date('datetime', '@' . it::time()), it::date('datetime'), 'recognize strtotime timestamp format');
# Numeric values, with or without fractional part, must give the same result
# as the string holding their integer part
is(it::date('datetime', 10), it::date('datetime', "10"), 'numeric and string give same result');
is(it::date('datetime', 10.0), it::date('datetime', "10"), '... as long as num is properly truncated');
is(it::date('datetime', 10.5), it::date('datetime', "10"), '... with one digit after point');
is(it::date('datetime', 10.56), it::date('datetime', "10"), '... with two digits after point');
is(it::date('datetime', 1000000), it::date('datetime', "1000000"), '... large nummer');
is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... large nummer and point');
# Strings containing a point are not timestamps: both are expected to be read as 10:05
is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime');
is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime');

# it::uc*
# ucfirst upper-cases only the first letter of the string, ucwords the first
# letter of every word - including words starting with an umlaut
is(it::ucfirst('foo bär über'), 'Foo bär über');
is(it::ucwords('foo bär über'), 'Foo Bär Über');

# it::substr_replace
# For ASCII input the result must equal PHP's substr_replace(); for utf-8 input
# the expected value shows start and length being counted in characters
is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii');
is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8');

# The single byte \xc1 is not valid utf-8; this pins grapheme_strlen() returning
# null for it, which according to the test name it::any2utf8 depends on
is(grapheme_strlen("\xc1"), null, "need grapheme_strlen side effect for any2utf8");

# it::any2utf8: ascii and utf-8 input must come back unchanged, latin1 input
# (produced here with utf8_decode) must come back as utf-8
is(it::any2utf8('Meier'), 'Meier', "it::any2utf8 ascii input");
is(it::any2utf8('Müller'), 'Müller', "it::any2utf8 utf8 input");
is(it::any2utf8('Aslı'), 'Aslı', "it::any2utf8 utf8 non-latin1 input");
is(it::any2utf8(utf8_decode('Müller')), 'Müller', "it::any2utf8 latin1 input");

# Same two cases as above (utf-8 input, latin1 input) with one long string
# holding the whole alphabet under test instead of a single name
is(it::any2utf8(
	' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'),
	' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
	"it::any2utf8 utf8 input (exhaustive alphabet)"); 
is(it::any2utf8(
	utf8_decode(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')),
	' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
	"it::any2utf8 latin1 input (exhaustive alphabet)"); 

# utf8_encode() applied to a string that already is utf-8 yields a doubly
# encoded string; it::any2utf8 is expected to give back the plain utf-8 text
is(it::any2utf8(utf8_encode("ü")), "ü", "it::any2utf8 double encoding");

# Input that is already fine, input encoded two or three times, and latin1 input
is(it::any2utf8("Meier"), "Meier", "it::any2utf8 ascii");
is(it::any2utf8("Müller"), "Müller", "it::any2utf8 utf-8 latin1");
is(it::any2utf8("Aslı"), "Aslı", "it::any2utf8 utf-8 non-latin1");
is(it::any2utf8("é»"), "é»", "it::any2utf8 utf-8 latin1 special combination");
is(it::any2utf8(utf8_encode("Müller")), "Müller", "it::any2utf8 doubly encoded utf8");
is(it::any2utf8(utf8_encode(utf8_encode("Müller"))), "Müller", "it::any2utf8 triply encoded utf8");
is(it::any2utf8(utf8_decode("Müller")), "Müller", "it::any2utf8 incorrectly encoded latin1");
is(it::any2utf8("a💚b"), "a💚b", "it::any2utf8 correctly handles 4-byte utf-8 character GREEN HEART");

# Arrays are converted element by element, also when nested; values that are
# not strings must keep their type
is(it::any2utf8(array("foo", utf8_decode("bär"))), array("foo", "bär"), "any2utf8 on arrays");
is(it::any2utf8(array("foo", array(utf8_decode("bär")))), array("foo", array("bär")), "any2utf8 on recursive arrays");
is(it::any2utf8(array(1, true, false, null)), array(1, true, false, null), "any2utf8 should leave types alone");

# Every value must survive a round trip through it::json_encode() and
# it::json_decode() unchanged.
# NOTE(review): $dummy is not assigned anywhere in the visible part of this
# file, so it presumably stands for an unset variable (evaluating to null) -
# confirm that this is intended.
foreach (array($dummy, false, true, null, 1, "a", "Ä", "/", array()) as $var)
	is(it::json_decode(it::json_encode($var)), $var);