From 0147e6e3aea620a54b0c3f6c932c658ee72a45a0 Mon Sep 17 00:00:00 2001
From: Nathan Gass
Date: Mon, 26 Mar 2012 15:20:24 +0000
Subject: new utf8 safe functions it::grep and it::substr_replace

---
Review note: line breaks, indentation and whitespace-only context lines were
rebuilt from the hunk counts; compare with the tree before applying. The
'casesensitive' test call, the doc comments and the optional $length differ
from the recorded commit, so the blob ids in the index lines describe the
original revision only.

 it.class   | 32 ++++++++++++++++++++++++++++++++
 tests/it.t |  9 +++++++++
 2 files changed, 41 insertions(+)

diff --git a/it.class b/it.class
index 924f271..f683568 100644
--- a/it.class
+++ b/it.class
@@ -387,6 +387,23 @@ static function replace($replacements, $string, $p = array())
 	return $result;
 }
 
+/**
+ * Returns only the array elements matching the given regex
+ * @param $pattern Regex to match against
+ * @param $array array to grep
+ * @param $p Optional settings handed to it::convertregex, e.g. array('casesensitive' => 1)
+ * @return New array holding the matching elements under their original keys
+ */
+static function grep($pattern, $array, $p = array())
+{
+	if (!preg_match('/\\\\[wb]|[!\x80-\xff]|\[\[:/i', $pattern) && !$p)
+		$result = preg_grep('!' . $pattern . '!i' . (ini_get('default_charset') == 'utf-8' ? 'u' : ''), $array); # fast path for simple patterns
+	else
+		$result = preg_grep(it::convertregex($pattern, $p), $array);
+
+	return $result;
+}
+
 /**
  * Uppercase first character similar to ucfirst() but for mbstring.internal_encoding
  */
@@ -403,6 +420,21 @@ static function ucwords($string)
 	return preg_replace_callback('/\b\w/u', function($m) { return mb_strtoupper($m[1]); }, mb_strtolower($string));
 }
 
+/**
+ * Grapheme based counterpart of substr_replace() for utf-8 strings
+ * @param $string String to modify
+ * @param $replacement Text inserted in place of the removed part
+ * @param $start Number of leading graphemes to keep; negative values are not handled like in substr_replace()
+ * @param $length Number of graphemes to drop after $start, null drops the rest; negative values are not handled like in substr_replace()
+ * @return New string
+ */
+static function substr_replace($string, $replacement, $start, $length = null)
+{
+	$tail = $length === null ? '' : grapheme_substr($string, $start + $length);
+
+	return grapheme_substr($string, 0, $start) . $replacement . $tail;
+}
+
 /**
  * Extract key => value pairs from assoc array by key
  * @param $array array to filter
diff --git a/tests/it.t b/tests/it.t
index ec95cc4..317b8a3 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -305,6 +305,10 @@ is(it::replace(array('#' => "!", '\w' => "x"), "#ö"), "!x");
 is(it::replace(array('ö' => "x"), "Ö"), "x");
 is(it::replace(array('a' => "1"), "aaa", array('limit' => 1)), "1aa");
 
+is(it::grep('ismatch', array('ismatch', 'isnomatch')), array('ismatch'), 'grep with simple regex');
+is(it::grep('!', array('ismatch!', 'isnomatch')), array('ismatch!'), '! in regex');
+is(it::grep('lower|UPPER', array('lower', 'LOWER', 'upper', 'UPPER'), array('casesensitive' => 1)), array(0 => 'lower', 3 => 'UPPER'), 'set casesensitive');
+is(it::grep('match', array('foo' => 'match', 'bar' => 'gna')), array('foo' => 'match'), 'with keys');
 setlocale(LC_CTYPE, $oldlocale);
 ini_set('default_charset', $oldcharset);
 # end of tests that must run with specific charset
@@ -342,4 +346,9 @@ is(it::date('datetime', 1000000.543), it::date('datetime', "1000000"), '... larg
 is(it::date('time', "10.5"), "10:05", 'interpret string with points with strtotime');
 is(it::date('time', "10.05"), "10:05", 'interpret string with points with strtotime');
 
+# it::substr_replace
+is(it::substr_replace('abcdefgh', 'xyz', 2, 4), substr_replace('abcdefgh', 'xyz', 2, 4), 'it::substr_replace the same as substr_replace for ascii');
+is(it::substr_replace('✔☯♥', '☃☃', 1, 1), '✔☃☃♥', 'it::substr_replace for utf-8');
+is(it::substr_replace('✔☯♥', '☃', 1), '✔☃', 'it::substr_replace without length replaces the rest');
+
 ?>
-- 
cgit v1.2.3