From c0938f79a452c44e28fccc326775891ba6c743e3 Mon Sep 17 00:00:00 2001 From: Christian Weber Date: Wed, 29 Feb 2012 10:12:11 +0000 Subject: it::convertregex() sets /u if default_charset is 'utf-8' (can be overridden) --- it.class | 27 ++++++++------------------- tests/it.t | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/it.class b/it.class index 818c9e0..292c154 100644 --- a/it.class +++ b/it.class @@ -310,35 +310,24 @@ static function toascii($text) * @param $p['casesensitive'] Regex is case sensitive (omit modifier i) * @param $p['multiline'] add modifier m: ^ and $ match \n * @param $p['singleline'] add modifier s: . matches \n - * @param $p['utf8'] add modifier u + * @param $p['utf8'] add modifier u. This is the default if default_charset is utf-8, override with $p['utf8'] = false * @param $p['extended'] add modifier x (non-significant whitespace) * @return converted regex to use with preg */ static function convertregex($pattern, $p = array()) { - $pattern = preg_replace('|/|', '\/', $pattern); - $modifiers = ''; - - if (!$p['casesensitive']) - $modifiers .= 'i'; - if ($p['exec']) it::fatal("Option exec to it::replace has been removed for security reasons"); - foreach (array( - 'multiline' => 'm', - 'singleline' => 's', - 'utf8' => 'u', - 'extended' => 'x', - ) as $key => $mod) - { - if ($p[$key]) - $modifiers .= $mod; - } - - return "/$pattern/$modifiers"; + return '/' . strtr($pattern, array('/' => '\/')) . '/' . + (!$p['casesensitive'] ? 'i' : '') . + ($p['multiline'] ? 'm' : '') . + ($p['singleline'] ? 's' : '') . + ($p['extended'] ? 'x' : '') . + ((!isset($p['utf8']) && ini_get('default_charset') == 'utf-8' || $p['utf8']) ? 'u' : ''); } + /** * Try to match string against regex. Case insensitive by default. 
* @param $pattern Regex to match against diff --git a/tests/it.t b/tests/it.t index 67160d0..c0efa2c 100755 --- a/tests/it.t +++ b/tests/it.t @@ -3,13 +3,13 @@ # Tests for it.class -function match( $regex, $string, $exp, $name ) +function match($regex, $string, $expect, $name) { $GLOBALS['TEST_MORE_LEVEL'] = 1; - $pass = is( it::match( $regex, $string ), $exp, $name ); - if( !$pass ) { - diag( " regex given: $regex" ); - diag( " regex converted: " . it::convertregex( $regex ) ); + $pass = is (it::match($regex, $string), $expect, $name); + if (!$pass) { + diag(" regex given: $regex"); + diag(" regex converted: " . it::convertregex($regex)); } $GLOBALS['TEST_MORE_LEVEL'] = 0; } @@ -183,6 +183,32 @@ match( 'Ö', 'match umlaute in latin1 case insensitive' ); + +is( + it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)), + utf8_encode('AÖbü'), + "match utf-8 umlaute in case insensitive" +); + +$oldcharset = ini_get('default_charset'); +ini_set('default_charset', 'utf-8'); +match( + utf8_encode('aöBÜ'), utf8_encode('AÖbü'), + utf8_encode('AÖbü'), + "match utf-8 umlaute in case insensitive using default_charset" +); +is( + it::match('aöBÜ', 'AÖbü', array('utf8' => false)), + 'AÖbü', + "non-utf-8 override with default_charset=utf-8" +); +match( + '\w+', utf8_encode('Müller'), + utf8_encode('Müller'), + '\w matches umlaut in utf-8 mode' +); +ini_set('default_charset', $oldcharset); + is( it::match( 'abc', "aBc", array('casesensitive' => 1 )), false, -- cgit v1.2.3