diff options
author | Christian Weber | 2012-02-29 10:12:11 +0000 |
---|---|---|
committer | Christian Weber | 2012-02-29 10:12:11 +0000 |
commit | c0938f79a452c44e28fccc326775891ba6c743e3 (patch) | |
tree | 98db80457a9bb72f2c80621d3c36cbaa020b279a | |
parent | 4d945c4a8ced3835064592f35ce2d68083e51b09 (diff) | |
download | itools-c0938f79a452c44e28fccc326775891ba6c743e3.tar.gz itools-c0938f79a452c44e28fccc326775891ba6c743e3.tar.bz2 itools-c0938f79a452c44e28fccc326775891ba6c743e3.zip |
it::convertregex() sets /u if default_charset is 'utf-8' (can be overridden)
-rw-r--r-- | it.class | 27 | ||||
-rwxr-xr-x | tests/it.t | 36 |
2 files changed, 39 insertions, 24 deletions
@@ -310,35 +310,24 @@ static function toascii($text) * @param $p['casesensitive'] Regex is case sensitive (omit modifier i) * @param $p['multiline'] add modifier m: ^ and $ match \n * @param $p['singleline'] add modifier s: . matches \n - * @param $p['utf8'] add modifier u + * @param $p['utf8'] add modifier u. This is the default if default_charset is utf-8, override with $p['utf8'] = false * @param $p['extended'] add modifier x (non signifcant whitespace) * @return converted regex to use with preg */ static function convertregex($pattern, $p = array()) { - $pattern = preg_replace('|/|', '\/', $pattern); - $modifiers = ''; - - if (!$p['casesensitive']) - $modifiers .= 'i'; - if ($p['exec']) it::fatal("Option exec to it::replace has been removed for security reasons"); - foreach (array( - 'multiline' => 'm', - 'singleline' => 's', - 'utf8' => 'u', - 'extended' => 'x', - ) as $key => $mod) - { - if ($p[$key]) - $modifiers .= $mod; - } - - return "/$pattern/$modifiers"; + return '/' . strtr($pattern, array('/' => '\/')) . '/' . + (!$p['casesensitive'] ? 'i' : '') . + ($p['multiline'] ? 'm' : '') . + ($p['singleline'] ? 's' : '') . + ($p['extended'] ? 'x' : '') . + ((!isset($p['utf8']) && ini_get('default_charset') == 'utf-8' || $p['utf8']) ? 'u' : ''); } + /** * Try to match string against regex. Case insensitive by default. * @param $pattern Regex to match against @@ -3,13 +3,13 @@ # Tests for it.class -function match( $regex, $string, $exp, $name ) +function match($regex, $string, $expect, $name) { $GLOBALS['TEST_MORE_LEVEL'] = 1; - $pass = is( it::match( $regex, $string ), $exp, $name ); - if( !$pass ) { - diag( " regex given: $regex" ); - diag( " regex converted: " . it::convertregex( $regex ) ); + $pass = is (it::match($regex, $string), $expect, $name); + if (!$pass) { + diag(" regex given: $regex"); + diag(" regex converted: " . it::convertregex($regex)); } $GLOBALS['TEST_MORE_LEVEL'] = 0; } @@ -183,6 +183,32 @@ match( 'Ö', 'match umlaute in latin1 case insensitive' ); + +is( + it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)), + utf8_encode('AÖbü'), + "match utf-8 umlaute in case insensitive" +); + +$oldcharset = ini_get('default_charset'); +ini_set('default_charset', 'utf-8'); +match( + utf8_encode('aöBÜ'), utf8_encode('AÖbü'), + utf8_encode('AÖbü'), + "match utf-8 umlaute in case insensitive using default_charset" +); +is( + it::match('aöBÜ', 'AÖbü', array('utf8' => false)), + 'AÖbü', + "non-utf-8 override with default_charset=utf-8" +); +match( + '\w+', utf8_encode('Müller'), + utf8_encode('Müller'), + '\w matches umlaut in utf-8 mode' +); +ini_set('default_charset', $oldcharset); + is( it::match( 'abc', "aBc", array('casesensitive' => 1 )), false, |