summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christian Weber, 2012-02-29 10:12:11 +0000
committer: Christian Weber, 2012-02-29 10:12:11 +0000
commit: c0938f79a452c44e28fccc326775891ba6c743e3 (patch)
tree: 98db80457a9bb72f2c80621d3c36cbaa020b279a
parent: 4d945c4a8ced3835064592f35ce2d68083e51b09 (diff)
download: itools-c0938f79a452c44e28fccc326775891ba6c743e3.tar.gz
itools-c0938f79a452c44e28fccc326775891ba6c743e3.tar.bz2
itools-c0938f79a452c44e28fccc326775891ba6c743e3.zip
it::convertregex() sets /u if default_charset is 'utf-8' (can be overridden)
-rw-r--r-- it.class 27
-rwxr-xr-x tests/it.t 36
2 files changed, 39 insertions, 24 deletions
diff --git a/it.class b/it.class
index 818c9e0..292c154 100644
--- a/it.class
+++ b/it.class
@@ -310,35 +310,24 @@ static function toascii($text)
* @param $p['casesensitive'] Regex is case sensitive (omit modifier i)
* @param $p['multiline'] add modifier m: ^ and $ match \n
* @param $p['singleline'] add modifier s: . matches \n
- * @param $p['utf8'] add modifier u
+ * @param $p['utf8'] add modifier u. This is the default if default_charset is utf-8, override with $p['utf8'] = false
* @param $p['extended'] add modifier x (non-significant whitespace)
* @return converted regex to use with preg
*/
static function convertregex($pattern, $p = array())
{
- $pattern = preg_replace('|/|', '\/', $pattern);
- $modifiers = '';
-
- if (!$p['casesensitive'])
- $modifiers .= 'i';
-
if ($p['exec'])
it::fatal("Option exec to it::replace has been removed for security reasons");
- foreach (array(
- 'multiline' => 'm',
- 'singleline' => 's',
- 'utf8' => 'u',
- 'extended' => 'x',
- ) as $key => $mod)
- {
- if ($p[$key])
- $modifiers .= $mod;
- }
-
- return "/$pattern/$modifiers";
+ return '/' . strtr($pattern, array('/' => '\/')) . '/' .
+ (!$p['casesensitive'] ? 'i' : '') .
+ ($p['multiline'] ? 'm' : '') .
+ ($p['singleline'] ? 's' : '') .
+ ($p['extended'] ? 'x' : '') .
+ ((!isset($p['utf8']) && ini_get('default_charset') == 'utf-8' || $p['utf8']) ? 'u' : '');
}
+
/**
* Try to match string against regex. Case insensitive by default.
* @param $pattern Regex to match against
diff --git a/tests/it.t b/tests/it.t
index 67160d0..c0efa2c 100755
--- a/tests/it.t
+++ b/tests/it.t
@@ -3,13 +3,13 @@
# Tests for it.class
-function match( $regex, $string, $exp, $name )
+function match($regex, $string, $expect, $name)
{
$GLOBALS['TEST_MORE_LEVEL'] = 1;
- $pass = is( it::match( $regex, $string ), $exp, $name );
- if( !$pass ) {
- diag( " regex given: $regex" );
- diag( " regex converted: " . it::convertregex( $regex ) );
+ $pass = is (it::match($regex, $string), $expect, $name);
+ if (!$pass) {
+ diag(" regex given: $regex");
+ diag(" regex converted: " . it::convertregex($regex));
}
$GLOBALS['TEST_MORE_LEVEL'] = 0;
}
@@ -183,6 +183,32 @@ match(
'Ö',
'match umlaute in latin1 case insensitive'
);
+
+is(
+ it::match(utf8_encode('aöBÜ'), utf8_encode("AÖbü"), array('utf8' => true)),
+ utf8_encode('AÖbü'),
+ "match utf-8 umlaute in case insensitive"
+);
+
+$oldcharset = ini_get('default_charset');
+ini_set('default_charset', 'utf-8');
+match(
+ utf8_encode('aöBÜ'), utf8_encode('AÖbü'),
+ utf8_encode('AÖbü'),
+ "match utf-8 umlaute in case insensitive using default_charset"
+);
+is(
+ it::match('aöBÜ', 'AÖbü', array('utf8' => false)),
+ 'AÖbü',
+ "non-utf-8 override with default_charset=utf-8"
+);
+match(
+ '\w+', utf8_encode('Müller'),
+ utf8_encode('Müller'),
+ '\w matches umlaut in utf-8 mode'
+);
+ini_set('default_charset', $oldcharset);
+
is(
it::match( 'abc', "aBc", array('casesensitive' => 1 )),
false,