From 37f1f926bebf1d181dda083a593d5845ab5f4551 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Thu, 22 Mar 2012 18:29:35 +0000 Subject: commit existing local utf8 fixes --- it.class | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'it.class') diff --git a/it.class b/it.class index 624f601..c36fb29 100644 --- a/it.class +++ b/it.class @@ -123,6 +123,7 @@ static function timerlog($label = '') * @param $p['graceperiod'] number of seconds within which additional errors are ignored if id is set * @param $p['timewindow'] number of seconds after graceperiod within which the second error must occur if id is set * @param $p['backtraceskip'] number of stack levels to drop + * @param $p['skipfiles'] files to skip in backtrace * @param $p['blockmail'] number of seconds to block mails after having sent a mail [3600] * @param $p['blockmailid'] block mail for $p['blockmail'] seconds with same id. Default: $p['to'] * @param $p['omitdebuginfo'] Do not add stack dump, locals and environment to output [false] @@ -193,7 +194,7 @@ static function error($p = array(), $body = null, $to = null) # $body and $to de if ($toscreen || $sendmail) { - $trace = it_debug::backtrace($p['backtraceskip']); # moved in here for performance in mass error case + $trace = it_debug::backtrace(array('skiplevels' => $p['backtraceskip'], 'skipfiles' => $p['skipfiles'])); # moved in here for performance in mass error case if (strlen($p['body']) > 500000) { @@ -230,7 +231,7 @@ static function error($p = array(), $body = null, $to = null) # $body and $to de it::mail(array('To' => $p['to'], 'Subject' => substr($p['title'], 0, 80), 'Body' => $body) + (($cc = $GLOBALS['it_defaultconfig']['error_cc']) ? array('Cc' => $cc) : array())); } else if ($_SERVER['REMOTE_ADDR']) # toscreen mode: web - echo "
{$p['title']}\n".rtrim($body).""; + echo "
" . htmlspecialchars($p['title'] . "\n" . rtrim($body), ENT_COMPAT, $GLOBALS['it_html']->p['charset']) . ""; else # toscreen mode: shell (outputs to stderr) error_log($p['title'] . " in " . ($trace ? $trace : "{$p['file']}:{$p['line']} Url: $url") . " " . (EDC('verbose') ? D($p['locals']) : "")); } @@ -400,6 +401,22 @@ static function replace($replacements, $string, $p = array()) return $result; } +/** + * Uppercase first character similar to ucfirst() but for mbstring.internal_encoding + */ +static function ucfirst($string) +{ + return mb_strtoupper(mb_substr($string, 0, 1)) . mb_substr($string, 1); +} + +/** + * Uppercase first character of each word similar to ucwords() but for mbstring.internal_encoding + */ +static function ucwords($string) +{ + return preg_replace_callback('/\b\w/u', function($m) { return mb_strtoupper($m[1]); }, mb_strtolower($string)); +} + /** * Extract key => value pairs from assoc array by key * @param $array array to filter -- cgit v1.2.3 From 8d9bcc5de7ebf6d44fea96bfea920a77c7fcbd00 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Thu, 22 Mar 2012 18:36:01 +0000 Subject: remove locale switching from it::match and it::replace --- it.class | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'it.class') diff --git a/it.class b/it.class index c36fb29..924f271 100644 --- a/it.class +++ b/it.class @@ -335,7 +335,6 @@ static function convertregex($pattern, $p = null) * @param $string String to match * @param $p['offset_capture'] Set flag preg_offset_capture (returns offsets with the matches). * @param $p['all'] Return every match as array instead of first match. - * @param $p['locale'] Use given locale (default: de_CH), mainly affects handling of iso-latin chars * @param $p contains pattern modifiers, @see convertregex() * @return Matched string or false */ @@ -347,15 +346,10 @@ static function match($pattern, $string, $p = null) { $flags = $p['offset_capture'] ? PREG_OFFSET_CAPTURE : 0; - $oldlocale = setlocale(LC_CTYPE, 0); - setlocale(LC_CTYPE, $p['locale'] ? $p['locale'] : "de_CH"); - if ($p['all']) $r = preg_match_all(it::convertregex($pattern, $p), $string, $m, $flags | PREG_PATTERN_ORDER, $p['offset']); else $r = preg_match(it::convertregex($pattern, $p), $string, $m, $flags, $p['offset']); - - setlocale(LC_CTYPE, $oldlocale); } if (!$r) # no match @@ -386,17 +380,9 @@ static function match($pattern, $string, $p = null) static function replace($replacements, $string, $p = array()) { foreach ($replacements as $pattern => $dummy) - $patterns[] = !preg_match('/\\\\[wb]|[!\x80-\xff]|\[\[:/i', $pattern) && !$p ? "!$pattern!i" : it::convertregex($complex = $pattern, $p); + $patterns[] = !preg_match('/\\\\[wb]|[!\x80-\xff]|\[\[:/i', $pattern) && !$p ? "!$pattern!i" : it::convertregex($pattern, $p); - if (!$complex && !$p) - $result = preg_replace($patterns, $replacements, $string); - else - { - $oldlocale = setlocale(LC_CTYPE, 0); - setlocale(LC_CTYPE, 'de_CH'); - $result = preg_replace($patterns, $replacements, $string, isset($p['limit']) ? $p['limit'] : -1); - setlocale(LC_CTYPE, $oldlocale); - } + $result = preg_replace($patterns, $replacements, $string, isset($p['limit']) ? $p['limit'] : -1); return $result; } -- cgit v1.2.3 From 0147e6e3aea620a54b0c3f6c932c658ee72a45a0 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 26 Mar 2012 15:20:24 +0000 Subject: new utf8 safe functions it::grep and it::substr_replace --- it.class | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'it.class') diff --git a/it.class b/it.class index 924f271..f683568 100644 --- a/it.class +++ b/it.class @@ -387,6 +387,22 @@ static function replace($replacements, $string, $p = array()) return $result; } +/** + * Returns only the array elements matching the given regex + * @param $pattern Regex to match against + * @param $array array to grep + * @return New array + */ +static function grep($pattern, $array, $p = array()) +{ + if (!preg_match('/\\\\[wb]|[!\x80-\xff]|\[\[:/i', $pattern) && !$p) + $result = preg_grep('!' . $pattern . '!i' . (ini_get('default_charset') == 'utf-8' ? 'u' : ''), $array); # fast path for simple patterns + else + $result = preg_grep(it::convertregex($pattern, $p), $array); + + return $result; +} + /** * Uppercase first character similar to ucfirst() but for mbstring.internal_encoding */ @@ -403,6 +419,11 @@ static function ucwords($string) return preg_replace_callback('/\b\w/u', function($m) { return mb_strtoupper($m[1]); }, mb_strtolower($string)); } +static function substr_replace($string, $replacement, $start, $length) +{ + return grapheme_substr($string, 0, $start) . $replacement . grapheme_substr($string, $start + $length); +} + /** * Extract key => value pairs from assoc array by key * @param $array array to filter -- cgit v1.2.3 From b1c0b4946572027c8de564730a89ec584c830bf3 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:00:39 +0000 Subject: Added it::any2utf8, fixed it::replace fast path to add u modified, added error reporting for invalid utf-8 input to it::match and it::replace --- it.class | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'it.class') diff --git a/it.class b/it.class index f683568..6d516c9 100644 --- a/it.class +++ b/it.class @@ -356,6 +356,8 @@ static function match($pattern, $string, $p = null) { if (preg_last_error() == PREG_BACKTRACK_LIMIT_ERROR) it::error("Exceeded pcre.backtrack_limit of " . ini_get('pcre.backtrack_limit') . " bytes"); + else if (preg_last_error() == PREG_BAD_UTF8_ERROR) + it::error("Input to it::match is not valid utf-8"); $result = $p['all'] ? array() : null; } @@ -379,11 +381,15 @@ static function match($pattern, $string, $p = null) */ static function replace($replacements, $string, $p = array()) { + $encoding = ini_get('default_charset') == 'utf-8' ? 'u' : ''; foreach ($replacements as $pattern => $dummy) - $patterns[] = !preg_match('/\\\\[wb]|[!\x80-\xff]|\[\[:/i', $pattern) && !$p ? "!$pattern!i" : it::convertregex($pattern, $p); + $patterns[] = !preg_match('/\\\\[wb]|[!\x80-\xff]|\[\[:/i', $pattern) && !$p ? "!$pattern!i$encoding" : it::convertregex($pattern, $p); $result = preg_replace($patterns, $replacements, $string, isset($p['limit']) ? $p['limit'] : -1); + if ($result === null && preg_last_error() == PREG_BAD_UTF8_ERROR) + it::error("Input to it::replace is not valid utf-8"); + return $result; } @@ -403,6 +409,16 @@ static function grep($pattern, $array, $p = array()) return $result; } +/** + * Convert string to utf8 if it was not already utf-8 before + * @param $value String to convert + * @return Same string in utf-8 encoding + */ +function any2utf8($value) +{ + return strlen($value) && strlen(htmlspecialchars($value, 0, 'utf-8')) == 0 ? utf8_encode($value) : $value; # Use side-effect of htmlspecialchars: Fails if not valid utf-8 encoding +} + /** * Uppercase first character similar to ucfirst() but for mbstring.internal_encoding */ -- cgit v1.2.3 From 52ced90c535518482618ff726eb5c512da40b203 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:03:54 +0000 Subject: Replace htmlspecialchars side effect with grapheme_strlen returning null on invalid utf-8 --- it.class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'it.class') diff --git a/it.class b/it.class index 6d516c9..b4a0d9e 100644 --- a/it.class +++ b/it.class @@ -416,7 +416,7 @@ static function grep($pattern, $array, $p = array()) */ function any2utf8($value) { - return strlen($value) && strlen(htmlspecialchars($value, 0, 'utf-8')) == 0 ? utf8_encode($value) : $value; # Use side-effect of htmlspecialchars: Fails if not valid utf-8 encoding + return grapheme_strlen($value) === null ? utf8_encode($value) : $value; } /** -- cgit v1.2.3 From a7ac24d9f9698ff0a1d42268d79f74cd21f70eb4 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 28 Mar 2012 13:40:12 +0000 Subject: Fix it::ucwords and added tests for it::ucfirst and it::ucwords --- it.class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'it.class') diff --git a/it.class b/it.class index b4a0d9e..6a796d4 100644 --- a/it.class +++ b/it.class @@ -432,7 +432,7 @@ static function ucfirst($string) */ static function ucwords($string) { - return preg_replace_callback('/\b\w/u', function($m) { return mb_strtoupper($m[1]); }, mb_strtolower($string)); + return preg_replace_callback('/\b\w/u', function($m) { return mb_strtoupper($m[0]); }, mb_strtolower($string)); } static function substr_replace($string, $replacement, $start, $length) -- cgit v1.2.3 From 14f1c25b43628013507da706544d5c55cb1bc461 Mon Sep 17 00:00:00 2001 From: Christian Helbling Date: Wed, 4 Apr 2012 09:19:58 +0000 Subject: htmlspecialchars charset hack inside it::error --- it.class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'it.class') diff --git a/it.class b/it.class index 6a796d4..1be8f7c 100644 --- a/it.class +++ b/it.class @@ -231,7 +231,7 @@ static function error($p = array(), $body = null, $to = null) # $body and $to de it::mail(array('To' => $p['to'], 'Subject' => substr($p['title'], 0, 80), 'Body' => $body) + (($cc = $GLOBALS['it_defaultconfig']['error_cc']) ? array('Cc' => $cc) : array())); } else if ($_SERVER['REMOTE_ADDR']) # toscreen mode: web - echo "
" . htmlspecialchars($p['title'] . "\n" . rtrim($body), ENT_COMPAT, $GLOBALS['it_html']->p['charset']) . ""; + echo "
" . htmlspecialchars($p['title'] . "\n" . rtrim($body), ENT_COMPAT, "iso-8859-1") . ""; # works with iso-8859-1 or utf-8, UTF8SAFE else # toscreen mode: shell (outputs to stderr) error_log($p['title'] . " in " . ($trace ? $trace : "{$p['file']}:{$p['line']} Url: $url") . " " . (EDC('verbose') ? D($p['locals']) : "")); } -- cgit v1.2.3