From 5e55c26d6ae3ab321a765fc66b7359a5a9edae8f Mon Sep 17 00:00:00 2001
From: Urban Müller
Date: Fri, 23 Mar 2012 18:32:18 +0000
Subject: encoding checker
---
it_html.class | 36 +++++++++++++++++++++++++++---------
1 file changed, 27 insertions(+), 9 deletions(-)
(limited to 'it_html.class')
diff --git a/it_html.class b/it_html.class
index 1aeed94..41d799f 100644
--- a/it_html.class
+++ b/it_html.class
@@ -258,6 +258,24 @@ function _parse_args($args)
}
+/**
+ * Internal: detect and repair a string that is not properly UTF-8 encoded.
+ *
+ * Two cases are handled, each of them reported through it::error():
+ * - a single high byte between two ASCII characters is taken as a sign of
+ *   ISO-8859-1 text, and the whole string is converted with utf8_encode()
+ * - a string that is still valid UTF-8 after one utf8_decode() pass is taken
+ *   as doubly encoded, and one encoding layer is removed
+ *
+ * @param $string String that is expected to be UTF-8
+ * @return The repaired string, or $string unchanged if neither case applies
+ */
+function fix_encoding($string)
+{
+	if (preg_match('/[\x20-\x7f][\x80-\xff][\x20-\x7f]/', $string))
+	{
+		it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html"));
+		$string = utf8_encode($string);
+	}
+	else if ($string && preg_match('/[\x80-\xff]/', $string))
+	{
+		$decoded = utf8_decode($string);
+
+		# utf8_decode() replaces code points above U+00FF and malformed sequences by "?",
+		# which is valid UTF-8 and would wrongly pass the validity check. Only accept the
+		# decoded form if encoding it again reproduces the input, i.e. nothing was lost.
+		# htmlspecialchars() returns "" if $decoded is not valid UTF-8.
+		if (utf8_encode($decoded) === $string && htmlspecialchars($decoded, ENT_COMPAT, "utf-8") !== "")
+		{
+			it::error(array('title' => utf8_encode("doubly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html"));
+			$string = $decoded;
+		}
+	}
+
+	return $string;
+}
+
+
/**
* function div($args...)
* Return a <div>...</div> element
@@ -312,16 +330,12 @@ function _tag($name, $args)
else
$result .= " />$newline";
- if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8" && preg_match('/[\x20-\x7f][\x80-\xff][\x20-\x7f]/', $result))
- {
- it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($result)), 'skipfiles' => "it_html"));
- $result = utf8_encode($result);
- }
+ if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8")
+ $result = self::fix_encoding($result);
return $result;
}
-
/**
* Return a <tag> containing optional data.
* @param $name tag name ('style', etc.)
@@ -482,9 +496,13 @@ function latinize($string)
*/
function Q($string)
{
- if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\xff]/', $origstring = $string)) # WARNING: copy/pasted to _tag()
- if (($string = htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset'])) === "" && $GLOBALS['debug_utf8check'])
- it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($origstring)), 'skipfiles' => "it_html"));
+ if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\xff]/', $string)) # WARNING: copy/pasted to _tag(). Strings without <>&", control characters (tab and CR excepted) or bytes >= 0x80 are returned unescaped
+ {
+ if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8") # Encoding repair only runs when debug_utf8check is set and the page charset is utf-8
+ $string = self::fix_encoding($string);
+
+ $string = htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset']); # Escape using the page charset; iso-8859-1 pages pass through latinize() first
+ }
return $GLOBALS['debug_q'] && $string ? "$string" : $string;
}
--
cgit v1.2.3