diff options
Diffstat (limited to 'it_html.class')
-rw-r--r-- | it_html.class | 38 |
1 files changed, 31 insertions, 7 deletions
diff --git a/it_html.class b/it_html.class index 92aa9ba..4c9f3e7 100644 --- a/it_html.class +++ b/it_html.class @@ -38,7 +38,7 @@ function it_html($p = array()) { # Default configuration of html class $this->p = $p + array( - 'charset' => 'iso-8859-1', + 'charset' => ini_get('default_charset') ?: 'iso-8859-1', 'doctype' => null, # Custom doctype (will usually be calculated from htmltype) 'head' => '', # Code to put into head() section 'htmltype' => 'xhtml', # 'html' (=old-style), 'xhtml' or 'xhtml-mobile' @@ -48,7 +48,7 @@ function it_html($p = array()) 'name' => 'it_html', # Name of global variable $this is assigned to (string), XXX Copy and paste in configure() to keep PHP4 compatibility 'nonewlinetags' => 'a,b,em,img,input,label,span,noscript', # tags that do not like newlines after them 'notexported' => 'configure,sanitize',# Those methods are not exported - 'prettyprint' => false, # Should output be prettily indented? + 'prettyprint' => it::is_devel(), # Should output be prettily indented? 'show_boot_dom' => false, # If true, append invisible <div id="it_boot_dom"> at the end of body 'show_content_type' => true, # If true, add <meta http-equiv="Content-Type" ...> header 'show_favicon' => true, # If true, add <link> tag to /favicon.ico if it exists @@ -258,6 +258,21 @@ function _parse_args($args) } +# internal +function fix_encoding($string, $silent = false) +{ + if (grapheme_strlen($string) === null) + list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string))); + else if ($string && preg_match('/[\x80-\xff]/', $string) && grapheme_strlen(utf8_decode($string)) !== null && utf8_encode(utf8_decode($string)) === $string) + list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string))); + + if ($error && !$silent) + it::error(array('title' => $error, 'skipfiles' => "it_html")); + + return $string; +} + + /** * function div($args...) 
* Return a <div>...</div> element @@ -312,10 +327,12 @@ function _tag($name, $args) else $result .= " />$newline"; + if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8") + $result = self::fix_encoding($result); + return $result; } - /** * Return a <tag> containing optional data. * @param $name tag name ('style', etc.) @@ -414,9 +431,11 @@ function _strip_tags($html) function sanitize($html) { $result = ""; + $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1'; + if ($charset == "utf-8") + $html = it::any2utf8($html); $html = it::replace(array('[\0\s]+' => " "), $html); # \s also matches \r and \n $urlpattern = 'https?://[^">]+'; - $charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1'; if ($tag = it::match("(.*)<(div|p|i|b)\b[^>]*>(.*?)</\\2>(.*)", $html)) { @@ -450,11 +469,11 @@ function sanitize($html) } /** - * Decode all entities, ensure latin-1 encoding + * Decode all entities to encoding set for it_html */ function entity_decode($string) { - $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string)); + $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_COMPAT, $GLOBALS['it_html']->p['charset'])); $string = preg_replace_callback('/&#x0*([0-9a-f]+);/i', function($m) { return hexdec($m[1]) <= 255 ? chr(hexdec($m[1])) : " "; }, $string); $string = preg_replace_callback('/&#0*([0-9]+);/', function($m) { return $m[1] <= 255 ? 
chr($m[1]) : " "; }, $string); @@ -476,8 +495,13 @@ function latinize($string) */ function Q($string) { - if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\x9f]/', $string)) # WARNING: copy/pasted to _tag() + if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\xff]/', $string)) # WARNING: copy/pasted to _tag() + { + if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8") + $string = self::fix_encoding($string); + $string = htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset']); + } return $GLOBALS['debug_q'] && $string ? "<span style='background:#8FF'>$string</span>" : $string; } |