diff options
-rw-r--r-- | it_html.class | 15 | ||||
-rwxr-xr-x | tests/it_html.t | 42 |
2 files changed, 38 insertions, 19 deletions
diff --git a/it_html.class b/it_html.class index 92aa9ba..1aeed94 100644 --- a/it_html.class +++ b/it_html.class @@ -38,7 +38,7 @@ function it_html($p = array()) { # Default configuration of html class $this->p = $p + array( - 'charset' => 'iso-8859-1', + 'charset' => ini_get('default_charset') ?: 'iso-8859-1', 'doctype' => null, # Custom doctype (will usually be calculated from htmltype) 'head' => '', # Code to put into head() section 'htmltype' => 'xhtml', # 'html' (=old-style), 'xhtml' or 'xhtml-mobile' @@ -312,6 +312,12 @@ function _tag($name, $args) else $result .= " />$newline"; + if ($GLOBALS['debug_utf8check'] && $GLOBALS['it_html']->p['charset'] == "utf-8" && preg_match('/[\x20-\x7f][\x80-\xff][\x20-\x7f]/', $result)) + { + it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($result)), 'skipfiles' => "it_html")); + $result = utf8_encode($result); + } + return $result; } @@ -454,7 +460,7 @@ function sanitize($html) */ function entity_decode($string) { - $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string)); + $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_COMPAT, $GLOBALS['it_html']->p['charset'])); $string = preg_replace_callback('/�*([0-9a-f]+);/i', function($m) { return hexdec($m[1]) <= 255 ? chr(hexdec($m[1])) : " "; }, $string); $string = preg_replace_callback('/�*([0-9]+);/', function($m) { return $m[1] <= 255 ? chr($m[1]) : " "; }, $string); @@ -476,8 +482,9 @@ function latinize($string) */ function Q($string) { - if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\x9f]/', $string)) # WARNING: copy/pasted to _tag() - $string = htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset']); + if (preg_match('/[<>&"\x00-\x08\x0a-\x0c\x0e-\x1f\x80-\xff]/', $origstring = $string)) # WARNING: copy/pasted to _tag() + if (($string = htmlspecialchars($GLOBALS['it_html']->p['charset'] == "iso-8859-1" ? it_html::latinize($string) : $string, ENT_COMPAT, $GLOBALS['it_html']->p['charset'])) === "" && $GLOBALS['debug_utf8check']) + it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($origstring)), 'skipfiles' => "it_html")); return $GLOBALS['debug_q'] && $string ? "<span style='background:#8FF'>$string</span>" : $string; } diff --git a/tests/it_html.t b/tests/it_html.t index 0def431..c955359 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -4,6 +4,7 @@ # Tests for html.class # Traditional html generation +ini_set('default_charset', "utf-8"); new it_html(array('htmltype' => "html")); is( @@ -95,30 +96,22 @@ is( <P><a href="javascript:window.close()" title="Wolken"><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" width="240" height="180" alt="Wolken" style="border: 1px solid #ddd;" /></a></p> '), - ' <a href="http://www.flickr.com/people/swisspics%25/">swisspics</a> posted < < ä & yesterday a <i>photo</i> <i>tag missmatch</i>:<br /><br /> <p><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" alt="" /></p> ', + ' <a href="http://www.flickr.com/people/swisspics%25/">swisspics</a> posted < < ä & yesterday a <i>photo</i> <i>tag missmatch</i>:<br /><br /> <p><img src="http://farm1.static.flickr.com/177/377214376_bcba167a7d_m.jpg" alt="" /></p> ', 'it_html::sanitize tag soup' ); is( it_html::sanitize('q←x'), - "q←x", + "qâ†x", 'it_html::sanitize preserve numeric entities' ); -it_html::configure(array('charset' => "utf-8")); is( it_html::sanitize('qüx'), "q\xc3\xbcx", 'it_html::sanitize with utf-8' ); -it_html::configure(array('charset' => "iso-8859-1")); -is( - it_html::sanitize('qüx'), - "q\xfcx", - 'it_html::sanitize with latin1' -); - is( it_html::sanitize('<b>a<br>b</b>'), "<b>a<br />b</b>", @@ -126,8 +119,8 @@ is( ); is( - U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<Zürich>", 'gnöp' => "fasel")))), - '/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%FCrich%3E&bar[qux][gn%F6p]=fasel', + U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<Zürich>", 'gnöp' => "fasel")))), + '/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%C3%BCrich%3E&bar[qux][gn%C3%B6p]=fasel', 'U() with nested arrays' ); @@ -149,11 +142,30 @@ is( 'U() converting of \ to /' ); -is(it_html::entity_decode("ä"), "ä"); -is(it_html::entity_decode("’"), "'"); +is(it_html::entity_decode("ä"), "ä"); is(it_html::entity_decode("J"), "J"); is(it_html::entity_decode("J"), "J"); -is(it_html::entity_decode("࿿"), " "); is(it_html::entity_decode("A"), "A"); + + +# +# check transliterations in iso-8859-1 +# + +it_html::configure(array('charset' => "iso-8859-1")); + +is( + it_html::sanitize('qüx'), + "q\xfcx", + 'it_html::sanitize with latin1' +); + +is( + it_html::sanitize('q←x'), + "q←x", + 'it_html::sanitize preserve non-decodable numeric entities' +); +is(it_html::entity_decode("’"), "'"); +is(it_html::entity_decode("࿿"), " "); is(it_html::entity_decode("ϧ"), " "); ?> |