diff options
author | Urban Müller | 2012-06-29 10:02:29 +0000 |
---|---|---|
committer | Urban Müller | 2012-06-29 10:02:29 +0000 |
commit | 8f988e6449488f2c58f9589399a582804c52d31f (patch) | |
tree | b82b31123401a958d6ddce5c1e55bbedec56e748 | |
parent | 33bd44fd181ee2d4366834bcdadecda0f6907822 (diff) | |
download | itools-8f988e6449488f2c58f9589399a582804c52d31f.tar.gz itools-8f988e6449488f2c58f9589399a582804c52d31f.tar.bz2 itools-8f988e6449488f2c58f9589399a582804c52d31f.zip |
numeric entities already handled (except for illegals, remove those)
-rw-r--r-- | it_html.class | 3 | ||||
-rwxr-xr-x | tests/it_html.t | 1 |
2 files changed, 2 insertions, 2 deletions
diff --git a/it_html.class b/it_html.class index ece7070..bf9f8ff 100644 --- a/it_html.class +++ b/it_html.class @@ -459,8 +459,7 @@ function sanitize($html) function entity_decode($string) { $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_COMPAT, $GLOBALS['it_html']->p['charset'])); - $string = preg_replace_callback('/&#x0*([0-9a-f]+);/i', function($m) { return hexdec($m[1]) <= 255 ? chr(hexdec($m[1])) : " "; }, $string); - $string = preg_replace_callback('/&#0*([0-9]+);/', function($m) { return $m[1] <= 255 ? chr($m[1]) : " "; }, $string); + $string = preg_replace('/&#[^;]*;/i', " ", $string); # remove remaining illegal numeric entities, e.g. 0x80-0x9f return $string; } diff --git a/tests/it_html.t b/tests/it_html.t index 307bc7f..7e43faa 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -169,6 +169,7 @@ is( is(it_html::entity_decode("&#8217;"), "'", "it_html::entity_decode numeric decimal entity"); is(it_html::entity_decode("&#xfff;"), " ", "it_html::entity_decode invalid numeric hex entity"); is(it_html::entity_decode("&#999;"), " ", "it_html::entity_decode invalid numeric decimal entity"); +is(it_html::entity_decode("&#x8b;"), " ", "it_html::entity_decode entity von 0x80-0x9f"); ?> |