From 48291d1064105f40bd72e5ede776d94c25178e32 Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Wed, 26 Feb 2025 18:04:24 +0100 Subject: decode all html entities including &apos; --- it_html.class | 2 +- test/it_html.t | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/it_html.class b/it_html.class index e9c1c67..e3053b6 100644 --- a/it_html.class +++ b/it_html.class @@ -432,7 +432,7 @@ static function sanitize($html) static function entity_decode($string) { $charset = $GLOBALS['it_html']->p['charset']; - $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_COMPAT, $charset)); + $string = preg_replace('/&#(8217|65533);/', "'", html_entity_decode($string, ENT_QUOTES | ENT_HTML5, $charset)); $string = preg_replace('/&#[^;]*;/i', " ", $string); # remove remaining illegal numeric entities, e.g. 0x80-0x9f return self::_cleanup($string, $charset); diff --git a/test/it_html.t b/test/it_html.t index 11e05dd..19b892c 100755 --- a/test/it_html.t +++ b/test/it_html.t @@ -296,6 +296,11 @@ is(it_html::entity_decode("ä"), "ä"); is(it_html::entity_decode("J"), "J"); is(it_html::entity_decode("J"), "J"); is(it_html::entity_decode("A"), "A"); +is(it_html::entity_decode('&quot;'), '"'); +is(it_html::entity_decode('&apos;'), "'"); +is(it_html::entity_decode('&lt;'), "<"); +is(it_html::entity_decode('&gt;'), ">"); +is(it_html::entity_decode('&amp;'), "&"); # tests for itools extension is(table(null), "
\n", "table() null argument"); -- cgit v1.2.3