From 243879bb340b08b9cc0eee8d988d8025980a390c Mon Sep 17 00:00:00 2001 From: David Flatz Date: Fri, 26 Apr 2024 18:19:42 +0200 Subject: Handle whitespace between attribute name and value; add some TODO tests to be more compliant to specification --- it_html.class | 2 +- test/it_html.t | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/it_html.class b/it_html.class index a4a4ab5..44a2137 100644 --- a/it_html.class +++ b/it_html.class @@ -415,7 +415,7 @@ static function sanitize($html) $tagname = strtolower($tagname); $result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "" . it_html::sanitize($tail); } - else if ($tag = it::match('(.*)]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)(.*)', $html)) + else if ($tag = it::match('(.*)]+?\bhref\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*?)(.*)', $html)) { # Link tags, keeps only href attribute list($head, $href, $content, $tail) = $tag; diff --git a/test/it_html.t b/test/it_html.t index e6477da..80753db 100755 --- a/test/it_html.t +++ b/test/it_html.t @@ -253,6 +253,24 @@ is( 'empty tags removal' ); +is( + it_html::sanitize('foo'), + 'foo', + 'it_html::sanitize handle anchors with spaces between attribute name and value' +); + +is( + it_html::sanitize("foo"), + 'foo', + 'TODO it_html::sanitize handle anchors with single quotes at attribute value' +); + +is( + it_html::sanitize("foo"), + 'foo', + 'TODO it_html::sanitize handle anchors with unquoted attribute value' +); + foreach (json_decode(it::file_get_contents(dirname($argv[0]) . '/U_tests.json'), true) as $test) is(U(...$test['args']), $test['exp'], $test['name']); -- cgit v1.2.3