From 243879bb340b08b9cc0eee8d988d8025980a390c Mon Sep 17 00:00:00 2001
From: David Flatz
Date: Fri, 26 Apr 2024 18:19:42 +0200
Subject: Handle whitespace between attribute name and value; add some TODO
 tests to be more compliant with the specification
---
 it_html.class  |  2 +-
 test/it_html.t | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/it_html.class b/it_html.class
index a4a4ab5..44a2137 100644
--- a/it_html.class
+++ b/it_html.class
@@ -415,7 +415,7 @@ static function sanitize($html)
 		$tagname = strtolower($tagname);
 		$result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "$tagname>" . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match('(.*)]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)(.*)', $html))
+	else if ($tag = it::match('(.*)]+?\bhref\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*?)(.*)', $html))
 	{
 		# Link tags, keeps only href attribute
 		list($head, $href, $content, $tail) = $tag;
diff --git a/test/it_html.t b/test/it_html.t
index e6477da..80753db 100755
--- a/test/it_html.t
+++ b/test/it_html.t
@@ -253,6 +253,24 @@ is(
 	'empty tags removal'
 );
 
+is(
+	it_html::sanitize('foo'),
+	'foo',
+	'it_html::sanitize handle anchors with spaces between attribute name and value'
+);
+
+is(
+	it_html::sanitize("foo"),
+	'foo',
+	'TODO it_html::sanitize handle anchors with single quotes at attribute value'
+);
+
+is(
+	it_html::sanitize("foo"),
+	'foo',
+	'TODO it_html::sanitize handle anchors with unquoted attribute value'
+);
+
 foreach (json_decode(it::file_get_contents(dirname($argv[0]) . '/U_tests.json'), true) as $test)
 	is(U(...$test['args']), $test['exp'], $test['name']);
 
-- 
cgit v1.2.3