summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Flatz 2024-04-26 18:19:42 +0200
committer David Flatz 2024-04-26 18:19:42 +0200
commit 243879bb340b08b9cc0eee8d988d8025980a390c (patch)
tree d5d0becb5d8f0bebda73ea46ff00c8c2e1b6200c
parent c2136d616576a2ff9f36f477870ba32317ac666a (diff)
download itools-243879bb340b08b9cc0eee8d988d8025980a390c.tar.gz
itools-243879bb340b08b9cc0eee8d988d8025980a390c.tar.bz2
itools-243879bb340b08b9cc0eee8d988d8025980a390c.zip
Handle whitespace between attribute name and value; add some TODO tests to be more compliant with the specification
-rw-r--r-- it_html.class 2
-rwxr-xr-x test/it_html.t 18
2 files changed, 19 insertions, 1 deletions
diff --git a/it_html.class b/it_html.class
index a4a4ab5..44a2137 100644
--- a/it_html.class
+++ b/it_html.class
@@ -415,7 +415,7 @@ static function sanitize($html)
$tagname = strtolower($tagname);
$result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "</$tagname>" . it_html::sanitize($tail);
}
- else if ($tag = it::match('(.*)<a\b[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
+ else if ($tag = it::match('(.*)<a\b[^>]+?\bhref\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
{
# Link tags, keeps only href attribute
list($head, $href, $content, $tail) = $tag;
diff --git a/test/it_html.t b/test/it_html.t
index e6477da..80753db 100755
--- a/test/it_html.t
+++ b/test/it_html.t
@@ -253,6 +253,24 @@ is(
'empty tags removal'
);
+is(
+ it_html::sanitize('<a href = "http://search.ch/">foo</a>'),
+ '<a href="http://search.ch/">foo</a>',
+ 'it_html::sanitize handle anchors with spaces between attribute name and value'
+);
+
+is(
+ it_html::sanitize("<a href='http://search.ch/'>foo</a>"),
+ '<a href="http://search.ch/">foo</a>',
+ 'TODO it_html::sanitize handle anchors with single quotes at attribute value'
+);
+
+is(
+ it_html::sanitize("<a href=index.html>foo</a>"),
+ '<a href="index.html">foo</a>',
+ 'TODO it_html::sanitize handle anchors with unquoted attribute value'
+);
+
foreach (json_decode(it::file_get_contents(dirname($argv[0]) . '/U_tests.json'), true) as $test)
is(U(...$test['args']), $test['exp'], $test['name']);