diff options
author | David Flatz | 2024-04-26 18:19:42 +0200 |
---|---|---|
committer | David Flatz | 2024-04-26 18:19:42 +0200 |
commit | 243879bb340b08b9cc0eee8d988d8025980a390c (patch) | |
tree | d5d0becb5d8f0bebda73ea46ff00c8c2e1b6200c | |
parent | c2136d616576a2ff9f36f477870ba32317ac666a (diff) | |
download | itools-243879bb340b08b9cc0eee8d988d8025980a390c.tar.gz itools-243879bb340b08b9cc0eee8d988d8025980a390c.tar.bz2 itools-243879bb340b08b9cc0eee8d988d8025980a390c.zip |
Handle whitespace between attribute name and value; add some TODO tests to be more compliant with the specification
-rw-r--r-- | it_html.class | 2 | ||||
-rwxr-xr-x | test/it_html.t | 18 |
2 files changed, 19 insertions, 1 deletions
diff --git a/it_html.class b/it_html.class
index a4a4ab5..44a2137 100644
--- a/it_html.class
+++ b/it_html.class
@@ -415,7 +415,7 @@ static function sanitize($html)
 $tagname = strtolower($tagname);
 $result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "</$tagname>" . it_html::sanitize($tail);
 }
-else if ($tag = it::match('(.*)<a\b[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
+else if ($tag = it::match('(.*)<a\b[^>]+?\bhref\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
 {
 # Link tags, keeps only href attribute
 list($head, $href, $content, $tail) = $tag;
diff --git a/test/it_html.t b/test/it_html.t
index e6477da..80753db 100755
--- a/test/it_html.t
+++ b/test/it_html.t
@@ -253,6 +253,24 @@ is(
 'empty tags removal'
 );
 
+is(
+ it_html::sanitize('<a href = "http://search.ch/">foo</a>'),
+ '<a href="http://search.ch/">foo</a>',
+ 'it_html::sanitize handle anchors with spaces between attribute name and value'
+);
+
+is(
+ it_html::sanitize("<a href='http://search.ch/'>foo</a>"),
+ '<a href="http://search.ch/">foo</a>',
+ 'TODO it_html::sanitize handle anchors with single quotes at attribute value'
+);
+
+is(
+ it_html::sanitize("<a href=index.html>foo</a>"),
+ '<a href="index.html">foo</a>',
+ 'TODO it_html::sanitize handle anchors with unquoted attribute value'
+);
+
 foreach (json_decode(it::file_get_contents(dirname($argv[0]) . '/U_tests.json'), true) as $test)
 is(U(...$test['args']), $test['exp'], $test['name']);
 