diff options
author | Christian Schneider | 2008-12-08 17:18:12 +0000 |
---|---|---|
committer | Christian Schneider | 2008-12-08 17:18:12 +0000 |
commit | 8c3977ce0f6395eb79be1b2579830b4193ea1605 (patch) | |
tree | 22f59cc61ee445e0e4e15aa0f7616af0e80aea08 | |
parent | 5502f0e89648b4de978d70c5841c340830ec7943 (diff) | |
download | itools-8c3977ce0f6395eb79be1b2579830b4193ea1605.tar.gz itools-8c3977ce0f6395eb79be1b2579830b4193ea1605.tar.bz2 itools-8c3977ce0f6395eb79be1b2579830b4193ea1605.zip |
Fix it_html::sanitize with b/br combination (tag prefix of other tag bug)
-rw-r--r-- | it_html.class | 8 | ||||
-rwxr-xr-x | tests/it_html.t | 6 |
2 files changed, 10 insertions, 4 deletions
```diff
diff --git a/it_html.class b/it_html.class
index 9debd34..3ccc084 100644
--- a/it_html.class
+++ b/it_html.class
@@ -378,26 +378,26 @@ function sanitize($html)
 	$urlpattern = 'https?://[^">]+';
 	$charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1';
 
-	if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)</\\2>(.*)", $html))
+	if ($tag = it::match("(.*)<(div|p|i|b)\b[^>]*>(.*?)</\\2>(.*)", $html))
 	{
 		# Simple tags with content, no attributes kept
 		list($head, $tagname, $content, $tail) = $tag;
 		$tagname = strtolower($tagname);
 		$result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "</$tagname>" . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match('(.*)<a[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
+	else if ($tag = it::match('(.*)<a\b[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
 	{
 		# Link tags, keeps only href attribute
 		list($head, $href, $content, $tail) = $tag;
 		$result .= it_html::sanitize($head) . '<a href="' . it_html::Q(html_entity_decode($href), ENT_COMPAT, $charset) . '">' . it_html::sanitize($content) . "</a>" . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match('(.*)<img[^>]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
+	else if ($tag = it::match('(.*)<img\b[^>]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
 	{
 		# Image tags, keeps only src attribute
 		list($head, $src, $tail) = $tag;
 		$result .= it_html::sanitize($head) . '<img src="' . it_html::Q(html_entity_decode($src, ENT_COMPAT, $charset)) . '" alt="" />' . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match("(.*)<(br|/tr)[^>]*>(.*)", $html))
+	else if ($tag = it::match("(.*)<(br|/tr)\b[^>]*>(.*)", $html))
 	{
 		# brs and table rows are converted so simple line breaks
 		list($head, $tagname, $tail) = $tag;
diff --git a/tests/it_html.t b/tests/it_html.t
index 86afbf8..e77a0a1 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -116,6 +116,12 @@ is(
 );
 
 is(
+	it_html::sanitize('<b>a<br>b</b>'),
+	"<b>a<br />b</b>",
+	'it_html::sanitize with b and br (tag prefix of other tag bug)'
+);
+
+is(
 	U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "<Zürich>", 'gnöp' => "fasel")))),
 	'/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%FCrich%3E&bar[qux][gn%F6p]=fasel',
 	'U() with nested arrays'
```