From 8c3977ce0f6395eb79be1b2579830b4193ea1605 Mon Sep 17 00:00:00 2001
From: Christian Schneider
Date: Mon, 8 Dec 2008 17:18:12 +0000
Subject: Fix it_html::sanitize with b/br combination (tag prefix of other tag
 bug)
---
 it_html.class   | 8 ++++----
 tests/it_html.t | 6 ++++++
 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/it_html.class b/it_html.class
index 9debd34..3ccc084 100644
--- a/it_html.class
+++ b/it_html.class
@@ -378,26 +378,26 @@ function sanitize($html)
 	$urlpattern = 'https?://[^">]+';
 	$charset = $GLOBALS['it_html']->p['charset'] ? $GLOBALS['it_html']->p['charset'] : 'iso-8859-1';
 
-	if ($tag = it::match("(.*)<(div|p|i|b)[^>]*>(.*?)</\\2>(.*)", $html))
+	if ($tag = it::match("(.*)<(div|p|i|b)\b[^>]*>(.*?)</\\2>(.*)", $html))
 	{
 		# Simple tags with content, no attributes kept
 		list($head, $tagname, $content, $tail) = $tag;
 		$tagname = strtolower($tagname);
 		$result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "</$tagname>" . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match('(.*)<a[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
+	else if ($tag = it::match('(.*)<a\b[^>]+?href="(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
 	{
 		# Link tags, keeps only href attribute
 		list($head, $href, $content, $tail) = $tag;
 		$result .= it_html::sanitize($head) . '' . it_html::sanitize($content) . "" . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match('(.*)<img[^>]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
+	else if ($tag = it::match('(.*)<img\b[^>]+?src="(' . $urlpattern . ')"[^>]*?>(.*)', $html))
 	{
 		# Image tags, keeps only src attribute
 		list($head, $src, $tail) = $tag;
 		$result .= it_html::sanitize($head) . '
' . it_html::sanitize($tail);
 	}
-	else if ($tag = it::match("(.*)<(br|/tr)[^>]*>(.*)", $html))
+	else if ($tag = it::match("(.*)<(br|/tr)\b[^>]*>(.*)", $html))
 	{
 		# brs and table rows are converted so simple line breaks
 		list($head, $tagname, $tail) = $tag;
diff --git a/tests/it_html.t b/tests/it_html.t
index 86afbf8..e77a0a1 100755
--- a/tests/it_html.t
+++ b/tests/it_html.t
@@ -115,6 +115,12 @@ is(
 	'it_html::sanitize with latin1'
 );
 
+is(
+	it_html::sanitize('a
b'),
+	 "a
b",
+	'it_html::sanitize with b and br (tag prefix of other tag bug)'
+);
+
 is(
 	U("/foo.html", array('bar' => array('gna' => 42, 'qux' => array('quux' => "", 'gnöp' => "fasel")))),
 	'/foo.html?bar[gna]=42&bar[qux][quux]=%3CZ%FCrich%3E&bar[qux][gn%F6p]=fasel',
-- 
cgit v1.2.3