2 files changed, 18 insertions, 5 deletions
diff --git a/it_html.class b/it_html.class
index e3053b6..9bb8e7a 100644
--- a/it_html.class
+++ b/it_html.class
@@ -393,32 +393,33 @@ static function sanitize($html)
 	if ($charset == "utf-8")
 		$html = it::any2utf8($html);
 	$html = it::replace(array('[\0\s]+' => " "), $html);	# \s also matches \r and \n
-	$urlpattern = 'https?://[^">]+';
+	$urlpattern = '(?:https?://|mailto:)[^">]+';	# URL prefixes accepted for href and src: http://, https://, mailto:
+	$placeholder = bin2hex(random_bytes(16));	# random hex marker that stands in for the matched tag while the text around it is sanitized
 
 	if ($tag = it::match("(.*?)<(div|p|ol|ul|li|i|b|strong|h[1-6])\b[^>]*>(.*?)</\\2>(.*)", $html))
 	{
 		# Simple tags with content, no attributes kept
 		list($head, $tagname, $content, $tail) = $tag;
 		$tagname = strtolower($tagname);
-		$result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "</$tagname>" . it_html::sanitize($tail);
+		$result .= it::replace([$placeholder => "<$tagname>" . it_html::sanitize($content) . "</$tagname>"], it_html::sanitize("$head$placeholder$tail"));	# head and tail are sanitized as one string so a tag opened in head and closed in tail still pairs up; NOTE(review): confirm it::replace inserts the replacement literally (no $1 or \1 expansion of sanitized text)
 	}
 	else if ($tag = it::match('(.*)<a\b[^>]+?\bhref\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*?)</a>(.*)', $html))
 	{
 		# Link tags, keeps only href attribute
 		list($head, $href, $content, $tail) = $tag;
-		$result .= it_html::sanitize($head) . '<a href="' . it_html::Q(it_html::U(html_entity_decode($href, ENT_COMPAT, $charset))) . '">' . it_html::sanitize($content) . "</a>" . it_html::sanitize($tail);
+		$result .= it::replace([$placeholder => '<a href="' . it_html::Q(it_html::U(html_entity_decode($href, ENT_COMPAT, $charset))) . '">' . it_html::sanitize($content) . "</a>"], it_html::sanitize("$head$placeholder$tail"));	# marker is swapped for the rebuilt link after head and tail were sanitized together
 	}
 	else if ($tag = it::match('(.*)<img\b[^>]+?\bsrc\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*)', $html))
 	{
 		# Image tags, keeps only src attribute
 		list($head, $src, $tail) = $tag;
-		$result .= it_html::sanitize($head) . '<img src="' . it_html::Q(it_html::U(html_entity_decode($src, ENT_COMPAT, $charset))) . '" alt="" />' . it_html::sanitize($tail);
+		$result .= it::replace([$placeholder => '<img src="' . it_html::Q(it_html::U(html_entity_decode($src, ENT_COMPAT, $charset))) . '" alt="" />'], it_html::sanitize("$head$placeholder$tail"));	# marker is swapped for the rebuilt image tag after head and tail were sanitized together
 	}
 	else if ($tag = it::match("(.*)<(br|/tr)\b[^>]*>(.*)", $html))
 	{
 		# brs and table rows are converted so simple line breaks
 		list($head, $tagname, $tail) = $tag;
-		$result .= it_html::sanitize($head) . "<br />" . it_html::sanitize($tail);
+		$result .= it::replace([$placeholder => "<br />"], it_html::sanitize("$head$placeholder$tail"));	# marker is swapped for the line break after head and tail were sanitized together
 	}
 	else
 		$result = it::replace(array('&amp;(#\d+;)' => '&$1'), it_html::Q(html_entity_decode(strip_tags($html), ENT_COMPAT, $charset)));
diff --git a/test/it_html.t b/test/it_html.t
index 15f444d..20ab65f 100755
--- a/test/it_html.t
+++ b/test/it_html.t
@@ -260,6 +260,18 @@ is(
 );
 
 is(
+	it_html::sanitize('<a href="http://search.ch/"><strong>foo</strong></a>'),
+	'<a href="http://search.ch/"><strong>foo</strong></a>',
+	'it_html::sanitize handle nesting of tags inside <a>'
+);
+
+is(
+	it_html::sanitize('<a href="mailto:neuman@example.com">foo</a>'),
+	'<a href="mailto:neuman@example.com">foo</a>',
+	'it_html::sanitize handle mailto links'
+);
+
+is(
 	it_html::sanitize("<a href='http://search.ch/'>foo</a>"),
 	'<a href="http://search.ch/">foo</a>',
 	'TODO it_html::sanitize handle anchors with single quotes at attribute value'