From 25a946b8c2bf6638b7adfe2afa387fa26cb97e71 Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Tue, 28 May 2024 17:49:09 +0200 Subject: Revert "Improve handling of nested tags in it_html::sanitize": getting "Exceeded pcre.backtrack_limit of 1000000 bytes" This reverts commit b484fab88a9229f7c87ea053564d0d8d3d2a565d. --- it_html.class | 10 +++------- test/it_html.t | 12 ------------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/it_html.class b/it_html.class index effe920..9780d5d 100644 --- a/it_html.class +++ b/it_html.class @@ -408,15 +408,11 @@ static function sanitize($html) $html = it::replace(array('[\0\s]+' => " "), $html); # \s also matches \r and \n $urlpattern = 'https?://[^">]+'; - if ($tag = it::match('(<(div|p|ol|ul|li|i|b|strong|h[1-6])\b[^>]*>((?:(?!)', $html, ['offset_capture' => 1])) + if ($tag = it::match("(.*?)<(div|p|ol|ul|li|i|b|strong|h[1-6])\b[^>]*>(.*?)(.*)", $html)) { # Simple tags with content, no attributes kept - $offset = $tag[0][1]; - $length = strlen($tag[0][0]); - $head = substr($html, 0, $offset); - $tail = substr($html, $offset + $length); - $content = $tag[2][0]; - $tagname = strtolower($tag[1][0]); + list($head, $tagname, $content, $tail) = $tag; + $tagname = strtolower($tagname); $result .= it_html::sanitize($head) . "<$tagname>" . it_html::sanitize($content) . "" . it_html::sanitize($tail); } else if ($tag = it::match('(.*)]+?\bhref\s*=\s*"(' . $urlpattern . ')"[^>]*?>(.*?)(.*)', $html)) diff --git a/test/it_html.t b/test/it_html.t index 380a779..11e05dd 100755 --- a/test/it_html.t +++ b/test/it_html.t @@ -289,18 +289,6 @@ is( 'TODO it_html::sanitize handle anchors with unquoted attribute value in img' ); -is( - it_html::sanitize(''), - '', - 'Nested unordered lists' -); - -is( - it_html::sanitize('

one one

'), - '

one one

', - 'More nested tags' -); - foreach (json_decode(it::file_get_contents(dirname($argv[0]) . '/U_tests.json'), true) as $test) is(U(...$test['args']), $test['exp'], $test['name']); -- cgit v1.2.3