From 723d9d7382e37cac06d9c1ebc00ded066ee0810b Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Wed, 19 Feb 2014 14:23:20 +0100 Subject: implement and test argument maxlength (for crawler) --- it_url.class | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'it_url.class') diff --git a/it_url.class b/it_url.class index c6c84f2..ba4ef76 100644 --- a/it_url.class +++ b/it_url.class @@ -224,6 +224,7 @@ function is_reachable($timeout = 5) * @param $p['headers']: optional array of HTTP headers to send * @param $p['timeout']: timeout per read in seconds, defaults to 5. fractions allowed * @param $p['totaltimeout']: timeout for the whole function call + * @param $p['maxlength']: maximum length of response * @param $p['filemtime']: Add HTTP header to only fetch when newer than this, otherwise return true instead of data * @param $p['data']: POST data array with key-value pairs * @param $p['retries']: Number of retries if download fails, default 1 @@ -330,7 +331,7 @@ function request($p=array()) { if ($url->headers['Transfer-Encoding'] == "chunked") # Bogus HTTP/1.1 chunked answer from server (e.g. Wordpress/Apache2/PHP5) { - while ($len = hexdec(fgets($fp))) + while (($len = hexdec(fgets($fp))) && (!$p['maxlength'] || strlen($url->data) + $len <= $p['maxlength'])) { $chunk = ""; @@ -342,7 +343,7 @@ function request($p=array()) } else { - while (!feof($fp) && (time() < $endtime)) + while (!feof($fp) && (time() < $endtime) && (!$p['maxlength'] || strlen($url->data) <= $p['maxlength'])) $url->data .= @fread($fp, 20480); } @@ -358,6 +359,9 @@ function request($p=array()) if (time() >= $endtime) $result = false; + if ($p['maxlength'] && (strlen($this->data) + $len > $p['maxlength'])) + $result = false; + return $result; } -- cgit v1.2.3