From 723d9d7382e37cac06d9c1ebc00ded066ee0810b Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Wed, 19 Feb 2014 14:23:20 +0100 Subject: implement and test argument maxlength (for crawler) --- it_url.class | 8 ++++++-- tests/it_url.t | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/it_url.class b/it_url.class index c6c84f2..ba4ef76 100644 --- a/it_url.class +++ b/it_url.class @@ -224,6 +224,7 @@ function is_reachable($timeout = 5) * @param $p['headers']: optional array of HTTP headers to send * @param $p['timeout']: timeout per read in seconds, defaults to 5. fractions allowed * @param $p['totaltimeout']: timeout for the whole function call + * @param $p['maxlength']: maximum length of response * @param $p['filemtime']: Add HTTP header to only fetch when newer than this, otherwise return true instead of data * @param $p['data']: POST data array with key-value pairs * @param $p['retries']: Number of retries if download fails, default 1 @@ -330,7 +331,7 @@ function request($p=array()) { if ($url->headers['Transfer-Encoding'] == "chunked") # Bogus HTTP/1.1 chunked answer from server (e.g. Wordpress/Apache2/PHP5) { - while ($len = hexdec(fgets($fp))) + while (($len = hexdec(fgets($fp))) && (!$p['maxlength'] || strlen($url->data) + $len <= $p['maxlength'])) { $chunk = ""; @@ -342,7 +343,7 @@ function request($p=array()) } else { - while (!feof($fp) && (time() < $endtime)) + while (!feof($fp) && (time() < $endtime) && (!$p['maxlength'] || strlen($url->data) <= $p['maxlength'])) $url->data .= @fread($fp, 20480); } @@ -358,6 +359,9 @@ function request($p=array()) if (time() >= $endtime) $result = false; + if ($p['maxlength'] && (strlen($this->data) + $len > $p['maxlength'])) + $result = false; + return $result; } diff --git a/tests/it_url.t b/tests/it_url.t index 6ece8d8..b3222f7 100755 --- a/tests/it_url.t +++ b/tests/it_url.t @@ -135,6 +135,22 @@ handle_server( ) ); +handle_server( + is( + it_url::get(array('url' => 'http://localhost:8000/', 'maxlength' => 100)), + "Testserver root output", + 'it_url::get() static call with port and maxlength', + ) +); + +handle_server( + is( + it_url::get(array('url' => 'http://localhost:8000/', 'maxlength' => 5)), + false, + 'it_url::get() static call with port and too small maxlength', + ) +); + handle_server( is( it_url::get('http://localhost:8000/temp_redirect'), -- cgit v1.2.3