diff options
author | Nathan Gass | 2014-02-19 14:23:20 +0100 |
---|---|---|
committer | Nathan Gass | 2014-02-19 14:23:20 +0100 |
commit | 723d9d7382e37cac06d9c1ebc00ded066ee0810b (patch) | |
tree | 6fe35c8feb6b78f9d5e8863980662ce8ca53acda | |
parent | 98d31bf3d260c1d0aebe88cb35404a0338fe144f (diff) | |
download | itools-723d9d7382e37cac06d9c1ebc00ded066ee0810b.tar.gz itools-723d9d7382e37cac06d9c1ebc00ded066ee0810b.tar.bz2 itools-723d9d7382e37cac06d9c1ebc00ded066ee0810b.zip |
implement and test argument maxlength (for crawler)
-rw-r--r-- | it_url.class | 8 | ||||
-rwxr-xr-x | tests/it_url.t | 16 |
2 files changed, 22 insertions, 2 deletions
```diff
diff --git a/it_url.class b/it_url.class
index c6c84f2..ba4ef76 100644
--- a/it_url.class
+++ b/it_url.class
@@ -224,6 +224,7 @@ function is_reachable($timeout = 5)
  * @param $p['headers']: optional array of HTTP headers to send
  * @param $p['timeout']: timeout per read in seconds, defaults to 5. fractions allowed
  * @param $p['totaltimeout']: timeout for the whole function call
+ * @param $p['maxlength']: maximum length of response
  * @param $p['filemtime']: Add HTTP header to only fetch when newer than this, otherwise return true instead of data
  * @param $p['data']: POST data array with key-value pairs
  * @param $p['retries']: Number of retries if download fails, default 1
@@ -330,7 +331,7 @@ function request($p=array())
  {
  if ($url->headers['Transfer-Encoding'] == "chunked") # Bogus HTTP/1.1 chunked answer from server (e.g. Wordpress/Apache2/PHP5)
  {
- while ($len = hexdec(fgets($fp)))
+ while (($len = hexdec(fgets($fp))) && (!$p['maxlength'] || strlen($url->data) + $len <= $p['maxlength']))
  {
  $chunk = "";
 
@@ -342,7 +343,7 @@ function request($p=array())
  }
  else
  {
- while (!feof($fp) && (time() < $endtime))
+ while (!feof($fp) && (time() < $endtime) && (!$p['maxlength'] || strlen($url->data) <= $p['maxlength']))
  $url->data .= @fread($fp, 20480);
  }
 
@@ -358,6 +359,9 @@ function request($p=array())
  if (time() >= $endtime)
  $result = false;
 
+ if ($p['maxlength'] && (strlen($this->data) + $len > $p['maxlength']))
+ $result = false;
+
  return $result;
  }
 
diff --git a/tests/it_url.t b/tests/it_url.t
index 6ece8d8..b3222f7 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -137,6 +137,22 @@ handle_server(
 
 handle_server(
  is(
+ it_url::get(array('url' => 'http://localhost:8000/', 'maxlength' => 100)),
+ "Testserver root output",
+ 'it_url::get() static call with port and maxlength',
+ )
+);
+
+handle_server(
+ is(
+ it_url::get(array('url' => 'http://localhost:8000/', 'maxlength' => 5)),
+ false,
+ 'it_url::get() static call with port and too small maxlength',
+ )
+);
+
+handle_server(
+ is(
  it_url::get('http://localhost:8000/temp_redirect'),
  "Testserver output after temporary redirect",
  'it_url::get() follows temproary redirect',
```