summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Urban Müller 2024-04-18 18:17:42 +0200
committer Urban Müller 2024-04-18 18:17:42 +0200
commit 2946cad416b247327c033f61fda8270c6771248b (patch)
tree e0ad6af34cc7d3c8c11e65dd637cac6284c91d79
parent 543b7b80cd17dd633f3bd961a358d509070aeabf (diff)
download itools-2946cad416b247327c033f61fda8270c6771248b.tar.gz
itools-2946cad416b247327c033f61fda8270c6771248b.tar.bz2
itools-2946cad416b247327c033f61fda8270c6771248b.zip
introduce empty_on_fail, get keepfailed to work
-rw-r--r-- it_url.class 19
-rwxr-xr-x test/it_url.t 26
-rw-r--r-- test/it_url.testserver.php 5
3 files changed, 38 insertions, 12 deletions
diff --git a/it_url.class b/it_url.class
index 3d407eb..da1de16 100644
--- a/it_url.class
+++ b/it_url.class
@@ -109,22 +109,20 @@ static function _postprocess($data, $p)
* @param $p['accept_encoding'] Contents of the "Accept-Encoding: " header. Enables decoding of the response. Set to null to disable, "" (default) for all supported encodings.
* @param $p['postprocess'] function called with content and $p which has it_error. returns content or null (which triggers retry)
* @param $p['protocols'] Array of protocols to accept, defaults to ['http', 'https'], @see curl_opts for other values
+ * @param $p['empty_on_fail'] Return empty page if http status code is >= 400
* @return Content of resulting page (considering redirects, excluding headers or false on error) or array (empty on error) if 'assoc' => true
*/
-static function get($p = [], $timeout = null)
+static function get($p = [])
{
- return (new static)->_get($p, $timeout);
+ return (new static)->_get($p);
}
/**
* Non-static alias for get so we can make get() static
*/
-function _get($p = [], $timeout = null)
+function _get($p = [])
{
- if (isset($timeout))
- it::error("Deprecated second argument of it_url::get()!");
- if (is_string($p))
- $p = array('url' => $p, 'timeout' => 5);
+ $p = is_string($p) ? ['url' => $p, 'timeout' => 5] : $p;
$p += array('retries' => 1);
if (($filter = EDC('req')) && ($filter == 1 || strstr($p['url'], "/$filter.")))
@@ -139,6 +137,7 @@ function _get($p = [], $timeout = null)
$result = $this->request($p + ['followlocation' => true]);
$result = self::_postprocess($result, $p);
+ # FIXME 2024-07 UM some failures never send errs in request() because retries > 0
if ($p['retries'] > 0 && ((!$result && !it::match('^(204|4..)$', $this->result)) || it::match(self::$retryable, $this->result)))
{
usleep($p['retrysleep']*1000000);
@@ -353,6 +352,8 @@ function request($p=array())
}
else
{
+ if ($url->result >= 400 && ($p['empty_on_fail'] || $p['keepfailed']))
+ $got = $url->data = false;
$result =& $url->data;
$this->errstr = "HTTP Status " . $url->result;
}
@@ -576,7 +577,7 @@ static function get_cache_filename($p)
* @param $p['preprocess'] callback function (or array for methods) to change received file or array('function' => ..., 'in' => $src, 'out' => $dst, ...) with callback function plus args
* @param $p['safety'] DEPRECATED. see $p['it_error']
* @param $p['it_error'] parameters for it::error(), false means ignore errors, anything else gets passed to it::error() if errors occur
- * @param $p['keepfailed'] keep old versions of files if download fails (sending alerts conservatively)
+ * @param $p['keepfailed'] keep old versions of files if download fails
* @param $p['returnheaders'] Return array($path, $headers) instead of simply $path
* @param $p['postprocess'] UNSUPPORTED, use ::get_cache_contents
* @param $p['lock'] prevent multiple requests to same url from different processes [true]
@@ -740,7 +741,7 @@ static function _expired($path, $maxage, $randomexpire = 0)
{
if ($result = EDC('nocache') ? false : @filemtime($path))
{
- if (time() - $result > $maxage || rand(0, 100000) <= $randomexpire * 100000)
+ if (time() - $result >= $maxage || rand(0, 100000) <= $randomexpire * 100000)
EDC('getcache', "expired", $maxage, $path);
else
$result = false;
diff --git a/test/it_url.t b/test/it_url.t
index 02fe45c..01de74b 100755
--- a/test/it_url.t
+++ b/test/it_url.t
@@ -1,7 +1,7 @@
#!/www/server/bin/php -qC
<?php
-# Tests for url.class, currently only constructor's parser
+require 'it_url_server.php';
it::getopt("Usage: it_url.t [OPTIONS]");
@@ -131,8 +131,6 @@ is(
);
$_SERVER['PHP_SELF'] = $php_self;
-require 'it_url_server.php';
-
is(it_url::is_reachable('http://www.gna.ch/'), true, "is_reachable('http://www.gna.ch/')");
is(it_url::is_reachable('http://www.search.ch/not_found'), false, "is_reachable('http://www.search.ch/not_found')");
is(it_url::is_reachable('http://bogus.url'), false, "is_reachable('http://bogus.url')");
@@ -259,6 +257,28 @@ if (!ok(
$output = handle_server(
ok(
+ it_url::get(['url' => "http://$host/not_found_with_body", 'empty_on_fail' => false, 'it_error' => false]),
+ 'it_url::get() on 404 with body'
+ )
+);
+
+$output = handle_server(
+ ok(
+ !it_url::get(['url' => "http://$host/not_found_with_body", 'empty_on_fail' => true, 'it_error' => false]),
+ 'it_url::get() on 404 with body and empty_on_fail'
+ )
+);
+
+$output = handle_server(
+ is(
+ it::filter_keys(it_url::get(['url' => "http://$host/not_found_with_body", 'empty_on_fail' => true, 'it_error' => false, 'assoc' => true]), 'status,data'),
+ [],
+ 'it_url::get() on 404 with body, empty_on_fail and assoc'
+ )
+);
+
+$output = handle_server(
+ ok(
!it_url::get(['url' => "http://$host/repeat?num=0", 'retries' => 4]),
'it_url::get() on empty page'
)
diff --git a/test/it_url.testserver.php b/test/it_url.testserver.php
index 978e4af..ca5300c 100644
--- a/test/it_url.testserver.php
+++ b/test/it_url.testserver.php
@@ -80,6 +80,11 @@ switch ($_SERVER['PHP_SELF'])
echo $iserror ? "failure" : "success";
break;
+ case "/not_found_with_body":
+ http_response_code(404);
+ echo 'Testserver 404 output';
+ break;
+
default:
http_response_code(404);
fwrite($stderr, "Unknown path '$_SERVER[PHP_SELF]' not handled!\n");