From a8b1474b149e2a12253a31ec85b04f9f3933e92a Mon Sep 17 00:00:00 2001 From: Urban Müller Date: Thu, 30 Jul 2020 15:57:02 +0200 Subject: dont forget retry on last active url, support fetchsleep --- it_url.class | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/it_url.class b/it_url.class index d700b7e..792a839 100644 --- a/it_url.class +++ b/it_url.class @@ -331,12 +331,13 @@ function request($p=array()) /** * Get multiple URL in parallel with timeout. Needs to be called statically * @param $p parameter array with the following keys (same as it_url::get) - * @param $p['urls']: array/generator of urls to get - * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?) - * @param $p['totaltimeout']: timeout for the whole function call (fractions allowed) - * @param $p['headers']: optional array of HTTP headers to send - * @param $p['parallel']: max number of parallel requests - * @param $p['noresults']: do not keep results around + * @param $p['urls'] array/generator of urls to get + * @param $p['timeout'] timeout per read in seconds, defaults to 5. (TODO: fractions allowed?) 
+ * @param $p['totaltimeout'] timeout for the whole function call (fractions allowed) + * @param $p['headers'] optional array of HTTP headers to send + * @param $p['parallel'] max number of parallel requests + * @param $p['noresults'] do not keep results around + * @param $p['fetchsleep'] number of seconds to wait after fetch, fractions ok * @return array of contents (or false for errors like timeouts) of resulting page using same * keys as the urls input array, considering redirects, excluding headers */ @@ -393,7 +394,7 @@ static function get_multi($p=null) } while ($mrc == CURLM_CALL_MULTI_PERFORM); $timeout = 0.001; # Very short timeout to work around problem with first select call on cURL 7.25.0 - while (!$abort && $active && $mrc == CURLM_OK) + while (!$abort && (($active && $mrc == CURLM_OK) || count($handles) > 0)) { if (curl_multi_select($mh, $timeout) == -1) usleep($timeout * 1000000); @@ -404,6 +405,7 @@ static function get_multi($p=null) { if ($info['msg'] == CURLMSG_DONE) { + usleep($p['fetchsleep'] * 1000000); $key = $keys[$info['handle']]; $content = curl_multi_getcontent($info['handle']); if (isset($p['postprocess'])) -- cgit v1.2.3