diff options
author | Urban Müller | 2020-07-30 15:57:02 +0200 |
---|---|---|
committer | Urban Müller | 2020-07-30 15:57:02 +0200 |
commit | a8b1474b149e2a12253a31ec85b04f9f3933e92a (patch) | |
tree | d643af1c64ecb80425ff9a76fc0769c3973da2dd /it_url.class | |
parent | 8df66b1d1b2407b16c9ee465a877c214b69a5da0 (diff) | |
download | itools-a8b1474b149e2a12253a31ec85b04f9f3933e92a.tar.gz itools-a8b1474b149e2a12253a31ec85b04f9f3933e92a.tar.bz2 itools-a8b1474b149e2a12253a31ec85b04f9f3933e92a.zip |
don't forget retry on last active url, support fetchsleep
Diffstat (limited to 'it_url.class')
-rw-r--r-- | it_url.class | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/it_url.class b/it_url.class index d700b7e..792a839 100644 --- a/it_url.class +++ b/it_url.class @@ -331,12 +331,13 @@ function request($p=array()) /** * Get multiple URL in parallel with timeout. Needs to be called statically * @param $p parameter array with the following keys (same as it_url::get) - * @param $p['urls']: array/generator of urls to get - * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?) - * @param $p['totaltimeout']: timeout for the whole function call (fractions allowed) - * @param $p['headers']: optional array of HTTP headers to send - * @param $p['parallel']: max number of parallel requests - * @param $p['noresults']: do not keep results around + * @param $p['urls'] array/generator of urls to get + * @param $p['timeout'] timeout per read in seconds, defaults to 5. (TODO: fractions allowed?) + * @param $p['totaltimeout'] timeout for the whole function call (fractions allowed) + * @param $p['headers'] optional array of HTTP headers to send + * @param $p['parallel'] max number of parallel requests + * @param $p['noresults'] do not keep results around + * @param $p['fetchsleep'] number of seconds to wait after fetch, fractions ok * @return array of contents (or false for errors like timesou) of resulting page using same * keys as the urls input array, considering redirects, excluding headers */ @@ -393,7 +394,7 @@ static function get_multi($p=null) } while ($mrc == CURLM_CALL_MULTI_PERFORM); $timeout = 0.001; # Very short timeout to work around problem with first select call on cURL 7.25.0 - while (!$abort && $active && $mrc == CURLM_OK) + while (!$abort && (($active && $mrc == CURLM_OK) || count($handles) > 0)) { if (curl_multi_select($mh, $timeout) == -1) usleep($timeout * 1000000); @@ -404,6 +405,7 @@ static function get_multi($p=null) { if ($info['msg'] == CURLMSG_DONE) { + usleep($p['fetchsleep'] * 1000000); $key = $keys[$info['handle']]; $content = curl_multi_getcontent($info['handle']); 
if (isset($p['postprocess'])) |