path: root/it_url.class
author      Urban Müller    2020-07-30 15:57:02 +0200
committer   Urban Müller    2020-07-30 15:57:02 +0200
commit      a8b1474b149e2a12253a31ec85b04f9f3933e92a (patch)
tree        d643af1c64ecb80425ff9a76fc0769c3973da2dd /it_url.class
parent      8df66b1d1b2407b16c9ee465a877c214b69a5da0 (diff)
dont forget retry on last active url, support fetchsleep
Diffstat (limited to 'it_url.class')
-rw-r--r--    it_url.class    16
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/it_url.class b/it_url.class
index d700b7e..792a839 100644
--- a/it_url.class
+++ b/it_url.class
@@ -331,12 +331,13 @@ function request($p=array())
/**
* Get multiple URL in parallel with timeout. Needs to be called statically
* @param $p parameter array with the following keys (same as it_url::get)
- * @param $p['urls']: array/generator of urls to get
- * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
- * @param $p['totaltimeout']: timeout for the whole function call (fractions allowed)
- * @param $p['headers']: optional array of HTTP headers to send
- * @param $p['parallel']: max number of parallel requests
- * @param $p['noresults']: do not keep results around
+ * @param $p['urls'] array/generator of urls to get
+ * @param $p['timeout'] timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
+ * @param $p['totaltimeout'] timeout for the whole function call (fractions allowed)
+ * @param $p['headers'] optional array of HTTP headers to send
+ * @param $p['parallel'] max number of parallel requests
+ * @param $p['noresults'] do not keep results around
+ * @param $p['fetchsleep'] number of seconds to wait after fetch, fractions ok
 * @return array of contents (or false for errors like timeouts) of resulting page using same
* keys as the urls input array, considering redirects, excluding headers
*/
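
The docblock above describes the parameter array that it_url::get_multi() accepts, including the new 'fetchsleep' key added by this commit. A minimal usage sketch, assuming the class file is included as it_url.class and that failed fetches come back as false (per the @return note); the URLs, timeouts and result keys are illustrative placeholders, not part of the commit:

<?php
require_once 'it_url.class';   // assumed include path

// Fetch a few pages concurrently; parameter names follow the docblock above
$results = it_url::get_multi(array(
    'urls'         => array('home' => 'https://example.com/', 'api' => 'https://example.com/api'),
    'timeout'      => 5,     // per-read timeout in seconds
    'totaltimeout' => 20,    // budget for the whole call, fractions allowed
    'parallel'     => 2,     // at most two requests in flight
    'fetchsleep'   => 0.25,  // new in this commit: pause 250ms after each completed fetch
));

foreach ($results as $key => $content) {
    if ($content === false)
        continue;            // timed out or otherwise failed
    // $content is the body of the final, redirect-resolved response
}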
@@ -393,7 +394,7 @@ static function get_multi($p=null)
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
$timeout = 0.001; # Very short timeout to work around problem with first select call on cURL 7.25.0
- while (!$abort && $active && $mrc == CURLM_OK)
+ while (!$abort && (($active && $mrc == CURLM_OK) || count($handles) > 0))
{
if (curl_multi_select($mh, $timeout) == -1)
usleep($timeout * 1000000);
@@ -404,6 +405,7 @@ static function get_multi($p=null)
{
if ($info['msg'] == CURLMSG_DONE)
{
+ usleep($p['fetchsleep'] * 1000000);
$key = $keys[$info['handle']];
$content = curl_multi_getcontent($info['handle']);
if (isset($p['postprocess']))
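
For context, here is a standalone sketch (not the class's actual code) of the curl_multi pattern the last two hunks adjust: the outer loop keeps draining as long as easy handles are still registered, even after curl reports no active transfers, and sleeps after every completed fetch. $urls and $fetchsleep are illustrative placeholders; error handling and redirect options are omitted.

<?php
$urls = array('https://example.com/a', 'https://example.com/b');
$fetchsleep = 0.25;                      // seconds to pause after each finished fetch

$mh = curl_multi_init();
$handles = array();
foreach ($urls as $url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_multi_add_handle($mh, $ch);
    $handles[] = $ch;
}

do { $mrc = curl_multi_exec($mh, $active); } while ($mrc == CURLM_CALL_MULTI_PERFORM);

// Keep looping while transfers are active OR handles remain registered,
// so the last finished handle is still read out before the loop ends.
while (($active && $mrc == CURLM_OK) || count($handles) > 0) {
    if (curl_multi_select($mh, 0.001) == -1)
        usleep(1000);                    // work around select quirk, as in the class
    do { $mrc = curl_multi_exec($mh, $active); } while ($mrc == CURLM_CALL_MULTI_PERFORM);

    while ($info = curl_multi_info_read($mh)) {
        if ($info['msg'] == CURLMSG_DONE) {
            usleep($fetchsleep * 1000000);           // throttle between completed fetches
            $content = curl_multi_getcontent($info['handle']);
            curl_multi_remove_handle($mh, $info['handle']);
            if (($i = array_search($info['handle'], $handles, true)) !== false)
                unset($handles[$i]);
            // ... use $content ...
        }
    }
}
curl_multi_close($mh);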