author    Nathan Gass  2019-11-21 17:58:10 +0100
committer Nathan Gass  2019-11-21 17:58:10 +0100
commit    3b71ec3ae89cbda5ba55af8f5fea3c62462d3110 (patch)
tree      501f08f302b9b3643053773f53d78b75cb0a0feb /it_url.class
parent    a396d1753bc91b69cdda777f7382fcf72eac4ada (diff)
avoid reading the whole urls array into memory to support generators
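For illustration, a minimal sketch of the usage this change enables, assuming a caller as documented in the diff below; url_generator() and its URLs are hypothetical. Note that without 'noresults' the code still calls array_keys() on 'urls' to restore result order, which only works for arrays, so generator input pairs naturally with per-URL handlers:

    # Hypothetical generator yielding key => entry pairs one at a time,
    # so get_multi() never needs the full URL list in memory.
    function url_generator()
    {
        for ($i = 1; $i <= 100000; $i++)
            yield "page$i" => [
                'url' => "https://example.com/page/$i",
                'handler' => function ($handle, $content) {
                    # process $content as it arrives; a truthy return aborts refilling
                    return false;
                },
            ];
    }

    it_url::get_multi(['urls' => url_generator(), 'parallel' => 10, 'noresults' => true]);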
Diffstat (limited to 'it_url.class')
-rw-r--r--  it_url.class  37
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/it_url.class b/it_url.class
index ef17e53..c3786f1 100644
--- a/it_url.class
+++ b/it_url.class
@@ -310,7 +310,7 @@ function request($p=array())
/**
* Get multiple URLs in parallel with timeout. Needs to be called statically
* @param $p parameter array with the following keys (same as it_url::get)
- * @param $p['urls']: array of urls to get
+ * @param $p['urls']: array/generator of urls to get
* @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
* @param $p['totaltimeout']: timeout for the whole function call (fractions allowed)
* @param $p['headers']: optional array of HTTP headers to send
@@ -330,14 +330,11 @@ static function get_multi($p=null)
$mh = curl_multi_init();
- $urls = array();
- foreach ($p['urls'] as $key => $url)
- $urls[$key] = is_array($url) ? $url : array('url' => $url);
-
- $keys = $handles = $retries = [];
- $addhandle = function ($key) use (&$keys, &$handles, $urls, $opts, $mh) {
+ $keys = $handles = $urls = $retries = [];
+ $addhandle = function ($key, $url) use (&$keys, &$handles, &$urls, $opts, $mh) {
+ $urls[$key] = $url;
$handle = curl_init();
- curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], $urls[$key]['url']));
+ curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], is_array($url) ? $url['url'] : $url));
curl_setopt_array($handle, $opts);
curl_multi_add_handle($mh, $handle);
$keys[$handle] = $key;
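As the new $addhandle shows, each entry may still be either a bare URL string or an array with a 'url' key, and a leading '//' is rewritten to 'http://'. A sketch of both forms (URLs hypothetical):

    $results = it_url::get_multi(['urls' => [
        'plain' => 'https://example.com/a',        # bare string entry
        'rich'  => ['url' => '//example.org/b'],   # array entry; '^//' is replaced by 'http://'
    ]]);
    # $results is keyed 'plain', 'rich' in input order (see $keyorder below)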
@@ -350,10 +347,14 @@ static function get_multi($p=null)
unset($handles[$key]);
};
- $tofetch = array_keys($urls);
- $parallel = $p['parallel'] ?: count($tofetch);
- while (count($handles) < $parallel && $tofetch)
- $addhandle(array_shift($tofetch));
+ if (!$p['noresults'])
+ $keyorder = array_keys($p['urls']);
+
+ reset($p['urls']);
+
+ $parallel = $p['parallel'] ?: PHP_INT_MAX;
+ while (count($handles) < $parallel && ($next = each($p['urls'])))
+ $addhandle($next['key'], $next['value']);
$start = gettimeofday(true);
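Since a generator cannot be passed to count(), $parallel now defaults to PHP_INT_MAX and the seeding loop simply stops when the iterable is exhausted. each() is deprecated as of PHP 7.2; for comparison only (not part of this commit), the same one-entry-at-a-time pull could be written against the Iterator interface, which both generators and arrays (via ArrayIterator) satisfy:

    # Comparison sketch, not part of this commit: lazy pull via Iterator.
    $it = is_array($p['urls']) ? new ArrayIterator($p['urls']) : $p['urls'];
    while (count($handles) < $parallel && $it->valid()) {
        $addhandle($it->key(), $it->current());
        $it->next();
    }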
@@ -383,20 +384,22 @@ static function get_multi($p=null)
if (!$p['noresults'])
$results_unordered[$key] = $content;
- if (($handler = $urls[$key]['handler']))
+ if (is_array($urls[$key]) && ($handler = $urls[$key]['handler']))
$abort = $handler($info['handle'], $content);
+ unset($urls[$key]);
$closehandle($key);
} else if($retries[$key]++ < $p['retries']) {
$closehandle($key); # closehandle must be called before addhandle as we use the same key
- $addhandle($key);
+ $addhandle($key, $urls[$key]);
} else {
$results_unordered[$key] = false;
+ unset($urls[$key]);
$closehandle($key);
}
- if (!$abort && count($handles) < $parallel && $tofetch)
- $addhandle(array_shift($tofetch));
+ if (!$abort && count($handles) < $parallel && ($next = each($p['urls'])))
+ $addhandle($next['key'], $next['value']);
}
}
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
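A handler's truthy return value sets $abort above, which prevents a replacement transfer from being queued after that completion. A hypothetical entry using that to cut a crawl short:

    # Hypothetical entry whose handler suppresses further fetches
    # once the response contains a sentinel string.
    $entry = [
        'url' => 'https://example.com/poll',
        'handler' => function ($handle, $content) {
            return strpos($content, 'DONE') !== false;  # truthy => stop refilling
        },
    ];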
@@ -407,7 +410,7 @@ static function get_multi($p=null)
$closehandle($key);
curl_multi_close($mh);
- return $p['noresults'] ? null : it::filter_keys($results_unordered, array_keys($urls), ['reorder' => true]);
+ return $p['noresults'] ? null : it::filter_keys($results_unordered, $keyorder, ['reorder' => true]);
}
/**