| author | Nathan Gass | 2019-11-21 17:58:10 +0100 |
|---|---|---|
| committer | Nathan Gass | 2019-11-21 17:58:10 +0100 |
| commit | 3b71ec3ae89cbda5ba55af8f5fea3c62462d3110 (patch) | |
| tree | 501f08f302b9b3643053773f53d78b75cb0a0feb | |
| parent | a396d1753bc91b69cdda777f7382fcf72eac4ada (diff) | |
| download | itools-3b71ec3ae89cbda5ba55af8f5fea3c62462d3110.tar.gz, itools-3b71ec3ae89cbda5ba55af8f5fea3c62462d3110.tar.bz2, itools-3b71ec3ae89cbda5ba55af8f5fea3c62462d3110.zip | |
avoid reading all of urls array in memory to support generators
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | it_url.class | 37 |

1 file changed, 20 insertions, 17 deletions
```diff
diff --git a/it_url.class b/it_url.class
index ef17e53..c3786f1 100644
--- a/it_url.class
+++ b/it_url.class
@@ -310,7 +310,7 @@ function request($p=array())
 /**
  * Get multiple URL in parallel with timeout. Needs to be called statically
  * @param $p parameter array with the following keys (same as it_url::get)
- * @param $p['urls']: array of urls to get
+ * @param $p['urls']: array/generator of urls to get
  * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
  * @param $p['totaltimeout']: timeout for the whole function call (fractions allowed)
  * @param $p['headers']: optional array of HTTP headers to send
@@ -330,14 +330,11 @@ static function get_multi($p=null)
 
 	$mh = curl_multi_init();
 
-	$urls = array();
-	foreach ($p['urls'] as $key => $url)
-		$urls[$key] = is_array($url) ? $url : array('url' => $url);
-
-	$keys = $handles = $retries = [];
-	$addhandle = function ($key) use (&$keys, &$handles, $urls, $opts, $mh) {
+	$keys = $handles = $urls = $retries = [];
+	$addhandle = function ($key, $url) use (&$keys, &$handles, &$urls, $opts, $mh) {
+		$urls[$key] = $url;
 		$handle = curl_init();
-		curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], $urls[$key]['url']));
+		curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], is_array($url) ? $url['url'] : $url));
 		curl_setopt_array($handle, $opts);
 		curl_multi_add_handle($mh, $handle);
 		$keys[$handle] = $key;
@@ -350,10 +347,14 @@ static function get_multi($p=null)
 		unset($handles[$key]);
 	};
 
-	$tofetch = array_keys($urls);
-	$parallel = $p['parallel'] ?: count($tofetch);
-	while (count($handles) < $parallel && $tofetch)
-		$addhandle(array_shift($tofetch));
+	if (!$p['noresults'])
+		$keyorder = array_keys($p['urls']);
+
+	reset($p['urls']);
+
+	$parallel = $p['parallel'] ?: PHP_INT_MAX;
+	while (count($handles) < $parallel && ($next = each($p['urls'])))
+		$addhandle($next['key'], $next['value']);
 
 	$start = gettimeofday(true);
 
@@ -383,20 +384,22 @@ static function get_multi($p=null)
 				if (!$p['noresults'])
 					$results_unordered[$key] = $content;
 
-				if (($handler = $urls[$key]['handler']))
+				if (is_array($urls[$key]) && ($handler = $urls[$key]['handler']))
 					$abort = $handler($info['handle'], $content);
 
+				unset($urls[$key]);
 				$closehandle($key);
 			} else if($retries[$key]++ < $p['retries']) {
 				$closehandle($key); # closehandle must be called before addhandle as we use the same key
-				$addhandle($key);
+				$addhandle($key, $urls[$key]);
 			} else {
 				$results_unordered[$key] = false;
+				unset($urls[$key]);
 				$closehandle($key);
 			}
 
-			if (!$abort && count($handles) < $parallel && $tofetch)
-				$addhandle(array_shift($tofetch));
+			if (!$abort && count($handles) < $parallel && ($next = each($p['urls'])))
+				$addhandle($next['key'], $next['value']);
 		}
 	}
 } while ($mrc == CURLM_CALL_MULTI_PERFORM);
@@ -407,7 +410,7 @@ static function get_multi($p=null)
 		$closehandle($key);
 	curl_multi_close($mh);
 
-	return $p['noresults'] ? null : it::filter_keys($results_unordered, array_keys($urls), ['reorder' => true]);
+	return $p['noresults'] ? null : it::filter_keys($results_unordered, $keyorder, ['reorder' => true]);
 }
 
 /**
```
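To illustrate the intent of the change, here is a minimal caller-side sketch of feeding `get_multi()` from a generator instead of a prebuilt array. It is not part of the commit: the input file, generator body and handler logic are made up, and only parameters that appear in the patch (`urls`, `parallel`, `noresults`, and per-URL `url`/`handler` entries) are used.

```php
<?php
// Hedged sketch (not part of the commit): feed get_multi() lazily from a
// generator so the full URL list never has to be materialized in memory.
// The file name, output path and handler body are illustrative only.

require_once 'it_url.class';

// Yield one URL descriptor per input line; keys auto-increment as usual.
function url_jobs($path)
{
	foreach (new SplFileObject($path) as $line) {
		$url = trim($line);
		if ($url === '')
			continue;

		yield [
			'url'     => $url,
			// Per-URL handler as supported by get_multi(): called with the
			// curl handle and the downloaded content once the fetch is done.
			'handler' => function ($handle, $content) use ($url) {
				file_put_contents('out/' . md5($url), $content);
				return false; // a truthy return would stop further fetches
			},
		];
	}
}

// With a generator it is natural to skip result collection ('noresults')
// and let the per-URL handlers process responses as they arrive.
it_url::get_multi([
	'urls'      => url_jobs('urls.txt'), // hypothetical input file
	'parallel'  => 10,
	'noresults' => true,
]);
```

Note that the ordered-results path still calls `array_keys($p['urls'])` to remember the key order, which is why this sketch pairs the generator with `noresults` and per-URL handlers rather than relying on the returned array.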