diff options
Diffstat (limited to 'it_url.class')
-rw-r--r-- | it_url.class | 45 |
1 files changed, 45 insertions, 0 deletions
/**
 * Get multiple URLs in parallel with timeout. Needs to be called statically
 * @param $p parameter array with the following keys (same as it_url::get)
 * @param $p['urls']: array of urls to get
 * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
 * @param $p['totaltimeout']: timeout for the whole function call
 * @return array of contents of resulting page using same keys as the urls input array,
 *         considering redirects, excluding headers; entries are null/"" for failed fetches
 */
function get_multi($p=null)
{
	// Cast: $p defaults to null and "null + array" is a fatal TypeError on PHP 8.
	// NOTE(review): former 'retries' default was declared but never read -> dropped.
	$p = (array)$p + array('totaltimeout' => 999999, 'timeout' => 5);

	$opts = array(
		CURLOPT_HEADER => false,		// body only, headers stripped
		CURLOPT_RETURNTRANSFER => true,		// return content instead of echoing it
		CURLOPT_TIMEOUT => $p['totaltimeout'],
		CURLOPT_LOW_SPEED_LIMIT => 5,		// abort transfer if below 5 bytes/s ...
		CURLOPT_LOW_SPEED_TIME => $p['timeout'],	// ... for 'timeout' seconds (per-read timeout)
		CURLOPT_FOLLOWLOCATION => true,		// transparently follow redirects
	);

	$mh = curl_multi_init();
	$ch = array();

	foreach ((array)$p['urls'] as $key => $url)
	{
		$ch[$key] = curl_init();
		curl_setopt($ch[$key], CURLOPT_URL, $url);
		curl_setopt_array($ch[$key], $opts);
		curl_multi_add_handle($mh, $ch[$key]);
	}

	// Drive all transfers. curl_multi_select() blocks until there is activity
	// (or 0.5s elapse) instead of busy-looping at 100% CPU; bail out if the
	// multi stack itself reports an error so we cannot spin forever.
	do {
		$status = curl_multi_exec($mh, $running);
		if ($running > 0)
			curl_multi_select($mh, 0.5);
	} while ($running > 0 && ($status == CURLM_OK || $status == CURLM_CALL_MULTI_PERFORM));

	$results = array();
	foreach ($ch as $key => $handle)
	{
		$results[$key] = curl_multi_getcontent($handle);
		curl_multi_remove_handle($mh, $handle);
		curl_close($handle);
	}
	curl_multi_close($mh);

	return $results;
}