From a8a336daf2755274f430c88e67d6d2c396706f4c Mon Sep 17 00:00:00 2001
From: Nathan Gass
Date: Tue, 24 May 2011 13:07:23 +0000
Subject: add it_url::get_multi to fetch multiple urls in parallel

---
 it_url.class   | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/it_url.t |  6 ++++++
 2 files changed, 56 insertions(+)

diff --git a/it_url.class b/it_url.class
index c5252aa..aa4f985 100644
--- a/it_url.class
+++ b/it_url.class
@@ -345,6 +345,56 @@ function get($p=null, $timeout=5)
 }
 
 
+/**
+ * Get multiple URLs in parallel with timeout. Needs to be called statically
+ * @param $p parameter array with the following keys (same as it_url::get)
+ * @param $p['urls']: array of urls to get, defaults to an empty array
+ * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
+ * @param $p['totaltimeout']: timeout for the whole function call, defaults to 999999
+ * @return array of contents of resulting page using same keys as the urls input array,
+ *         considering redirects, excluding headers
+ */
+function get_multi($p=null)
+{
+	# (array) cast keeps the $p=null default from failing on the array union below
+	$p = (array)$p + array('urls' => array(), 'totaltimeout' => 999999, 'timeout' => 5, 'retries' => 1);
+	$opts = array(
+		CURLOPT_HEADER => false,
+		CURLOPT_RETURNTRANSFER => true,
+		CURLOPT_TIMEOUT => $p['totaltimeout'],
+		CURLOPT_LOW_SPEED_LIMIT => 5,
+		CURLOPT_LOW_SPEED_TIME => $p['timeout'],
+		CURLOPT_FOLLOWLOCATION => true,
+	);
+	$mh = curl_multi_init();
+	$ch = array();
+
+	foreach ($p['urls'] as $key => $url)
+	{
+		$ch[$key] = curl_init();
+		curl_setopt($ch[$key], CURLOPT_URL, $url);
+		curl_setopt_array($ch[$key], $opts);
+		curl_multi_add_handle($mh, $ch[$key]);
+	}
+
+	do {
+		$status = curl_multi_exec($mh, $running);
+		# Wait for socket activity instead of spinning; back off briefly if select itself fails
+		if ($status == CURLM_OK && $running > 0 && curl_multi_select($mh, 1.0) == -1)
+			usleep(10000);
+	} while ($running > 0 && ($status == CURLM_OK || $status == CURLM_CALL_MULTI_PERFORM));
+
+	$results = array();
+	foreach ($ch as $key => $handle)
+	{
+		$results[$key] = curl_multi_getcontent($handle);
+		curl_multi_remove_handle($mh, $handle);
+		curl_close($handle);
+	}
+	curl_multi_close($mh);
+	return $results;
+}
+
 /**
  * Construct a local directory name to cache an URL.
Named args:
  * @param $p['cachedir'] directory to store cache files in, defaults to $ULTRAHOME/var/urlcache
diff --git a/tests/it_url.t b/tests/it_url.t
index 4c59004..e8507aa 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -126,4 +126,10 @@ is(
 	'',
 	'it_url::get() static call'
 );
+
+$pages = it_url::get_multi(array('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/'))); # named args go in one array
+ok(it::match('', $pages['a']), 'it_url::get_multi got first url'); # NOTE(review): pattern is empty here and below, presumably lost in extraction - verify
+ok(it::match('', $pages['b']), 'it_url::get_multi got second url');
+is(count($pages), 2, 'it_url::get_multi no additional array elements');
+
 ?>
-- 
cgit v1.2.3