author    | Nathan Gass | 2011-05-24 13:07:23 +0000
committer | Nathan Gass | 2011-05-24 13:07:23 +0000
commit    | a8a336daf2755274f430c88e67d6d2c396706f4c (patch)
tree      | 0754a5d02e17c7e2c7efc99f72b8f8b03000d590
parent    | 90bbfd2be4fba89e5016a290a5f433aa8204d793 (diff)
download  | itools-a8a336daf2755274f430c88e67d6d2c396706f4c.tar.gz
          | itools-a8a336daf2755274f430c88e67d6d2c396706f4c.tar.bz2
          | itools-a8a336daf2755274f430c88e67d6d2c396706f4c.zip
add it_url::get_multi to fetch multiple urls in parallel
-rw-r--r--  | it_url.class   | 45
-rwxr-xr-x  | tests/it_url.t |  6
2 files changed, 51 insertions, 0 deletions
diff --git a/it_url.class b/it_url.class
index c5252aa..aa4f985 100644
--- a/it_url.class
+++ b/it_url.class
@@ -346,6 +346,51 @@ function get($p=null, $timeout=5)
 
 /**
+ * Get multiple URLs in parallel with timeout. Needs to be called statically
+ * @param $p parameter array with the following keys (same as it_url::get)
+ * @param $p['urls']: array of urls to get
+ * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
+ * @param $p['totaltimeout']: timeout for the whole function call
+ * @return array of contents of resulting pages using same keys as the urls input array,
+ *  considering redirects, excluding headers
+ */
+function get_multi($p=null)
+{
+	$p += array('totaltimeout' => "999999", 'timeout' => 5, 'retries' => 1);
+	$opts = array(
+		CURLOPT_HEADER => false,
+		CURLOPT_RETURNTRANSFER => true,
+		CURLOPT_TIMEOUT => $p['totaltimeout'],
+		CURLOPT_LOW_SPEED_LIMIT => 5,
+		CURLOPT_LOW_SPEED_TIME => $p['timeout'],
+		CURLOPT_FOLLOWLOCATION => true,
+	);
+	$mh = curl_multi_init();
+
+	foreach ($p['urls'] as $key => $url)
+	{
+		$ch[$key] = curl_init();
+		curl_setopt($ch[$key], CURLOPT_URL, $url);
+		curl_setopt_array($ch[$key], $opts);
+		curl_multi_add_handle($mh, $ch[$key]);
+	}
+
+	do {
+		curl_multi_exec($mh, $running);
+	} while ($running > 0);
+
+	$results = array();
+	foreach ($p['urls'] as $key => $url)
+	{
+		$results[$key] = curl_multi_getcontent($ch[$key]);
+		curl_multi_remove_handle($mh, $ch[$key]);
+		curl_close($ch[$key]);
+	}
+	curl_multi_close($mh);
+	return $results;
+}
+
+/**
  * Construct a local directory name to cache an URL. Named args:
  * @param $p['cachedir'] directory to store cache files in, defaults to $ULTRAHOME/var/urlcache
  * @param $p['id'] If you need more than one type of cache (e.g. different maxage) you can specify an id

diff --git a/tests/it_url.t b/tests/it_url.t
index 4c59004..e8507aa 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -126,4 +126,10 @@ is(
 	'</html>',
 	'it_url::get() static call'
 );
+
+$pages = it_url::get_multi(array('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/')));
+ok(it::match('</html>', $pages['a']), 'it_url::get_multi got first url');
+ok(it::match('</html>', $pages['b']), 'it_url::get_multi got second url');
+is(count($pages), 2, 'it_url::get_multi no additional array elements');
+
 ?>
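
A minimal usage sketch of the new call (not part of the commit; the hostnames are placeholders and it_url.class is assumed to be loadable). get_multi() takes a single parameter array like it_url::get(), and the returned array mirrors the keys of $p['urls']:

```php
<?php
require_once 'it_url.class';

// Fetch two pages concurrently; result keys mirror the keys of $p['urls'].
$pages = it_url::get_multi(array(
	'urls' => array(
		'a' => 'http://www.example.com/',  // placeholder URLs
		'b' => 'http://www.example.org/',
	),
	'timeout' => 5,         // per-read stall timeout (mapped to CURLOPT_LOW_SPEED_TIME)
	'totaltimeout' => 30,   // hard cap per transfer (mapped to CURLOPT_TIMEOUT)
));

foreach ($pages as $key => $body)
	echo "$key: " . strlen($body) . " bytes\n";
```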
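One caveat on the driver loop: curl_multi_exec() is non-blocking, so the bare do/while in the commit spins at full CPU until all transfers finish. The conventional refinement blocks in curl_multi_select() between iterations. The sketch below is not what this commit does; it is a self-contained illustration of the select-based loop, with placeholder URLs and the same handle setup and result collection as get_multi():

```php
<?php
// Sketch of a select-based curl_multi driver loop that avoids busy-waiting.
$urls = array('a' => 'http://www.example.com/', 'b' => 'http://www.example.org/');

$mh = curl_multi_init();
$ch = array();
foreach ($urls as $key => $url)
{
	$ch[$key] = curl_init($url);
	curl_setopt($ch[$key], CURLOPT_RETURNTRANSFER, true);
	curl_multi_add_handle($mh, $ch[$key]);
}

do {
	curl_multi_exec($mh, $running);   // drive all transfers, returns immediately
	if ($running > 0)
		curl_multi_select($mh, 1.0);  // sleep until socket activity (or 1s) instead of spinning
} while ($running > 0);

$results = array();
foreach ($urls as $key => $url)
{
	$results[$key] = curl_multi_getcontent($ch[$key]);
	curl_multi_remove_handle($mh, $ch[$key]);
	curl_close($ch[$key]);
}
curl_multi_close($mh);
```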