author     Nathan Gass   2011-05-24 13:07:23 +0000
committer  Nathan Gass   2011-05-24 13:07:23 +0000
commit     a8a336daf2755274f430c88e67d6d2c396706f4c (patch)
tree       0754a5d02e17c7e2c7efc99f72b8f8b03000d590
parent     90bbfd2be4fba89e5016a290a5f433aa8204d793 (diff)
add it_url::get_multi to fetch multiple urls in parallel
-rw-r--r--  it_url.class     45
-rwxr-xr-x  tests/it_url.t    6
2 files changed, 51 insertions(+), 0 deletions(-)
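
A minimal usage sketch of the new call, based on the docblock below; the
URLs and array keys here are made up for illustration:

	$pages = it_url::get_multi(array(
		'urls' => array('home' => 'http://www.example.com/', 'api' => 'http://api.example.com/status'),
		'timeout' => 5,        # per-read timeout in seconds
		'totaltimeout' => 20,  # cap on the whole call
	));
	echo $pages['home'];           # page body, redirects followed, no headers
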
diff --git a/it_url.class b/it_url.class
index c5252aa..aa4f985 100644
--- a/it_url.class
+++ b/it_url.class
@@ -346,6 +346,51 @@ function get($p=null, $timeout=5)
/**
+ * Get multiple URLs in parallel, with timeout. Needs to be called statically.
+ * @param $p parameter array with the following keys (same as it_url::get)
+ * @param $p['urls']: array of URLs to fetch
+ * @param $p['timeout']: timeout per read in seconds, defaults to 5 (TODO: are fractions allowed?)
+ * @param $p['totaltimeout']: timeout for the whole function call, in seconds
+ * @return array of page contents keyed like the input urls array;
+ *  redirects are followed, response headers are excluded
+ */
+function get_multi($p=null)
+{
+	$p += array('totaltimeout' => 999999, 'timeout' => 5, 'retries' => 1);
+	$opts = array(
+		CURLOPT_HEADER => false,
+		CURLOPT_RETURNTRANSFER => true,
+		CURLOPT_TIMEOUT => $p['totaltimeout'],
+		CURLOPT_LOW_SPEED_LIMIT => 5,
+		CURLOPT_LOW_SPEED_TIME => $p['timeout'],
+		CURLOPT_FOLLOWLOCATION => true,
+	);
+	$mh = curl_multi_init();
+
+	foreach ($p['urls'] as $key => $url)
+	{
+		$ch[$key] = curl_init();
+		curl_setopt($ch[$key], CURLOPT_URL, $url);
+		curl_setopt_array($ch[$key], $opts);
+		curl_multi_add_handle($mh, $ch[$key]);
+	}
+
+	do {
+		curl_multi_exec($mh, $running);
+	} while ($running > 0);
+
+	$results = array();
+	foreach ($p['urls'] as $key => $url)
+	{
+		$results[$key] = curl_multi_getcontent($ch[$key]);
+		curl_multi_remove_handle($mh, $ch[$key]);
+		curl_close($ch[$key]);
+	}
+	curl_multi_close($mh);
+	return $results;
+}
+
+/**
* Construct a local directory name to cache an URL. Named args:
* @param $p['cachedir'] directory to store cache files in, defaults to $ULTRAHOME/var/urlcache
* @param $p['id'] If you need more than one type of cache (e.g. different maxage) you can specify an id
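
A note on the transfer loop above: the do/while re-calls curl_multi_exec()
in a tight loop, which busy-waits on the CPU until all transfers finish.
The usual remedy is to block in curl_multi_select() until a handle has
activity; a sketch of that variant (illustrative, not part of this commit):

	do {
		while (curl_multi_exec($mh, $running) == CURLM_CALL_MULTI_PERFORM)
			;                # let curl finish immediately pending work
		if ($running && curl_multi_select($mh, 1.0) == -1)
			usleep(100000);  # select failed, back off 100ms before retrying
	} while ($running > 0);
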
diff --git a/tests/it_url.t b/tests/it_url.t
index 4c59004..e8507aa 100755
--- a/tests/it_url.t
+++ b/tests/it_url.t
@@ -126,4 +126,10 @@ is(
'</html>',
'it_url::get() static call'
);
+
+$pages = it_url::get_multi(array('urls' => array('a' => 'http://www.gna.ch/', 'b' => 'http://search.ch/')));
+ok(it::match('</html>', $pages['a']), 'it_url::get_multi got first url');
+ok(it::match('</html>', $pages['b']), 'it_url::get_multi got second url');
+is(count($pages), 2, 'it_url::get_multi no additional array elements');
+
?>
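
One more caveat: get_multi() returns only page bodies, so a caller cannot
tell a failed fetch from an empty page. If that distinction matters, the
result loop could record per-handle status before closing the handles; an
illustrative variant (not part of this commit, $status is hypothetical):

	foreach ($p['urls'] as $key => $url)
	{
		$results[$key] = curl_multi_getcontent($ch[$key]);
		$status[$key] = curl_getinfo($ch[$key], CURLINFO_HTTP_CODE); # 0 if no response
		curl_multi_remove_handle($mh, $ch[$key]);
		curl_close($ch[$key]);
	}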