diff options
-rw-r--r-- | it_url.class | 8 |
1 file changed, 7 insertions, 1 deletion
diff --git a/it_url.class b/it_url.class index a08237a..8eb7aa8 100644 --- a/it_url.class +++ b/it_url.class @@ -98,6 +98,7 @@ function is_reachable($timeout = 5) * @param $p['retries'] Number of retries if download fails, default 1 * @param $p['retrysleep'] Number of seconds to wait before retry (additional to fetchsleep), fractions ok * @param $p['compression'] use compression (uses curl to do that) + * @param $p['postprocess'] function called with content and $p which has it_error. returns content or null * @return contents of resulting page, considering redirects, excluding headers, or false on error */ function get($p=null, $timeout=5) @@ -121,6 +122,8 @@ function get($p=null, $timeout=5) $url = new it_url($p['url']); $result = $url->request($p + ['followlocation' => true]); + if ($p['postprocess']) + $result = $p['postprocess']($result, ['it_error' => $p['retries'] > 0 ? false : ['title' => "invalid content from " . $p['url']]]); if (!$result && $p['retries'] > 0 && !it::match('^(4..|204)$', $url->result)) { @@ -383,9 +386,11 @@ static function get_multi($p=null) { $key = $keys[$info['handle']]; $content = curl_multi_getcontent($info['handle']); + if (isset($p['postprocess'])) + $content = $p['postprocess']($content, ['it_error' => $retries[$key] < $p['retries'] ? false : ['title' => "invalid content from " . $urls[$key]]]); EDC('reqtimings', $key, $info['result'], (gettimeofday(true) - $start) * 1000); - if ($info['result'] == CURLE_OK) { + if ($info['result'] == CURLE_OK && $content !== null) { if (!$p['noresults']) $results_unordered[$key] = $content; @@ -397,6 +402,7 @@ static function get_multi($p=null) } else if($retries[$key]++ < $p['retries']) { $closehandle($key); # closehandle must be called before addhandle as we use the same key $addhandle($key, $urls[$key]); + $mrc = CURLM_CALL_MULTI_PERFORM; # force continue if this was last handle } else { $results_unordered[$key] = false; unset($urls[$key]); |