Class it_url:
/**
 * Store contents of url in a file and return file name. Threadsafe: Provides locking. Called statically.
 * Requires webserver writeable directory in $p['cachedir']. Sends it::error on fails by default. Params:
 * @param $p['url'] url to get
 * @param $p['id'] dirname for cached files; same id should have same expire policy
 * @param $p['headers'] optional array of HTTP headers to send
 * @param $p['cachedir'] directory to store cache files in, @see get_cache_dir
 * @param $p['timeout'] timeout in seconds, default 10. fractions allowed
 * @param $p['maxage'] maximum age of cache entries in seconds, default 23 hours. id mandatory if given
 * @param $p['randomexpire'] chance to randomly expunge an entry, 0..1
 * @param $p['cleanbefore'] maximum seconds since midnight when initiating expire, default 10800
 * @param $p['preprocess'] callback function (or array for methods) to change received file or array('function' => ..., 'in' => $src, 'out' => $dst, ...) with callback function plus args
 * @param $p['safety'] DEPRECATED. see $p['it_error']
 * @param $p['it_error'] parameters for it::error(), false means ignore errors, anything else gets passed to it::error() if errors occur
 * @param $p['keepfailed'] keep old versions of files if download fails
 * @param $p['returnheaders'] Return array($path, $headers, $isnewfile) instead of simply $path
 * @param $p['returncachemiss'] Return array($path, $headers, $isnewfile) instead of simply $path; $isnewfile tells whether the file was written by this call
 * @param $p['assoc'] fetched result is handled as array with 'status' and 'data' keys and cached JSON encoded; counts as success if status < 500 or data is non-empty
 * @param $p['postprocess'] UNSUPPORTED, use ::get_cache_contents
 * @param $p['lock'] prevent multiple requests to same url from different processes [true]
 * @return Cache filename (or array, see returnheaders/returncachemiss) or false if fetch failed
 */
static function get_cache($p = array())
{
	if (!$p['id'] && $p['maxage'])
		it::error("calling get_cache with maxage and without id");

	$p += ['timeout' => 10, 'maxage' => 23 * 3600, 'cleanbefore' => 10800, 'lock' => true, 'it_error' => $p['safety'] === 0 ? false : ($p['safety'] == 2 ? ['fatal' => true] : [])];
	$p['totaltimeout'] = $p['timeout'];

	# Give every result variable a defined value: not all paths below assign them
	# (e.g. a failed fetch without keepfailed never sets $success, a cache hit never sets $isnewfile)
	$success = false;
	$isnewfile = false;
	$headers = null;

	$path = it_url::get_cache_filename($p);	# Must be before changing cachedir below
	$p['cachedir'] = it_url::get_cache_dir($p);

	@mkdir($p['cachedir']);
	@mkdir(dirname($path));

	if (!is_writable(dirname($path)))
		it::error("parent dir not writable: " . trim(it::exec('ls -ld {dir} 2>&1', ['dir' => dirname($path)])));

	if (($filemtime = it_url::_expired($path, $p['maxage'], $p['randomexpire'])) || ($p['returnheaders'] && !file_exists(("$path.json"))))	# Outdated(non-zero int) or non-existent(true)?
	{
		$fileexists = $filemtime !== true;

		# With locking disabled $lock is simply true, otherwise it is whatever _lock() returns
		if ($lock = !$p['lock'] ?: it_url::_lock($path))
		{
			# Touch existing file to prevent locking other getters while refreshing
			if ($fileexists)
				touch($path);

			EDC('getcache', "new", $filemtime, $p['url'], $path);
			$url = new it_url;
			$data = $url->_get($p + ['checkonly' => true, 'filemtime' => EDC('nocache') ? null : $filemtime]);

			if ($p['assoc'] ? ($data['status'] < 500 || $data['data']) : $data)
			{
				$success = true;
				$isnewfile = it_url::_atomicwrite($path, $p['assoc'] ? ($data['status'] === 304 ? true : it::json_encode($data)) : $data);	# $data === true means not modified (no new data fetched) and instructs _atomicwrite to just touch the file

				if ($p['returnheaders'])
					it::file_put("$path.json", it::json_encode($url->headers));
			}
			else if ($p['keepfailed'])
				$success = $fileexists;
			else
				@unlink($path);	# Expired and failed to get

			it_url::_unlock($path, $lock);
		}
		else
		{
			# Wait for file currently being transferred
			EDC('getcache', "wait", $p['url'], $path);
			$success = it_url::_waitforlockedfile($path, $p);

			# If file could not be fetched by other thread but exists and we are in keepfailed mode then return old file
			if (!$success && $p['keepfailed'])
				$success = $fileexists;
		}
	}
	else
	{
		# Get file from cache
		EDC('getcache', "cached", $p['url'], $path);
		$success = true;	# Up to date
	}

	# Read headers before $path is modified for preprocessing
	if ($p['returnheaders'])
		$headers = it::json_decode(it::file_get("$path.json"), ['assoc' => true]);

	if ($success && $p['preprocess'])
	{
		$srcpath = $path;
		$path .= substr(md5(serialize($p['preprocess'])), 0, 2);	# Preprocessed copy gets its own name derived from the preprocess spec

		if ($filemtime = $isnewfile ? true : it_url::_expired($path, $p['maxage']))	# Outdated(non-zero int) or non-existent(true)?
		{
			if ($lock = !$p['lock'] ?: it_url::_lock($path))
			{
				# Touch existing file to prevent locking other getters while refreshing
				if ($filemtime !== true)
					touch($path);

				EDC('getcache', "processnew", $p['url'], $path);
				$dstpath = "$path.preprocesstmp";

				if (is_array($p['preprocess']) && $p['preprocess']['function'])	# Needs is_array as it can be a string where dereferencing gives first character!
					$p['preprocess']['function'](['in' => $srcpath, 'out' => $dstpath] + $p['preprocess']);
				else
					$p['preprocess']($srcpath, $dstpath);

				# Only a non-empty result replaces the preprocessed file
				if (!($success = @filesize($dstpath) && @rename($dstpath, $path)))
				{
					@unlink($dstpath);

					if (!$p['keepfailed'])
						@unlink($path);

					$success = file_exists($path);
				}

				it_url::_unlock($path, $lock);
			}
			else
			{
				# Wait for file currently being processed
				EDC('getcache', "processwait", $p['url'], $path);
				$success = it_url::_waitforlockedfile($path, $p);
			}
		}
	}

	# cache cleanup, preferably at night
	$isnight = date('H') >= 1 && date('H')*3600 + date('i')*60 < $p['cleanbefore'];
	if (time() - @filemtime($p['cachedir'] . "/cleaned") > ($isnight ? 80000 : 2*80000))
	{
		it::file_put($p['cachedir'] . "/cleaned", "");	# touch could have permission problems
		$maxagemin = intval($p['maxage']/60);

		# Escape the directory and then the whole script so that spaces, quotes or shell
		# metacharacters in cachedir can neither break nor alter the background command
		$cleanup = "cd " . escapeshellarg($p['cachedir']) . " && for i in [0-9a-f][0-9a-f]; do sleep 20; ionice -c 3 find \$i -mmin +$maxagemin -type f -delete; done";
		exec("nohup bash -c " . escapeshellarg($cleanup) . " </dev/null >/dev/null 2>&1 &");
	}

	if (EDC('getcachelog'))
		it::log('debug', 'getcachelog', $p['id'], $p['url'], !$isnewfile ? "" : "fetched=" . mb_substr(is_string($data) ? $data : "(assoc)", 0, 400));

	### EDC('getcache', $success, $path); # too verbose
	return $success ? ($p['returnheaders'] || $p['returncachemiss'] ? [$path, $headers, (bool)$isnewfile] : $path) : false;
}