/*
**
** url.class - URL parsing, retrieval and caching functions
*/
class it_url
{
/* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
var $url; /* E.g. http://www.relog.ch/ */
var $protocol; /* E.g. http */
var $hostname; /* E.g. relog.ch */
var $realhostname; /* E.g. www.relog.ch */
var $port; /* E.g. 80 */
var $explicitport; /* E.g. 80, explicitly set in rawurl */
var $path; /* E.g. / */
var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
var $user; /* E.g. falcon */
var $pass; /* E.g. joshua */
var $page; /* Page or empty */
var $page_read; /* true if page read */
var $title; /* Page title or empty */
var $description; /* Page description or empty */
var $cookies; /* key => values of cookies from server */
var $headers; /* Headers of page fetched by get() */
var $data; /* Data part, even if return code is not 200 */
var $result; /* Return code of get() */
var $redir = 0; /* Redirect count */
/**
* Constructor: canonicalize an URL
* @param $url URL this object represents
*/
function it_url($url = null, $options = array())
{
$this->rawurl = $url;
if (preg_match('#^([a-z]+):/+(?:([^:]*):([^@]*)@)?(.*)$#is', $url, $regs))
{
$this->protocol = strtolower($regs[1]);
$this->user = $regs[2];
$this->pass = $regs[3];
$url = $regs[4];
}
else if (preg_match('/^[a-z]:/', $url) || preg_match('#^/#', $url))
{
$this->protocol = 'file';
}
else
$this->protocol = 'http';
/* Default port */
if ($this->protocol == 'http')
$protoport = 80;
else if ($this->protocol == 'https')
$protoport = 443;
$this->port = intval($protoport);
if (class_exists('Net_IDNA', false))
$idn = Net_IDNA::getInstance();
else
$idn = null; # avoid undefined variable when Net_IDNA is unavailable
if ($idn)
$pattern = '^([^/]+)/*(.*)$';
else
$pattern = '^([a-z0-9_:\.-]+)/*(.*)$';
$this->explicitport = '';
if (preg_match("#$pattern#is", $url, $regs))
{
list($hostname, $port) = explode(':', $regs[1]);
$this->realhostname = strtolower($hostname);
if ($port) {
$this->port = intval($port);
$this->explicitport = ":" . $port;
}
$url = $regs[2];
}
$this->hostname = preg_replace('/^www\./', '', $this->realhostname);
$this->path = preg_replace('#^/$#', '', $url);
if ($this->port != $protoport)
$this->url = "$this->protocol://$this->realhostname:$this->port/$this->path";
else
$this->url = "$this->protocol://$this->realhostname/$this->path";
if ($idn)
{
$realhostname = $this->realhostname;
if (!preg_match('/^utf-?8$/i', $options['encoding']))
$realhostname = utf8_encode($realhostname);
$encoded = $idn->encode($realhostname);
if ($encoded != $realhostname)
$this->realhostname = $encoded;
}
}
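/*
** Constructor usage sketch (hypothetical URL; resulting fields shown as comments):
**   $u = new it_url('http://www.example.com:8080/foo/bar.html');
**   # $u->protocol     == 'http'
**   # $u->realhostname == 'www.example.com', $u->hostname == 'example.com'
**   # $u->port         == 8080, $u->explicitport == ':8080'
**   # $u->path         == 'foo/bar.html'
**   # $u->url          == 'http://www.example.com:8080/foo/bar.html'
*/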
/**
* Read the page into memory, extract title and description and
* set $this->page, $this->title and $this->description
* @param $timeout Timeout for operation, defaults to unlimited (0)
* @return True if page has been read and $this->page is set
*/
function read_page($timeout = 0)
{
unset($this->page);
unset($this->title);
unset($this->description);
/*
** If the URL does not end in a dot followed by at least one letter,
** it is considered bogus. This rejects 'localhost', 'www', and numerical IP addresses.
*/
if (!preg_match('/\.[a-z]+$/i', $this->realhostname))
return 0;
$url = $this->rawurl;
$count = 0; /* Redirect counter */
while ($this->page == '')
{
$cmd = 'LANG=C wget 2>&1 -T ' . ((int)$timeout) . ' -q -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd("$url"));
$this->page = `$cmd`;
if ($this->page == '') /* An error occurred. Find out what it was. */
{
$cmd = 'LANG=C wget 2>&1 -T' . ((int)$timeout) . ' -v -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd($url));
$error = `$cmd`;
if (preg_match('/Location: ([^ ]*)/i', $error, $regs)) /* Redirect ? */
{
$url = $regs[1];
if (!preg_match('/^[a-z]+:/i', $url)) /* Kludge for Miss Kournikova's admirers: grok local redirects (in violation of RFC) */
$url = $this->rawurl.'/'.$url;
}
else
break;
}
if (++$count > 4) /* Avoid infinite redirect loops */
break;
}
$this->page_read = 1;
if (preg_match('#<title>([^<]*)#i', $this->page, $regs))
$this->title = it_htmlentities_decode($regs[1]);
if (preg_match('/<meta name="description"[^>]+content="([^"]*)">/i', $this->page, $regs))
$this->description = it_htmlentities_decode($regs[1]);
return ($this->page != '');
}
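/*
** Usage sketch for read_page() (hypothetical host; relies on wget being in the PATH):
**   $u = new it_url('http://www.example.com/');
**   if ($u->read_page(10)) # 10 second wget timeout
**   echo $u->title . "\n" . $u->description;
*/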
/* Return the description of this page */
function get_description()
{
if (!$this->page_read)
$this->read_page();
return $this->description;
}
/* Return the title of this page */
function get_title()
{
if (!$this->page_read)
$this->read_page();
return $this->title;
}
/**
* Check if a given url (currently http:port80-only) can be fetched
* Note: Redirects are treated as successful
* @param $timeout Timeout for connection in seconds
* @return true if url could be fetched
*/
function is_reachable($timeout = 5)
{
$result = false;
if ($fp = @fsockopen($this->realhostname, $this->port, $dummy_errno, $dummy_errstr, $timeout))
{
fputs($fp, "GET /$this->path HTTP/1.0\r\nHost: $this->realhostname\r\nUser-Agent: ITools\r\n\r\n");
$line = fgets($fp, 128);
fclose($fp);
#debug("it_url::is_reachable($this->rawurl: $line");
$result = preg_match("#^$this->protocol/[^ ]+ +[23]#i", $line);
}
return $result;
}
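/*
** Usage sketch for is_reachable() (hypothetical host; true on a 2xx/3xx status line):
**   $u = new it_url('http://www.example.com/');
**   if ($u->is_reachable(3)) # 3 second connect timeout
**   echo "server answers";
*/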
/**
* Get simple URL with timeout and one retry. Can be called statically
*
* If the protocol is not http, only features of get_multi are supported.
*
* @param $p parameter array with the following keys
* @param $p['url'] url to get, defaults to constructor URL
* @param $p['headers'] optional array of HTTP headers to send
* @param $p['safety'] set to 1 to generate an it::error in case of timeout
* @param $p['it_error'] extra arguments given to it_error if safety is on and an error occurs
* @param $p['timeout'] timeout per read in seconds, defaults to 5. fractions allowed. silent, see $p['safety']
* @param $p['totaltimeout'] timeout for the whole function call
* @param $p['maxlength'] maximum length of response
* @param $p['filemtime'] Add HTTP header to only fetch when newer than this, otherwise return true instead of data
* @param $p['data']: POST data array with key-value pairs
* @param $p['retries']: Number of retries if download fails, default 1
* @return contents of resulting page, considering redirects, excluding headers, or false on error
*/
function get($p=null, $timeout=5)
{
if (!is_array($p))
$p = array('url' => $p, 'timeout' => $timeout);
$p += array('retries' => 1);
if (($filter = EDC('req')) && strstr($p['url'], it::replace(array('1' => ":"), $filter)))
ED($p['url']);
if ($this instanceof it_url)
{
$url = $this;
if ($p['url'])
$this->it_url($p['url']);
}
else # called statically
$url = new it_url($p['url']);
if ($url->protocol == 'http')
$result = $url->request($p);
else
$result = $url->request_curl($p);
if ($url->headers['Location'] && preg_match('#^(https?://[^/]*)?(/)?(.*)$#i', $url->headers['Location'], $parts) && ($parts[1] != $url->url)) # Handle redirects (supports relative and global)
{
unset($p['url'], $p['headers']['Host']);
$url->it_url($parts[1] ? $parts[1].$parts[2].$parts[3] : $url->protocol.'://'.$url->realhostname.($parts[2] ? $parts[2].$parts[3] : '/'.dirname($url->path).'/'.$parts[3]));
if (++$url->redir <= 4) /* Avoid infinite redirects */
return $url->get($p);
}
if (!$result && $p['retries'] > 0 && $url->result < 400)
$result = $url->get(array('retries' => $p['retries'] - 1) + $p);
if (($filter = EDC('res')) && strstr($p['url'], it::replace(array('1' => ":"), $filter)))
ED($result);
return $result;
}
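/*
** Usage sketch for get() (hypothetical URL and POST data; a 'data' array triggers a POST as documented above):
**   $html = it_url::get(array('url' => 'http://www.example.com/search', 'data' => array('q' => 'foo'), 'timeout' => 3));
**   if ($html === false)
**   echo "download failed after retries";
*/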
function parse_http_header($header)
{
foreach (explode("\n", trim($header)) as $line) {
$line = trim($line);
if (preg_match('#^(HTTP\S+)\s(\d+)#', $line, $parts)) # Parse result code
$this->headers[$parts[1]] = $this->result = $parts[2];
elseif (preg_match('#^([^:]+): (.*)$#', $line, $parts))
$this->headers[$parts[1]] = $parts[2];
if ($parts[1] == 'Set-Cookie' && preg_match('/^([^=]+)=([^;]*)/', $parts[2], $cookie))
$this->cookies[$cookie[1]] = $cookie[2];
}
}
function request($p=array())
{
$p += array('totaltimeout' => "999999", 'timeout' => 5);
$url = $this;
if ($p['url'])
$this->it_url($p['url']);
$url->result = $result = false;
unset($url->data);
$url->headers = $url->cookies = array();
$p['timeout'] = min($p['timeout'], $p['totaltimeout']); # No operation may be longer than totaltimeout
$endtime = time() + $p['totaltimeout'];
if ($fp = @fsockopen($url->realhostname, $url->port, $dummy_errno, $errstr, $p['timeout']))
{
# urlencode data pairs if is array
if (is_array($p['data']))
$data = it_url::params($p['data']);
else
$data = $p['data'];
$p['headers'] = (array)$p['headers'] + array(
'Host' => $url->realhostname . $url->explicitport,
'User-Agent' => "Mozilla/5.0 (compatible; MSIE 9.0; ITools)",
'Accept-Language' => $p['headers']['Accept-Language'] ?: T_lang(), # can prevent loading of it_text
'Referer' => it::match('([-\w]+\.\w+)$', $url->hostname) == it::match('([-\w]+\.\w+)$', $_SERVER['HTTP_HOST']) ? it_url::absolute(U($_GET)) : null,
);
if (is_int($p['filemtime']))
$p['headers']['If-Modified-Since'] = date("r", $p['filemtime']);
if ($datalen = strlen($data))
{
$method = $p['method'] ?: "POST";
$p['headers'] += array(
'Content-Type' => "application/x-www-form-urlencoded",
'Content-Length' => $datalen,
);
}
else
$method = $p['method'] ?: "GET";
if ($url->user || $url->pass)
$p['headers'] += array('Authorization' => 'Basic ' . base64_encode($url->user . ':' . $url->pass));
foreach ($p['headers'] as $header => $value) {
if ($value !== null)
$headers .= "$header: $value\r\n";
}
stream_set_timeout($fp, intval($p['timeout']), intval(($p['timeout']*1000000)%1000000));
@fputs($fp, "$method /$url->path HTTP/1.0\r\n$headers\r\n$data");
$url->header = '';
while (!feof($fp) && ($origline = @fgets($fp, 10240)) && (trim($origline)) && (time() < $endtime))
{
$url->header .= $origline;
$origline = '';
}
$url->header .= $origline;
$this->parse_http_header($url->header);
if ($url->result)
{
if ($url->headers['Transfer-Encoding'] == "chunked") # Bogus HTTP/1.1 chunked answer from server (e.g. Wordpress/Apache2/PHP5)
{
while (($len = hexdec(fgets($fp))) && (!$p['maxlength'] || strlen($url->data) + $len <= $p['maxlength']))
{
$chunk = "";
while (!feof($fp) && (strlen($chunk) < $len) && (time() < $endtime))
$chunk .= @fread($fp, $len - strlen($chunk));
$url->data .= $chunk;
}
}
else
{
while (!feof($fp) && (time() < $endtime) && (!$p['maxlength'] || strlen($url->data) <= $p['maxlength']))
$url->data .= @fread($fp, 20480);
}
if ($p['filemtime'] && ($url->result == 304))
$result = true; # Not modified, success but no data
else if ($url->result < 400)
$result =& $url->data;
}
@fclose($fp);
}
if (time() >= $endtime)
$result = false;
if ($result === false && $p['safety'] == 1)
it::error(array('title' => "problem (timeout?) getting $url->url " . $errstr) + (array)$p['it_error']);
if ($p['maxlength'] && (strlen($this->data) + $len > $p['maxlength']))
$result = false;
return $result;
}
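/*
** Usage sketch for request() on an instance (hypothetical URL): conditional GET via 'filemtime'
**   $u = new it_url('http://www.example.com/feed.xml');
**   $body = $u->request(array('filemtime' => time() - 3600, 'timeout' => 2));
**   # true means HTTP 304 (unchanged), a string is fresh data, false means failure
*/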
static function curl_opts($p=array())
{
$p += array('totaltimeout' => "999999", 'timeout' => 5);
foreach ($p['headers'] as $header => $value)
$headers[] = "$header: $value";
if ($p['maxlength']) {
$maxlength = $p['maxlength'];
$add = array(
#CURLOPT_BUFFERSIZE => 1024 * 1024 * 10,
CURLOPT_NOPROGRESS => false,
CURLOPT_PROGRESSFUNCTION => function ($dummy0, $dummy1, $size, $dummy2, $dummy3) use ($maxlength) { return $size < $maxlength ? 0 : 1; },
);
}
return (array)$add + array(
CURLOPT_HEADER => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TIMEOUT => $p['totaltimeout'],
CURLOPT_LOW_SPEED_LIMIT => 5,
CURLOPT_LOW_SPEED_TIME => $p['timeout'],
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_HTTPHEADER => $headers,
CURLOPT_SSL_VERIFYPEER => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_CAPATH => '/etc/ssl/certs/',
CURLINFO_HEADER_OUT => 1,
);
}
/*
* Drop-in replacement for request() using curl
*
* todo: the following request() parameters are not supported yet:
* @param $p['filemtime'] Add HTTP header to only fetch when newer than this, otherwise return true instead of data
* @param $p['data'] POST data array with key-value pairs
* @param $p['method'] different HTTP method
*/
function request_curl($p=array())
{
$url = $this;
if ($p['url'])
$this->it_url($p['url']);
$url->headers = array();
$p['headers'] = (array)$p['headers'] + array(
'Host' => $url->realhostname . $url->explicitport,
'User-Agent' => "Mozilla/4.0 (compatible; MSIE 7.0; ITools)",
'Accept-Language' => T_lang(),
);
$opts = array(CURLOPT_FOLLOWLOCATION => false, CURLOPT_HEADER => 1) + self::curl_opts($p);
$curl = curl_init($url->rawurl);
curl_setopt_array($curl, $opts);
$got = curl_exec($curl);
EDC('curlinfo', curl_getinfo($curl));
if ($got === false && $p['safety'] == 1)
it::error(array('title' => "problem getting $url->url with curl: " . curl_error($curl)) + (array)$p['it_error']);
if ($got) {
list($url->header, $url->data) = explode("\r\n\r\n", $got, 2);
$url->parse_http_header($url->header);
if ($p['maxlength'] && (strlen($this->data) > $p['maxlength']))
$result = false;
else
$result =& $url->data;
} else
$result = false;
return $result;
}
/**
* Get multiple URLs in parallel with timeout. Needs to be called statically
* @param $p parameter array with the following keys (same as it_url::get)
* @param $p['urls']: array of urls to get
* @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
* @param $p['totaltimeout']: timeout for the whole function call
* @param $p['headers']: optional array of HTTP headers to send
* @return array of contents (or false for errors like timeouts) of the resulting pages, using the same
* keys as the urls input array, considering redirects, excluding headers
*/
function get_multi($p=null)
{
$p += array('retries' => 1);
$p['headers'] = (array)$p['headers'] + array(
'User-Agent' => "Mozilla/4.0 (compatible; MSIE 7.0; ITools)",
'Accept-Language' => T_lang(),
);
$opts = array(CURLOPT_FOLLOWLOCATION => true) + self::curl_opts($p);
$mh = curl_multi_init();
$urls = array();
foreach ($p['urls'] as $key => $url)
$urls[$key] = is_array($url) ? $url : array('url' => $url);
foreach ($urls as $key => $url)
{
$handle = curl_init();
curl_setopt($handle, CURLOPT_URL, $url['url']);
curl_setopt_array($handle, $opts);
curl_multi_add_handle($mh, $handle);
$keys[$handle] = $key;
$handles[$key] = $handle;
}
$start = microtime(true);
# curl_multi loop copied from example at http://php.net/manual/en/function.curl-multi-exec.php
$active = null;
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
$timeout = 0.001; # Very short timeout to work around problem with first select call on cURL 7.25.0
while (!$abort && $active && $mrc == CURLM_OK)
{
if (curl_multi_select($mh, $timeout) == -1)
usleep($timeout * 1000000);
do {
$mrc = curl_multi_exec($mh, $active);
while (($info = curl_multi_info_read($mh)) !== false)
{
if ($info['msg'] == CURLMSG_DONE)
{
$key = $keys[$info['handle']];
EDC('reqtimings', $key, $info['result'], (microtime(true) - $start) * 1000);
if ($info['result'] == CURLE_OK)
$results_unordered[$key] = curl_multi_getcontent($info['handle']);
else
$results_unordered[$key] = false;
if (($handler = $urls[$keys[$info['handle']]]['handler']))
$abort = $handler($info['result'], $results_unordered[$key]);
}
}
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
$timeout = 0.1; # Longer delay to avoid busy loop but shorter than default of 1s in case we still hit cURL 7.25.0 problem
}
$results = array();
foreach ($handles as $key => $handle) {
curl_multi_remove_handle($mh, $handle);
curl_close($handle);
$results[$key] = $results_unordered[$key];
}
curl_multi_close($mh);
return $results;
}
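/*
** Usage sketch for get_multi() (hypothetical URLs); results keep the keys of the input array.
** The optional per-url 'handler' callback matches the code above and can abort all transfers by returning true:
**   $pages = it_url::get_multi(array('timeout' => 3, 'urls' => array(
**   'home' => 'http://www.example.com/',
**   'news' => array('url' => 'http://www.example.org/news', 'handler' => function($code, $data) { return false; }),
**   )));
**   # $pages['home'] and $pages['news'] contain page bodies or false on error
*/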
/**
* Construct a local directory name to cache an URL. Named args:
* @param $p['cachedir'] directory to store cache files in, defaults to $ULTRAHOME/var/urlcache
* @param $p['id'] If you need more than one type of cache (e.g. different maxage) you can specify an id
*/
function get_cache_dir($p)
{
$p += array('cachedir' => $GLOBALS['ULTRAHOME'] . "/var/urlcache", 'id' => "default");
return rtrim($p['cachedir'] . "/" . $p['id'], "/");
}
/**
* Construct a local file name to cache an URL. Named args:
* @param $p['url'] remote url to get
* @param $p['cachedir'] directory to store cache files in, @see get_cache_dir
* @param $p['id'] If you need more than one type of cache (e.g. different maxage) you can specify an id
*/
function get_cache_filename($p)
{
if (!is_array($p))
$p = array('url'=>$p);
$p['cachedir'] = it_url::get_cache_dir($p);
$filename = md5(T_lang() . $p['url'] . ($p['headers'] ? serialize($p['headers']) : ""));
return $p['cachedir'] . "/" . substr($filename, 0, 2) . "/$filename";
}
/**
* Store contents of url in a file and return file name. Threadsafe: Provides locking. Called statically.
* Requires a webserver-writable directory in $p['cachedir']. Params in associative array p:
* @param $p['url'] url to get
* @param $p['headers'] optional array of HTTP headers to send
* @param $p['cachedir'] directory to store cache files in, @see get_cache_dir
* @param $p['id'] If you need more than one type of cache (e.g. different maxage) you can specify an id
* @param $p['timeout'] timeout in seconds, default 10. fractions allowed
* @param $p['maxage'] maximum age of cache entries in seconds, default 86400
* @param $p['cleanbefore'] maximum daytime when attempting cleanup, default 7200
* @param $p['preprocess'] callback function (or array for methods) to change received file or array('function' => ..., 'in' => $src, 'out' => $dst, ...) with callback function plus args
* @param $p['safety'] value 0 means don't generate alerts, value 1 means generate alerts on timeouts and failures
* @param $p['keepfailed'] keep old versions of files if download fails (sending alerts conservatively)
* @param $p['returnheaders'] Return array($path, $headers) instead of simply $path
* @param $p['it_error'] parameters for it::error()
*/
function get_cache($p = array())
{
$p += array('timeout' => 10, 'maxage' => 86400, 'cleanbefore' => 7200, 'safety' => 1, 'it_error' => array());
$p['totaltimeout'] = $p['timeout'];
$path = it_url::get_cache_filename($p); # Must be before changing cachedir below
$p['cachedir'] = it_url::get_cache_dir($p);
@mkdir($p['cachedir']);
@mkdir(dirname($path));
if (!is_writable(dirname($path)))
it::error(dirname($path) . " not writable");
if ($filemtime = it_url::_expired($path, $p['maxage'])) # Outdated(non-zero int) or non-existent(true)?
{
$fileexists = $filemtime !== true;
if ($lock = it_url::_lock($path))
{
# Touch existing file to prevent locking other getters while refreshing
if ($fileexists)
touch($path);
EDC('getcache', "new", $filemtime, $p['url'], $path);
$url = new it_url;
if ($result = $url->get(array('safety' => 0) + $p + array('filemtime' => EDC('nocache') ? null : $filemtime))) # => true means not modified (no new data fetched)
{
$newfile = it_url::_atomicwrite($path, $result);
if ($p['returnheaders'])
file_put_contents("$path.headers", '<?php return ' . var_export($url->headers, true) . ";\n"); # headers as includable PHP, read back via include() below
}
else if ($p['keepfailed'])
$result = $fileexists;
else
@unlink($path); # Expired and failed to get
it_url::_unlock($path, $lock);
}
else
{
# Wait for file currently being transferred
EDC('getcache', "wait", $p['url'], $path);
$result = it_url::_waitforlockedfile($path, $p);
# If the file could not be fetched by the other thread but exists and we are in keepfailed mode then return the old file
if (!$result && $p['keepfailed'])
$result = $fileexists;
}
}
else
{
# Get file from cache
EDC('getcache', "cached", $p['url'], $path);
$result = true; # Up to date
}
# Read headers before $path is modified for preprocessing
if ($p['returnheaders'])
$headers = @include("$path.headers");
if ($result && $p['preprocess'])
{
$srcpath = $path;
$path .= substr(md5(serialize($p['preprocess'])), 0, 2);
if ($filemtime = $newfile ? true : it_url::_expired($path, $p['maxage'])) # Outdated(non-zero int) or non-existent(true)?
{
if ($lock = it_url::_lock($path))
{
# Touch existing file to prevent locking other getters while refreshing
if ($filemtime !== true)
touch($path);
EDC('getcache', "processnew", $p['url'], $path);
$dstpath = "$path.preprocesstmp";
if (is_array($p['preprocess']) && $p['preprocess']['function']) # Needs is_array as it can be a string where dereferencing gives first character!
call_user_func($p['preprocess']['function'], array('in' => $srcpath, 'out' => $dstpath) + $p['preprocess']);
else
call_user_func($p['preprocess'], $srcpath, $dstpath);
if (!($result = @filesize($dstpath) && @rename($dstpath, $path)))
{
@unlink($dstpath);
@unlink($path);
}
it_url::_unlock($path, $lock);
}
else
{
# Wait for file currently being processed
EDC('getcache', "processwait", $p['url'], $path);
$result = it_url::_waitforlockedfile($path, $p);
}
}
}
# cache cleanup at night
if ((date('H')*3600 + date('i')*60 < $p['cleanbefore']) && (time()-@filemtime($p['cachedir'] . "/cleaned") > 80000))
{
touch($p['cachedir'] . "/cleaned");
$maxagemin = intval($p['maxage']/60);
exec("nohup bash -c 'cd {$p['cachedir']} && sleep 10 && find ?? -mmin +$maxagemin -print0 | xargs -0 -r rm' >/dev/null 2>&1 &");
}
EDC('getcache', $result, $path);
return $result ? ($p['returnheaders'] ? array($path, $headers) : $path) : false;
}
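/*
** Usage sketch for get_cache() (hypothetical URL; needs a webserver-writable cache directory):
**   $path = it_url::get_cache(array('url' => 'http://www.example.com/logo.png', 'maxage' => 3600));
**   if ($path)
**   $data = file_get_contents($path); # or use get_cache_contents() below
*/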
/**
* Fetch a file, cache it and return contents
* @param see it_url::get_cache
*/
function get_cache_contents($p)
{
return ($fn = self::get_cache($p)) ? file_get_contents($fn) : ($p['safety'] === 0 ? null : it::error(array('title' => "failed getting " . $p['url'], 'body' => var_export($p, true))));
}
/**
* Check whether file at given path is older than maxage
* @param $path File to check
* @param $maxage Maximum age of file in seconds
* @return Not expired: false | Non-existent file: true | Timestamp of expired file
*/
function _expired($path, $maxage)
{
if ($result = EDC('nocache') ? false : @filemtime($path))
{
if (time() - $result > $maxage)
EDC('getcache', "expired", $path);
else
$result = false;
}
else # File does not exist yet
$result = true;
return $result;
}
/**
* Acquire lock for a given file
* @param $path File to lock
* @return Lock handle if the file was successfully locked, false otherwise
*/
function _lock($path)
{
$force = EDC('nocache') || (($mtime = @filemtime("$path.lock")) && (time() - $mtime > 30)); # expire forgotten locks
return @fopen("$path.lock", $force ? "w" : "x");
}
/**
* Release lock on a file
* @param $path File to unlock
* @param $lock Handle to lock acquired by _lock
*/
function _unlock($path, $lock)
{
fclose($lock);
@unlink("$path.lock");
}
/**
* Wait for file which is currently locked
* @param $path File to wait for
* @param $p Wait parameters, see @get_cache
* @return Whether lock was released within timeout and file is still there
*/
function _waitforlockedfile($path, $p)
{
$sleeptime = 0.1; # seconds to wait per pass
# wait until cache is ready, then read from cache
for ($maxpasses = $p['timeout'] / $sleeptime, $passes = 0; ($lockedbyother = file_exists("$path.lock")) && ($passes < $maxpasses); ++$passes)
{
usleep($sleeptime * 1000000);
clearstatcache();
}
if ($lockedbyother && $p['safety'] == 1)
it::error(($passes < $maxpasses ? "error getting url" : "timeout") . " in it_url::get_cache(): url={$p['url']}, passes=$passes, maxpasses=$maxpasses, path=$path");
return !$lockedbyother && file_exists($path);
}
/**
* Write data to tmp file and atomically rename it to destination
* @param $path Destination file to write data to
* @param $data Data to write | true to just touch file
* @return True if data was written to file
*/
function _atomicwrite($path, $data)
{
$result = false;
if ($data === true) # Not modified, no new data, just update timestamp
touch($path);
else if ($data !== false)
{
$tmpname = tempnam(dirname($path), "writetmp");
fputs($cachetmp = fopen($tmpname, "w"), $data);
fclose($cachetmp);
chmod($tmpname, 0664);
$result = rename($tmpname, $path);
}
else
@unlink($path);
return $result;
}
/**
* Make an URL absolute by using host and protocol from the current Apache request (but not the port number)
* @param $url Optional URL ( foo.html, /foo.html, //host/bar.html, http://host/bar.html ), default self
* @return absolute version of URL ( http[s]://host/bar.html )
*/
static function absolute($url=null)
{
if (!isset($url))
$url = $_SERVER['PHP_SELF'];
if (!preg_match('/^\w+:/', $url))
{
if (!preg_match('#^//#', $url))
{
$dir = preg_replace('#/[^/]*$#', '/', $_SERVER['PHP_SELF']);
$url = preg_match('#^/#', $url) ? $url : "$dir$url";
$url = "//" . $_SERVER['HTTP_HOST'] . $url;
}
$url = "http" . (isset($_SERVER['HTTPS']) ? 's':'') . ":$url";
}
return $url;
}
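/*
** Usage sketch for absolute(), assuming HTTP_HOST 'www.example.com' and PHP_SELF '/dir/page.php':
**   it_url::absolute('foo.html');               # 'http://www.example.com/dir/foo.html'
**   it_url::absolute('/bar.html');              # 'http://www.example.com/bar.html'
**   it_url::absolute('//cdn.example.com/x.js'); # 'http://cdn.example.com/x.js'
*/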
/**
* Craft a valid redirect URL, send Location: header and terminate execution
* @param $url Optional URL ( foo.html, /foo.html, //host/bar.html, http://host/bar.html ), default self
* @param $type Type of redirect, "temporary" or "permanent", default temporary
* @return This method never returns.
*/
function redirect($url = null, $type = "temporary")
{
$codes = array('permanent' => 301, 'temporary' => 303); # NOTE: HTTP 303 is called "See Other", rather than Temporary (which would be HTTP 307), but is the behaviour one usually wants for temporary redirects
if (!($code = $codes[$type]))
it::fatal("Invalid redirect type '$type', must be 'permanent' or 'temporary'");
$url = preg_replace("/[\r\n].*/", '', it_url::absolute($url)); # Security: cut after CR/LF
if (EDC('noredir')) {
if (!function_exists('a'))
new it_html();
echo a(array('href' => $url), Q($url)) . Q(" (HTTP/1.1 $code, $type redirect)") . br() . Q("Trace: " . it_debug::backtrace());
}
else
header('Location: ' . it_untaint($url, TC_SELF), true, $code);
exit;
}
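/*
** Usage sketch for redirect() (hypothetical targets; both calls terminate the request):
**   it_url::redirect('/login');              # 303 See Other
**   it_url::redirect('/moved', 'permanent'); # 301 Moved Permanently
*/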
/**
* Urlencode but leave some chars
*/
static function encode($str)
{
return strtr(urlencode($str), array("%2C"=>",", "%28"=>"(", "%29"=>")"));
}
/**
* Create GET request from params, optionally only using given fields
* @param $params Array to take values from, usually $_GET
* @param $keys Keys to use; default: all
*/
static function params($params, $keys = null)
{
return join("&", it_url::_params($params, $keys));
}
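/*
** Usage sketch for params() (hypothetical input; nested arrays become key[subkey]= pairs):
**   it_url::params(array('q' => 'foo bar', 'page' => 2));    # "q=foo+bar&page=2"
**   it_url::params(array('f' => array('a' => 1, 'b' => 2))); # "f[a]=1&f[b]=2"
*/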
static function _params($params, $keys = null)
{
$result = array();
if (!isset($keys))
$keys = array_keys($params);
foreach ($keys as $key)
{
if (is_array($params[$key]))
{
foreach (it_url::_params($params[$key]) as $value)
{
if (strlen($value))
$result[] = it::replace(array('^([^=\[]*)' => $key . '[$1]'), $value);
}
}
else if (strlen($params[$key]))
$result[] = urlencode($key) . "=" . it_url::encode($params[$key]);
}
return $result;
}
/**
* Convert url into array with base url in $result[0] and GET params
*/
static function parse($url)
{
list($path, $query) = explode("?", $url, 2);
parse_str((string)$query, $params);
return (array)$path + (array)$params;
}
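/*
** Usage sketch for parse() (hypothetical URL; base url in [0], GET params as named keys):
**   it_url::parse('http://www.example.com/x?a=1&b=2');
**   # array(0 => 'http://www.example.com/x', 'a' => '1', 'b' => '2')
*/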
}
?>