Branch itools/devel-utf8 created

author: Nathan Gass 2012-03-22 18:18:42 +0000
committer: Nathan Gass 2012-03-22 18:18:42 +0000
commit: d59a4921188753dbe4c0161081755a28112c3ef6 (patch)
tree: 81496414d988f37f1db9d92c9750d888ffa13746 /devel-utf8/it_url.class
parent: ca11771e8fad5fef96615df4c44e04b8fb60ac31 (diff)
download: itools-d59a4921188753dbe4c0161081755a28112c3ef6.tar.gz
itools-d59a4921188753dbe4c0161081755a28112c3ef6.tar.bz2
itools-d59a4921188753dbe4c0161081755a28112c3ef6.zip
1 files changed, 765 insertions, 0 deletions
diff --git a/devel-utf8/it_url.class b/devel-utf8/it_url.class
new file mode 100644
index 0000000..305bde9
--- /dev/null
+++ b/devel-utf8/it_url.class
@@ -0,0 +1,765 @@
+<?php
+/*
+**	$Id$
+**
+**	Copyright (C) 1995-2007 by the ITools Authors.
+**	This file is part of ITools - the Internet Tools Library
+**
+**	ITools is free software; you can redistribute it and/or modify
+**	it under the terms of the GNU General Public License as published by
+**	the Free Software Foundation; either version 3 of the License, or
+**	(at your option) any later version.
+**
+**	ITools is distributed in the hope that it will be useful,
+**	but WITHOUT ANY WARRANTY; without even the implied warranty of
+**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**	GNU General Public License for more details.
+**
+**	You should have received a copy of the GNU General Public License
+**	along with this program.  If not, see <http://www.gnu.org/licenses/>.
+**
+**	url.class - URL parsing, retrieval and caching functions
+*/
+
+class it_url
+{
+	/* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
+	var $url;		/* Canonical URL assembled by the constructor, e.g. http://www.relog.ch/ */
+	var $protocol;		/* Lowercased protocol, e.g. http */
+	var $hostname;		/* Host name with a leading "www." removed, e.g. relog.ch */
+	var $realhostname;	/* Lowercased host name (IDNA-encoded if Net_IDNA is loaded), e.g. www.relog.ch */
+	var $port;		/* Port from the URL or the default port of the protocol, e.g. 80 */
+	var $path;		/* Path without leading slashes and without common index file names, e.g. empty for / */
+	var $rawurl;		/* URL exactly as passed in, e.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
+	var $user;		/* User name from the URL, e.g. falcon */
+	var $pass;		/* Password from the URL, e.g. joshua */
+
+	var $page;		/* Page contents fetched by read_page() or empty */
+	var $page_read;		/* true once read_page() has run */
+	var $title;		/* Page title or empty */
+	var $description;	/* Page description (from meta tag) or empty */
+
+	var $headers;		/* Headers of page fetched by get() */
+	var $data;		/* Data part, even if return code is not 200 */
+	var $result;		/* HTTP status code parsed by get(), false if none was received */
+	var $redir = 0;		/* Number of redirects followed by get() */
+
+
+/**
+ * Constructor: canonicalize an URL
+ *
+ * Splits the URL into protocol, credentials, host, port and path, stores the
+ * canonical form in $this->url and keeps the untouched input in $this->rawurl.
+ * URLs without protocol are treated as http, or as file if they start with
+ * a slash or a drive letter.
+ * @param $url URL this object represents
+ * @param $options Optional settings: $options['encoding'] is the charset of $url; unless it
+ *        is utf-8 the host name is passed through utf8_encode() before IDNA encoding
+ */
+function it_url($url = null, $options = array())
+{
+	$this->rawurl = $url;
+	$url = (string)$url;
+	$protoport = 0;	/* Stays 0 for protocols without a known default port */
+	$idn = null;
+
+	# Credentials must not contain '/' or '@', otherwise host:port/...@... or a ':' and '@' in the query would be taken for user:pass
+	if (preg_match('#^([a-z]+):/+(?:([^:/@]*):([^/@]*)@)?(.*)$#i', $url, $regs))
+	{
+		$this->protocol = strtolower($regs[1]);
+		$this->user = $regs[2];
+		$this->pass = $regs[3];
+		$url = $regs[4];
+	}
+	else if (preg_match('/^[a-z]:/', $url) || preg_match('#^/#', $url))
+	{
+		$this->protocol = 'file';
+	}
+	else
+		$this->protocol = 'http';
+
+	/* Default port */
+	if ($this->protocol == 'http')
+		$protoport = 80;
+	else if ($this->protocol == 'https')
+		$protoport = 443;
+
+	$this->port = $protoport;
+
+	if (class_exists('Net_IDNA', false))
+		$idn = Net_IDNA::getInstance();
+
+	# With IDNA support any character is allowed in the host part
+	if ($idn)
+		$pattern = '^([^/]+)/*(.*)$';
+	else
+		$pattern = '^([a-z0-9_:\.-]+)/*(.*)$';
+
+	if (preg_match("#$pattern#i", $url, $regs))
+	{
+		$hostport = explode(':', $regs[1]);
+		$hostname = $hostport[0];
+		$port = isset($hostport[1]) ? $hostport[1] : '';
+
+		$this->realhostname = strtolower($hostname);
+
+		if ($port)
+			$this->port = intval($port);
+
+		$url = $regs[2];
+	}
+
+	$this->hostname = preg_replace('/^www\./', '', $this->realhostname);
+
+	# Get rid of common index file names
+	$url = preg_replace('#(^|/)(index\.[ps]?html?|index\.php[34]?|default\.aspx?)$#', '', $url);
+
+	$this->path = preg_replace('#^/$#', '', $url);
+
+	# Only mention the port in the canonical URL if it is not the default of the protocol
+	if ($this->port != $protoport)
+		$this->url = "$this->protocol://$this->realhostname:$this->port/$this->path";
+	else
+		$this->url = "$this->protocol://$this->realhostname/$this->path";
+
+	if ($idn)
+	{
+		$realhostname = $this->realhostname;
+
+		if (!preg_match('/^utf-?8$/i', isset($options['encoding']) ? $options['encoding'] : ''))
+			$realhostname = utf8_encode($realhostname);
+
+		$encoded = $idn->encode($realhostname);
+
+		if ($encoded != $realhostname)
+			$this->realhostname = $encoded;
+	}
+}
+
+
+/**
+ * Read the page into memory, extract title and description and
+ * set $this->page, $this->title and $this->description.
+ * The page is fetched with the external wget command; up to five attempts
+ * are made while following "Location:" redirects reported by wget.
+ * @param $timeout Timeout for operation, defaults to unlimited (0)
+ * @return True if page has been read and $this->page is set, 0 if the host name looks bogus
+ */
+function read_page($timeout = 0)
+{
+	$this->page = null;
+	$this->title = null;
+	$this->description = null;
+
+	/*
+	** If the URL does not contain a dot followed by at least one character,
+	** it is considered bogus. This prevents 'localhost', 'www', and numerical IP addresses.
+	*/
+	if (!preg_match('/\.[a-z]+$/i', $this->realhostname))
+		return 0;
+
+	$url = $this->rawurl;
+	$count = 0;
+	while ($this->page == '')
+	{
+		# The URL is passed as one quoted argument after "--" so it can neither break out of the command nor be taken for a wget option
+		$cmd = 'LANG=C wget 2>&1 -T ' . ((int)$timeout) . ' -q -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - -- ' . escapeshellarg($url);
+		$this->page = `$cmd`;
+
+		if ($this->page == '')	/* An error occurred. Find out what it was. */
+		{
+			$cmd = 'LANG=C wget 2>&1 -T ' . ((int)$timeout) . ' -v -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - -- ' . escapeshellarg($url);
+			$error = `$cmd`;
+			if (preg_match('/Location: ([^ ]*)/i', $error, $regs)) /* Redirect ? */
+			{
+				$url = $regs[1];
+				if (!preg_match('/^[a-z]+:/i', $url))	/* Kludge for Miss Kournikova's admirers: grok local redirects (in violation of RFC) */
+					$url = $this->rawurl.'/'.$url;
+			}
+			else
+				break;
+		}
+
+		if (++$count > 4)	/* Avoid infinite redirect loops */
+			break;
+	}
+
+	$this->page_read = 1;
+
+	if (preg_match('#<title>([^<]*)</title>#i', $this->page, $regs))
+		$this->title = it_htmlentities_decode($regs[1]);
+
+	if (preg_match('/<meta name="description"[^>]+content="([^"]*)">/i', $this->page, $regs))
+		$this->description = it_htmlentities_decode($regs[1]);
+
+	return ($this->page != '');
+}
+
+
+/* Return the description of this page, fetching the page first if that has not happened yet */
+function get_description()
+{
+	if ($this->page_read)
+		return $this->description;
+
+	$this->read_page();
+
+	return $this->description;
+}
+
+
+/* Return the title of this page, fetching the page first if that has not happened yet */
+function get_title()
+{
+	if ($this->page_read)
+		return $this->title;
+
+	$this->read_page();
+
+	return $this->title;
+}
+
+/**
+ * Check if a given url can be fetched by sending a GET request and
+ * looking at the status line of the answer. https URLs are contacted
+ * through the ssl:// transport.
+ * Note: Redirects are treated as successful
+ * @param $timeout Timeout for connection in seconds
+ * @return true if url could be fetched (status code 2xx or 3xx)
+ */
+function is_reachable($timeout = 5)
+{
+	$result = false;
+	$transport = $this->protocol == 'https' ? 'ssl://' : '';
+
+	if ($fp = @fsockopen($transport . $this->realhostname, $this->port, $dummy_errno, $dummy_errstr, $timeout))
+	{
+		fputs($fp, "GET /$this->path HTTP/1.0\r\nHost: $this->realhostname\r\nUser-Agent: ITools\r\n\r\n");
+		$line = fgets($fp, 128);
+		fclose($fp);
+
+		#debug("it_url::is_reachable($this->rawurl: $line");
+		# The status line starts with "HTTP/" whatever the URL protocol is, so do not match against $this->protocol
+		$result = is_string($line) && preg_match('#^HTTP/[^ ]+ +[23]#i', $line);
+	}
+
+	return $result;
+}
+
+/**
+ * Get simple URL with timeout. Can be called statically
+ *
+ * If the protocol is not http, only features of get_multi are supported.
+ * On return the object (or the temporary object when called statically) holds
+ * the status code in ->result, the response headers in ->headers and the body in ->data.
+ *
+ * @param $p parameter array with the following keys
+ * @param $p['url']: url to get, defaults to constructor URL
+ * @param $p['headers']: optional array of HTTP headers to send
+ * @param $p['timeout']: timeout per read in seconds, defaults to 5. fractions allowed
+ * @param $p['totaltimeout']: timeout for the whole function call
+ * @param $p['filemtime']: Add HTTP header to only fetch when newer than this, otherwise return true instead of data
+ * @param $p['data']: POST data array with key-value pairs
+ * @param $p['retries']: Number of retries if download fails, default 1
+ * @return contents of resulting page, considering redirects, excluding headers, or false on error
+ */
+function get($p=null, $timeout=5)
+{
+	if (!is_array($p))
+		$p = array('url' => $p, 'timeout' => $timeout);
+
+	$p += array('totaltimeout' => "999999", 'timeout' => 5, 'retries' => 1);
+	$geturl = isset($p['url']) ? $p['url'] : null;
+
+	if (isset($this) && ($this instanceof it_url))
+	{
+		$url = $this;
+		if ($geturl)
+			$this->it_url($geturl);
+	}
+	else	# called statically
+		$url = new it_url($geturl);
+
+	$url->result = $result = false;
+	$url->data = null;
+	$url->headers = array();
+	$data = $headers = '';
+	$p['timeout'] = min($p['timeout'], $p['totaltimeout']);	# No operation may be longer than totaltimeout
+	$endtime = time() + $p['totaltimeout'];
+
+	if ($url->protocol == 'http')
+	{
+		if ($fp = @fsockopen($url->realhostname, $url->port, $dummy_errno, $dummy_errstr, $p['timeout']))
+		{
+			# urlencode data pairs if is array
+			if (is_array($p['data']))
+				$data = it_url::params($p['data']);
+
+			# Caller supplied headers win over the defaults
+			$p['headers'] = (array)$p['headers'] + array(
+				'Host' => $url->realhostname,
+				'User-Agent' => "Mozilla/4.0 (compatible; MSIE 7.0; ITools)",
+				'Accept-Language' => T_lang(),
+			);
+
+			if (is_int($p['filemtime']))
+				$p['headers']['If-Modified-Since'] = date("r", $p['filemtime']);
+
+			if ($datalen = strlen($data))
+			{
+				$method = "POST";
+				$p['headers'] += array(
+					'Content-Type' => "application/x-www-form-urlencoded",
+					'Content-Length' => $datalen,
+				);
+			}
+			else
+				$method = "GET";
+
+			if ($url->user || $url->pass)
+				$p['headers'] += array('Authorization' => 'Basic ' . base64_encode($url->user . ':' . $url->pass));
+
+			foreach ($p['headers'] as $header => $value)
+					$headers .= "$header: $value\r\n";
+
+			stream_set_timeout($fp, intval($p['timeout']), intval(($p['timeout']*1000000)%1000000));
+			@fputs($fp, "$method /$url->path HTTP/1.0\r\n$headers\r\n$data");
+
+			# Read response headers up to the first empty line
+			while (!feof($fp) && ($line = @fgets($fp, 10240)) && ($line = trim($line)) && (time() < $endtime))
+			{
+				if (preg_match('#^(HTTP\S+)\s(\d+)#', $line, $parts)) # Parse result code
+					$url->headers[$parts[1]] = $url->result = $parts[2];
+				elseif (preg_match('#^Location: (https?://[^/]*)?(/)?(.*)$#i', $line, $parts) && ($parts[1] != $url->url)) # Handle redirects (supports relative and global)
+				{
+					unset($p['url'], $p['headers']['Host']);
+					$url->it_url($parts[1] ? $parts[1].$parts[2].$parts[3] : $url->protocol.'://'.$url->realhostname.($parts[2] ? $parts[2].$parts[3] : '/'.dirname($url->path).'/'.$parts[3]));
+					if (++$url->redir <= 4)  /* Avoid infinite redirects */
+						return $url->get($p);
+				}
+				elseif (preg_match('#^([^:]+): (.*)$#', $line, $parts))
+					$url->headers[$parts[1]] = $parts[2];
+			}
+
+			if ($url->result)
+			{
+				if ($url->headers['Transfer-Encoding'] == "chunked")	# Bogus HTTP/1.1 chunked answer from server (e.g. Wordpress/Apache2/PHP5)
+				{
+					# Each chunk is "<hex size>CRLF<data>CRLF", a size of 0 ends the body
+					while (($len = hexdec(trim((string)fgets($fp)))) && (time() < $endtime))
+					{
+						$chunk = "";
+
+						while (!feof($fp) && (strlen($chunk) < $len) && (time() < $endtime))
+							$chunk .= @fread($fp, $len - strlen($chunk));
+
+						$url->data .= $chunk;
+						fgets($fp);	# Skip the CRLF after the chunk data, otherwise it is read as size 0 and all further chunks are lost
+					}
+				}
+				else
+				{
+					while (!feof($fp) && (time() < $endtime))
+						$url->data .= @fread($fp, 20480);
+				}
+
+				if ($p['filemtime'] && ($url->result == 304))
+					$result = true;	# Not modified, success but no data
+				else if ($url->result < 400)
+					$result =& $url->data;	# Reference: clearing $result on timeout below also clears $url->data
+			}
+
+			@fclose($fp);
+		}
+	} else {
+		# Fall back to the URL given to the constructor if none was passed in $p
+		$results = self::get_multi(array('urls' => array('one' => $geturl ? $geturl : $url->rawurl)) + $p);
+		$result = $results['one'];
+	}
+
+	if (time() >= $endtime)
+		$result = false;
+
+	if (!$result && $p['retries'] > 0 && $url->result < 400)
+		$result = $url->get(array('retries' => $p['retries'] - 1) + $p);
+
+	return $result;
+}
+
+
+/**
+ * Get multiple URL in parallel with timeout. Needs to be called statically
+ * @param $p parameter array with the following keys (same as it_url::get)
+ * @param $p['urls']: array of urls to get
+ * @param $p['timeout']: timeout per read in seconds, defaults to 5. (TODO: fractions allowed?)
+ * @param $p['totaltimeout']: timeout for the whole function call
+ * @param $p['headers']: optional array of HTTP headers to send
+ * @return array of contents of resulting page using same keys as the urls input array,
+ *         considering redirects, excluding headers
+ */
+function get_multi($p=null)
+{
+	$p = (array)$p + array('totaltimeout' => "999999", 'timeout' => 5, 'retries' => 1, 'urls' => array(), 'headers' => array());
+	$p['headers'] = (array)$p['headers'] + array(
+		'User-Agent' => "Mozilla/4.0 (compatible; MSIE 7.0; ITools)",
+		'Accept-Language' => T_lang(),
+	);
+
+	$headers = array();
+	foreach ($p['headers'] as $header => $value)
+		$headers[] = "$header: $value";
+
+	$opts = array(
+		CURLOPT_HEADER => false,
+		CURLOPT_RETURNTRANSFER => true,
+		CURLOPT_TIMEOUT => $p['totaltimeout'],
+		CURLOPT_LOW_SPEED_LIMIT => 5,
+		CURLOPT_LOW_SPEED_TIME => $p['timeout'],
+		CURLOPT_FOLLOWLOCATION => true,
+		CURLOPT_HTTPHEADER => $headers,
+	);
+	$mh = curl_multi_init();
+
+	$ch = array();
+	foreach ($p['urls'] as $key => $url)
+	{
+		$ch[$key] = curl_init();
+		curl_setopt($ch[$key], CURLOPT_URL, $url);
+		curl_setopt_array($ch[$key], $opts);
+		curl_multi_add_handle($mh, $ch[$key]);
+	}
+
+	# curl_multi loop based on example at http://php.net/manual/en/function.curl-multi-exec.php
+	$active = null;
+	do {
+		$mrc = curl_multi_exec($mh, $active);
+	} while ($mrc == CURLM_CALL_MULTI_PERFORM);
+
+	while ($active && $mrc == CURLM_OK)
+	{
+		# curl_multi_select() can return -1 right away; wait a little and run curl_multi_exec() anyway instead of spinning forever
+		if (curl_multi_select($mh) == -1)
+			usleep(100000);
+
+		do {
+			$mrc = curl_multi_exec($mh, $active);
+		} while ($mrc == CURLM_CALL_MULTI_PERFORM);
+	}
+
+	$results = array();
+	foreach ($p['urls'] as $key => $url)
+	{
+		$results[$key] = curl_multi_getcontent($ch[$key]);
+		curl_multi_remove_handle($mh, $ch[$key]);
+		curl_close($ch[$key]);
+	}
+	curl_multi_close($mh);
+	return $results;
+}
+
+/**
+ * Build the name of the local directory used to cache URLs (no trailing slash). Named args:
+ * @param $p['cachedir']    directory to store cache files in, defaults to $ULTRAHOME/var/urlcache
+ * @param $p['id']          name of the cache below cachedir, lets you keep several caches (e.g. with different maxage), defaults to "default"
+ */
+function get_cache_dir($p)
+{
+	$defaults = array('cachedir' => $GLOBALS['ULTRAHOME'] . "/var/urlcache", 'id' => "default");
+	$settings = $p + $defaults;
+	$dir = $settings['cachedir'] . "/" . $settings['id'];
+
+	return rtrim($dir, "/");
+}
+
+
+/**
+ * Build the name of the local file used to cache an URL. The name is the md5 of
+ * language, url and headers, placed in a subdirectory named after its first two characters.
+ * @param $p['url'] remote url to get (a plain string is taken as the url)
+ * @param $p['cachedir']    directory to store cache files in, @see get_cache_dir
+ * @param $p['id']          If you need more than one type of cache (e.g. different maxage) you can specify an id
+ */
+function get_cache_filename($p)
+{
+	$args = is_array($p) ? $p : array('url'=>$p);
+	$dir = it_url::get_cache_dir($args);
+
+	$headerpart = $args['headers'] ? serialize($args['headers']) : "";
+	$hash = md5(T_lang() . $args['url'] . $headerpart);
+
+	return $dir . "/" . substr($hash, 0, 2) . "/$hash";
+}
+
+
+/**
+ * Store contents of url in a file and return file name. Threadsafe: Provides locking. Called statically.
+ * An expired or missing cache file is refetched under a lock; callers that do not get the lock wait
+ * for the file to appear. An optional preprocess callback result is cached in a second file.
+ * Requires webserver writeable directory in $p['cachedir']. Params in associative array p:
+ * @param $p['url']         url to get
+ * @param $p['headers']     optional array of HTTP headers to send
+ * @param $p['cachedir']    directory to store cache files in, @see get_cache_dir
+ * @param $p['id']          If you need more than one type of cache (e.g. different maxage) you can specify an id
+ * @param $p['timeout']     timeout in seconds, default 10. fractions allowed
+ * @param $p['maxage']      maximum age of cache entries in seconds, default 86400
+ * @param $p['cleanbefore'] maximum daytime when attempting cleanup, default 7200
+ * @param $p['preprocess']  callback function (or array for methods) to change received file or array('function' => ..., 'in' => $src, 'out' => $dst, ...) with callback function plus args
+ * @param $p['safety']      value 0 means dont generate alert, value 1 means generate alerts on timeouts and failures
+ * @param $p['keepfailed']  keep old versions of files if download fails (sending alerts conservatively)
+ * @param $p['returnheaders'] Return array($path, $headers) instead of simply $path
+ * @param $p['it_error']    parameters for it::error()
+ * @return path of the cache file (or array($path, $headers) if returnheaders is set), false on failure
+ */
+function get_cache($p = array())
+{
+	$p += array('timeout' => 10, 'maxage' => 86400, 'cleanbefore' => 7200, 'safety' => 1, 'it_error' => array());
+	$p['totaltimeout'] = $p['timeout'];
+	$path = it_url::get_cache_filename($p);	# Must be before changing cachedir below
+	$p['cachedir'] = it_url::get_cache_dir($p);
+
+	@mkdir($p['cachedir']);
+	@mkdir(dirname($path));
+
+	if (!is_writable(dirname($path)))
+		it::error(dirname($path) . " not writable");
+
+	if ($filemtime = it_url::_expired($path, $p['maxage']))	# Outdated(non-zero int) or non-existent(true)?
+	{
+		$fileexists = $filemtime !== true;
+
+		if ($lock = it_url::_lock($path))
+		{
+			# Touch existing file to prevent locking other getters while refreshing
+			if ($fileexists)
+				touch($path);
+
+			EDC('getcache', "new", $filemtime, $p['url'], $path);
+			$url = new it_url;
+			if ($result = $url->get($p + array('filemtime' => EDC('nocache') ? null : $filemtime)))	# => true means not modified (no new data fetched)
+			{
+				$newfile = it_url::_atomicwrite($path, $result);
+				if ($p['returnheaders'])
+					file_put_contents("$path.headers", '<?php return ' . var_export($url->headers, true) . ";\n");
+			}
+			else if ($p['keepfailed'])
+				$result = $fileexists;
+			else
+				@unlink($path);	# Expired and failed to get
+
+			it_url::_unlock($path, $lock);
+		}
+		else
+		{
+			# Wait for file currently being transferred
+			EDC('getcache', "wait", $p['url'], $path);
+			$result = it_url::_waitforlockedfile($path, $p);
+
+			# If file could not be fetched by other thread but exists and we are in keepfailed mode then return old file
+			if (!$result && $p['keepfailed'])
+				$result = $fileexists;
+
+		}
+	}
+	else
+	{
+		# Get file from cache
+		EDC('getcache', "cached", $p['url'], $path);
+		$result = true;	# Up to date
+	}
+
+	# Read headers before $path is modified for preprocessing
+	if ($p['returnheaders'])
+		$headers = @include("$path.headers");
+
+	if ($result && $p['preprocess'])
+	{
+		$srcpath = $path;
+		$path .= substr(md5(serialize($p['preprocess'])), 0, 2);
+
+		if ($filemtime = $newfile ? true : it_url::_expired($path, $p['maxage']))	# Freshly downloaded, outdated(non-zero int) or non-existent(true)?
+		{
+			if ($lock = it_url::_lock($path))
+			{
+				# Touch existing file to prevent locking other getters while refreshing
+				if ($filemtime !== true)
+					touch($path);
+
+				EDC('getcache', "processnew", $p['url'], $path);
+				$dstpath = "$path.preprocesstmp";
+
+				if (is_array($p['preprocess']) && $p['preprocess']['function'])	# Needs is_array as it can be a string where dereferencing gives first character!
+					call_user_func($p['preprocess']['function'], array('in' => $srcpath, 'out' => $dstpath) + $p['preprocess']);
+				else
+					call_user_func($p['preprocess'], $srcpath, $dstpath);
+
+				if (!($result = @filesize($dstpath) && @rename($dstpath, $path)))
+				{
+					@unlink($dstpath);
+					@unlink($path);
+				}
+
+				it_url::_unlock($path, $lock);
+			}
+			else
+			{
+				# Wait for file currently being processed
+				EDC('getcache', "processwait", $p['url'], $path);
+				$result = it_url::_waitforlockedfile($path, $p);
+			}
+		}
+	}
+
+	# cache cleanup at night: early in the day (before cleanbefore seconds) and only if the "cleaned" marker is older than 80000 seconds
+	if ((date('H')*3600 + date('i')*60 < $p['cleanbefore']) && (time()-@filemtime($p['cachedir'] . "/cleaned") > 80000))
+	{
+		touch($p['cachedir'] . "/cleaned");
+		$maxagemin = intval($p['maxage']/60);
+		exec("nohup bash -c 'cd {$p['cachedir']} && sleep 10 && find ?? -mmin +$maxagemin -print0 | xargs -0 -r rm' </dev/null >/dev/null 2>&1 &");
+	}
+
+	EDC('getcache', $result, $path);
+	return $result ? ($p['returnheaders'] ? array($path, $headers) : $path) : false;
+}
+
+/**
+ * Fetch a file through the cache and return its contents.
+ * On failure returns null if $p['safety'] is 0, otherwise the result of it::error()
+ * @param see it_url::get_cache
+ */
+function get_cache_contents($p)
+{
+	if ($fn = self::get_cache($p))
+		return file_get_contents($fn);
+
+	if ($p['safety'] === 0)
+		return null;
+
+	return it::error(array('title' => "failed getting " . $p['url'], 'body' => var_export($p, true)));
+}
+
+/**
+ * Tell whether the file at given path is older than maxage.
+ * With the 'nocache' debug flag set every file counts as non-existent.
+ * @param $path File to check
+ * @param $maxage Maximum age of file in seconds
+ * @return Not expired: false | Non-existent file: true | Timestamp of expired file
+ */
+function _expired($path, $maxage)
+{
+	$mtime = EDC('nocache') ? false : @filemtime($path);
+
+	if (!$mtime)	# File does not exist yet
+		return true;
+
+	if (time() - $mtime <= $maxage)
+		return false;
+
+	EDC('getcache', "expired", $path);
+
+	return $mtime;
+}
+
+/**
+ * Try to get the lock for a given file by creating "$path.lock".
+ * A lock file older than 30 seconds (or any lock file in 'nocache' debug mode) is taken over.
+ * @param $path File to lock
+ * @return Lock handle if successfully locked file
+ */
+function _lock($path)
+{
+	$lockfile = "$path.lock";
+
+	if (EDC('nocache'))
+		$mode = "w";
+	else
+	{
+		$mtime = @filemtime($lockfile);
+		$mode = ($mtime && (time() - $mtime > 30)) ? "w" : "x";	# expire forgotten locks, "x" fails if lock exists
+	}
+
+	return @fopen($lockfile, $mode);
+}
+
+/**
+ * Give back the lock on a file: close the handle and remove "$path.lock"
+ * @param $path File to unlock
+ * @param $lock Handle to lock acquired by _lock
+ */
+function _unlock($path, $lock)
+{
+	$lockfile = "$path.lock";
+
+	fclose($lock);
+	@unlink($lockfile);
+}
+
+/**
+ * Wait for file which is currently locked by polling for "$path.lock" every 0.1 seconds
+ * @param $path File to wait for
+ * @param $p Wait parameters, see @get_cache: 'timeout' limits the wait, 'safety' == 1 reports
+ *        a lock that is still present afterwards through it::error(), 'url' is used in that message
+ * @return Whether lock was released within timeout and file is still there
+ */
+function _waitforlockedfile($path, $p)
+{
+	$sleeptime = 0.1; # seconds to wait per pass
+
+	# wait until cache is ready, then read from cache
+	for ($maxpasses = $p['timeout'] / $sleeptime, $passes = 0; ($lockedbyother = file_exists("$path.lock")) && ($passes < $maxpasses); ++$passes)
+	{
+		usleep($sleeptime * 1000000);
+		clearstatcache();	# file_exists() results are cached by PHP
+	}
+
+	# Report the file we actually waited for ($p has no 'path' key)
+	if ($lockedbyother && $p['safety'] == 1)
+		it::error(($passes < $maxpasses ? "error getting url" : "timeout") . " in it_url::get_cache(): url={$p['url']}, passes=$passes, maxpasses=$maxpasses, path=$path");
+
+	return !$lockedbyother && file_exists($path);
+}
+
+/**
+ * Write data to tmp file and atomically rename it to destination.
+ * If the temp file cannot be created, written completely or renamed, the
+ * destination is left untouched and the temp file is removed.
+ * @param $path Destination file to write data to
+ * @param $data Data to write | true to just touch file | false to remove file
+ * @return True if data was written to file (false when only touching or removing)
+ */
+function _atomicwrite($path, $data)
+{
+	$result = false;
+
+	if ($data === true)	# Not modified, no new data, just update timestamp
+		touch($path);
+	else if ($data !== false)
+	{
+		# Temp file lives in the destination directory so that rename() stays on one filesystem
+		$tmpname = tempnam(dirname($path), "writetmp");
+
+		if ($tmpname !== false)
+		{
+			$written = false;
+
+			if ($cachetmp = @fopen($tmpname, "w"))
+			{
+				$written = fputs($cachetmp, $data);
+				fclose($cachetmp);
+			}
+
+			# Only publish completely written files
+			if (($written !== false) && ($written == strlen($data)))
+			{
+				chmod($tmpname, 0664);
+				$result = rename($tmpname, $path);
+			}
+
+			if (!$result)
+				@unlink($tmpname);	# Do not leave temp files behind
+		}
+	}
+	else
+		@unlink($path);
+
+	return $result;
+}
+
+/**
+ * Make an URL absolute by using host and protocol from current Apache request.
+ * A non-default SERVER_PORT is appended to HTTP_HOST.
+ * @param $url Optional URL ( foo.html, /foo.html, //host/bar.html, http://host/bar.html ), default self
+ * @return absolute version of URL ( http[s]://host/bar.html )
+ */
+function absolute($url=null)
+{
+	if (!isset($url))
+		$url = $_SERVER['PHP_SELF'];
+
+	# Only a leading http:// or https:// makes an URL absolute, not any name starting with "http"
+	if (!preg_match('#^https?://#i', $url))
+	{
+		# Only a leading // marks a host part; a // further back (e.g. an URL inside the query string) does not
+		if (!preg_match('#^//#', $url))
+		{
+			$dir = preg_replace('#/[^/]*$#', '/', $_SERVER['PHP_SELF']);
+			$url = preg_match('#^/#', $url) ? $url : "$dir$url";
+			$url = "//" . $_SERVER['HTTP_HOST'] . ($_SERVER['SERVER_PORT'] == (isset($_SERVER['HTTPS']) ? 443 : 80) ? "" : ":{$_SERVER['SERVER_PORT']}") . $url;
+		}
+		$url = "http" . (isset($_SERVER['HTTPS']) ? 's':'') . ":$url";
+	}
+
+	return $url;
+}
+
+/**
+ * Build a valid redirect URL, send it in a Location: header and stop execution.
+ * With the 'noredir' debug flag a link and a backtrace are printed instead of the header.
+ * @param $url  Optional URL ( foo.html, /foo.html, //host/bar.html, http://host/bar.html ), default self
+ * @param $type Type of redirect, "temporary" or "permanent", default temporary
+ * @return This method never returns.
+ */
+function redirect($url = null, $type = "temporary")
+{
+	# NOTE: HTTP 303 is called "See Other", rather than Temporary (which would be HTTP 307), but is the behaviour one usually wants for temporary redirects
+	$codes = array('permanent' => 301, 'temporary' => 303);
+	$code = $codes[$type];
+
+	if (!$code)
+		it::fatal("Invalid redirect type '$type', must be 'permanent' or 'temporary'");
+
+	$target = preg_replace("/[\r\n].*/", '', it_url::absolute($url));	# Security: cut after CR/LF
+
+	if (!EDC('noredir'))
+		header('Location: ' . it_untaint($target, TC_SELF), true, $code);
+	else
+		echo a(array('href' => $target), Q($target)) . Q(" (HTTP/1.1 $code, $type redirect)") . br() . Q("Trace: " . it_debug::backtrace());
+
+	exit;
+}
+
+/**
+ * Urlencode a string but keep comma and parentheses readable
+ */
+function encode($str)
+{
+	$encoded = urlencode($str);
+
+	return str_replace(array("%2C", "%28", "%29"), array(",", "(", ")"), $encoded);
+}
+
+/**
+ * Build the query string of a GET request from params, optionally only using given fields
+ * @param $params Array to take values from, usually $_GET
+ * @param $keys Keys to use; default: all
+ * @return "key=value" pairs joined by "&"
+ */
+function params($params, $keys = null)
+{
+	$pairs = it_url::_params($params, $keys);
+
+	return implode("&", $pairs);
+}
+
+/* Return list of urlencoded "key=value" pairs for params(); nested arrays become key[subkey]=value, empty values are skipped */
+function _params($params, $keys = null)
+{
+	$pairs = array();
+	$wanted = isset($keys) ? $keys : array_keys($params);
+
+	foreach ($wanted as $name)
+	{
+		$value = $params[$name];
+
+		if (!is_array($value))
+		{
+			if (strlen($value))
+				$pairs[] = urlencode($name) . "=" . it_url::encode($value);
+
+			continue;
+		}
+
+		# Prefix the key of every pair from the nested array: sub=1 becomes name[sub]=1
+		foreach (it_url::_params($value) as $sub)
+		{
+			if (strlen($sub))
+				$pairs[] = it::replace(array('^([^=\[]*)' => $name . '[$1]'), $sub);
+		}
+	}
+
+	return $pairs;
+}
+
+}
+
+?>
author	Nathan Gass	2012-03-22 18:18:42 +0000
committer	Nathan Gass	2012-03-22 18:18:42 +0000
commit	d59a4921188753dbe4c0161081755a28112c3ef6 (patch)
tree	81496414d988f37f1db9d92c9750d888ffa13746 /devel-utf8/it_url.class
parent	ca11771e8fad5fef96615df4c44e04b8fb60ac31 (diff)
download	itools-d59a4921188753dbe4c0161081755a28112c3ef6.tar.gz itools-d59a4921188753dbe4c0161081755a28112c3ef6.tar.bz2 itools-d59a4921188753dbe4c0161081755a28112c3ef6.zip