summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- it_url.class 81
1 files changed, 1 insertions, 80 deletions
diff --git a/it_url.class b/it_url.class
index b0320a6..e58ad14 100644
--- a/it_url.class
+++ b/it_url.class
@@ -32,13 +32,7 @@ class it_url
var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
var $user; /* E.g. falcon */
var $pass; /* E.g. joshua */
-
- var $page; /* Page or empty */
- var $page_read; /* true if page read */
- var $title; /* Page title or empty */
- var $description; /* Page description or empty */
var $cookies; /* key => values of cookies from server */
-
var $headers; /* Headers of page fetched by get() */
var $data; /* Data part, even if return code is not 200 */
var $result; /* Return code of get() */
@@ -48,6 +42,7 @@ class it_url
/**
* Constructor: canonicalize a URL
* @param $url URL this object represents
+ * @param $options['encoding'] encoding of hostname ('utf-8', 'iso-8859-1' etc.)
*/
function it_url($url = null, $options = array())
{
@@ -123,80 +118,6 @@ function it_url($url = null, $options = array())
/**
- * Read the page into memory, extract title and description and
- * set $this->page, $this->title and $this->description
- * @param $timeout Timeout for operation, defaults to unlimited (0)
- * @return True if page has been read and $this->page is set
- */
-function read_page($timeout = 0)
-{
- unset($this->page);
- unset($this->title);
- unset($this->description);
-
- /*
- ** If the hostname does not end in a dot followed by at least one letter,
- ** it is considered bogus. This rejects 'localhost', 'www', and numerical IP addresses.
- */
- if (!preg_match('/\.[a-z]+$/i', $this->realhostname))
- return 0;
-
- $url = $this->rawurl;
- while ($this->page == '')
- {
- $cmd = 'LANG=C wget 2>&1 -T ' . ((int)$timeout) . ' -q -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd("$url"));
- $this->page = `$cmd`;
-
- if ($this->page == '') /* An error occurred. Find out what it was. */
- {
- $cmd = 'LANG=C wget 2>&1 -T' . ((int)$timeout) . ' -v -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd($url));
- $error = `$cmd`;
- if (preg_match('/Location: ([^ ]*)/i', $error, $regs)) /* Redirect ? */
- {
- $url = $regs[1];
- if (!preg_match('/^[a-z]+:/i', $url)) /* Kludge for Miss Kournikova's admirers: grok local redirects (in violation of RFC) */
- $url = $this->rawurl.'/'.$url;
- }
- else
- break;
- }
-
- if (++$count > 4) /* Avoid infinite redirect loops */
- break;
- }
-
- $this->page_read = 1;
-
- if (preg_match('#<title>([^<]*)</title>#i', $this->page, $regs))
- $this->title = it_htmlentities_decode($regs[1]);
-
- if (preg_match('/<meta name="description"[^>]+content="([^"]*)">/i', $this->page, $regs))
- $this->description = it_htmlentities_decode($regs[1]);
-
- return ($this->page != '');
-}
-
-
-/* Return the description of this page */
-function get_description()
-{
- if (!$this->page_read)
- $this->read_page();
-
- return $this->description;
-}
-
-
-/* Return the title of this page */
-function get_title()
-{
- if (!$this->page_read)
- $this->read_page();
-
- return $this->title;
-}
-
-/**
* Check if a given url (currently http:port80-only) can be fetched
* Note: Redirects are treated as successful
* $timeout Timeout for connection in seconds