diff options
author | Christian A. Weber | 2015-10-13 12:52:46 +0200 |
---|---|---|
committer | Christian A. Weber | 2015-10-13 12:52:46 +0200 |
commit | babb09e211e93c6236bc19e60a4f649e573a6f4e (patch) | |
tree | c48bae390a352bcfe0da7407c3c17d8c5f827216 /it_url.class | |
parent | 74ec528f1e737e323c8a0fafeb4f42a5a31812d5 (diff) | |
download | itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.tar.gz itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.tar.bz2 itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.zip |
remove ultra-obsolete read_page(), get_description() and get_title() api (introduced for Myax Knowledge Manager)
Diffstat (limited to 'it_url.class')
-rw-r--r-- | it_url.class | 81 |
1 file changed, 1 insertion, 80 deletions
diff --git a/it_url.class b/it_url.class index 984f621..2204b60 100644 --- a/it_url.class +++ b/it_url.class @@ -32,13 +32,7 @@ class it_url var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */ var $user; /* E.g. falcon */ var $pass; /* E.g. joshua */ - - var $page; /* Page or empty */ - var $page_read; /* true if page read */ - var $title; /* Page title or empty */ - var $description; /* Page description or empty */ var $cookies; /* key => values of cookies from server */ - var $headers; /* Headers of page fetched by get() */ var $data; /* Data part, even if return code is not 200 */ var $result; /* Return code of get() */ @@ -48,6 +42,7 @@ class it_url /** * Constructor: canonicalize an URL * @param $url URL this object represents + * @param $options['encoding'] encoding of hostname ('utf-8', 'iso-8859-1' etc.) */ function it_url($url = null, $options = array()) { @@ -123,80 +118,6 @@ function it_url($url = null, $options = array()) /** - * Read the page into memory, extract title and description and - * set $this->page, $this->title and $this->description - * @param $timeout Timeout for operation, defaults to unlimited (0) - * @return True if page has been read and $this->page is set - */ -function read_page($timeout = 0) -{ - unset($this->page); - unset($this->title); - unset($this->description); - - /* - ** If the URL does not contain a dot followed by at least one character, - ** it is considered bogus. This prevents 'localhost', 'www', and numerical IP addresses. - */ - if (!preg_match('/\.[a-z]+$/i', $this->realhostname)) - return 0; - - $url = $this->rawurl; - while ($this->page == '') - { - $cmd = 'LANG=C wget 2>&1 -T ' . ((int)$timeout) . ' -q -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd("$url")); - $this->page = `$cmd`; - - if ($this->page == '') /* An error occurred. Find out what it was. */ - { - $cmd = 'LANG=C wget 2>&1 -T' . ((int)$timeout) . 
' -v -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd($url)); - $error = `$cmd`; - if (preg_match('/Location: ([^ ]*)/i', $error, $regs)) /* Redirect ? */ - { - $url = $regs[1]; - if (!preg_match('/^[a-z]+:/i', $url)) /* Kludge for Miss Kournikova's admirers: grok local redirects (in violation of RFC) */ - $url = $this->rawurl.'/'.$url; - } - else - break; - } - - if (++$count > 4) /* Avoid infinite redirect loops */ - break; - } - - $this->page_read = 1; - - if (preg_match('#<title>([^<]*)</title>#i', $this->page, $regs)) - $this->title = it_htmlentities_decode($regs[1]); - - if (preg_match('/<meta name="description"[^>]+content="([^"]*)">/i', $this->page, $regs)) - $this->description = it_htmlentities_decode($regs[1]); - - return ($this->page != ''); -} - - -/* Return the description of this page */ -function get_description() -{ - if (!$this->page_read) - $this->read_page(); - - return $this->description; -} - - -/* Return the title of this page */ -function get_title() -{ - if (!$this->page_read) - $this->read_page(); - - return $this->title; -} - -/** * Check if a given url (currently http:port80-only) can be fetched * Note: Redirects are treated as succesful * $timeout Timeout for connection in seconds |