remove ultra-obsolete read_page(), get_description() and get_title() API (introduced for Myax Knowledge Manager)

author: Christian A. Weber 2015-10-13 12:52:46 +0200
committer: Christian A. Weber 2015-10-13 12:52:46 +0200
commit: babb09e211e93c6236bc19e60a4f649e573a6f4e (patch)
tree: c48bae390a352bcfe0da7407c3c17d8c5f827216 /it_url.class
parent: 74ec528f1e737e323c8a0fafeb4f42a5a31812d5 (diff)
download: itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.tar.gz
itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.tar.bz2
itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.zip
1 files changed, 1 insertions, 80 deletions
diff --git a/it_url.class b/it_url.class
index 984f621..2204b60 100644
--- a/it_url.class
+++ b/it_url.class
@@ -32,13 +32,7 @@ class it_url
 	var $rawurl;		/* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
 	var $user;		/* E.g. falcon */
 	var $pass;		/* E.g. joshua */
-
-	var $page;		/* Page or empty */
-	var $page_read;		/* true if page read */
-	var $title;		/* Page title or empty */
-	var $description;	/* Page description or empty */
 	var $cookies;		/* key => values of cookies from server */
-
 	var $headers;		/* Headers of page fetched by get() */
 	var $data;		/* Data part, even if return code is not 200 */
 	var $result;		/* Return code of get() */
@@ -48,6 +42,7 @@ class it_url
 /**
  * Constructor: canonicalize an URL
  * @param $url URL this object represents
+ * @param $options['encoding'] encoding of hostname ('utf-8', 'iso-8859-1' etc.)
  */
 function it_url($url = null, $options = array())
 {
@@ -123,80 +118,6 @@ function it_url($url = null, $options = array())
 
 
 /**
- * Read the page into memory, extract title and description and
- * set $this->page, $this->title and $this->description
- * @param $timeout Timeout for operation, defaults to unlimited (0)
- * @return True if page has been read and $this->page is set
- */
-function read_page($timeout = 0)
-{
-	unset($this->page);
-	unset($this->title);
-	unset($this->description);
-
-	/*
-	** If the URL does not contain a dot followed by at least one character,
-	** it is considered bogus. This prevents 'localhost', 'www', and numerical IP addresses.
-	*/
-	if (!preg_match('/\.[a-z]+$/i', $this->realhostname))
-		return 0;
-
-	$url = $this->rawurl;
-	while ($this->page == '')
-	{
-		$cmd = 'LANG=C wget 2>&1 -T ' . ((int)$timeout) . ' -q -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd("$url"));
-		$this->page = `$cmd`;
-
-		if ($this->page == '')	/* An error occurred. Find out what it was. */
-		{
-			$cmd = 'LANG=C wget 2>&1 -T' . ((int)$timeout) . ' -v -U "Mozilla/4.0 (Compatible; Relog ITools)" -O - ' . preg_replace("/[ \t]/", '\\ ', escapeshellcmd($url));
-			$error = `$cmd`;
-			if (preg_match('/Location: ([^ ]*)/i', $error, $regs)) /* Redirect ? */
-			{
-				$url = $regs[1];
-				if (!preg_match('/^[a-z]+:/i', $url))	/* Kludge for Miss Kournikova's admirers: grok local redirects (in violation of RFC) */
-					$url = $this->rawurl.'/'.$url;
-			}
-			else
-				break;
-		}
-
-		if (++$count > 4)	/* Avoid infinite redirect loops */
-			break;
-	}
-
-	$this->page_read = 1;
-
-	if (preg_match('#<title>([^<]*)</title>#i', $this->page, $regs))
-		$this->title = it_htmlentities_decode($regs[1]);
-
-	if (preg_match('/<meta name="description"[^>]+content="([^"]*)">/i', $this->page, $regs))
-		$this->description = it_htmlentities_decode($regs[1]);
-
-	return ($this->page != '');
-}
-
-
-/* Return the description of this page */
-function get_description()
-{
-	if (!$this->page_read)
-		$this->read_page();
-
-	return $this->description;
-}
-
-
-/* Return the title of this page */
-function get_title()
-{
-	if (!$this->page_read)
-		$this->read_page();
-
-	return $this->title;
-}
-
-/**
  * Check if a given url (currently http:port80-only) can be fetched
 * Note: Redirects are treated as successful
  * $timeout Timeout for connection in seconds
author	Christian A. Weber	2015-10-13 12:52:46 +0200
committer	Christian A. Weber	2015-10-13 12:52:46 +0200
commit	babb09e211e93c6236bc19e60a4f649e573a6f4e (patch)
tree	c48bae390a352bcfe0da7407c3c17d8c5f827216 /it_url.class
parent	74ec528f1e737e323c8a0fafeb4f42a5a31812d5 (diff)
download	itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.tar.gz itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.tar.bz2 itools-babb09e211e93c6236bc19e60a4f649e573a6f4e.zip