summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x tests/url.t 70
-rw-r--r-- url.class 46
2 files changed, 89 insertions, 27 deletions
diff --git a/tests/url.t b/tests/url.t
new file mode 100755
index 0000000..bc63bcb
--- /dev/null
+++ b/tests/url.t
@@ -0,0 +1,70 @@
+#!/www/server/bin/php -qC
+<?php
+
+# Tests for url.class; currently covers only the URL parser in the it_url constructor
+
+require 'searchlib/search_test.class'; # presumably provides plan() and is() -- TODO confirm
+
+# Parse a URL with upper-case scheme, credentials, mixed-case host, explicit port and an index file name
+$url = new it_url('HTTP://falcon:joshua@www.Relog.CH:80/default.asp');
+
+plan(9); # nine is() checks follow
+
+is(
+ $url->url,
+ 'http://www.relog.ch/', # expected canonical form: lower case, no credentials, no port, no index file name
+ '$url->url'
+);
+
+is(
+ $url->protocol,
+ 'http', # scheme is expected in lower case
+ '$url->protocol'
+);
+
+is(
+ $url->hostname,
+ 'relog.ch', # realhostname without the leading "www."
+ '$url->hostname'
+);
+
+is(
+ $url->realhostname,
+ 'www.relog.ch', # full host name in lower case
+ '$url->realhostname'
+);
+
+is(
+ $url->port,
+ 80, # the constructor stores the port via intval()
+ '$url->port'
+);
+
+is(
+ $url->path,
+ '', # default.asp is treated as a common index file name and stripped from the path
+ '$url->path'
+);
+
+is(
+ $url->user,
+ 'falcon',
+ '$url->user'
+);
+
+is(
+ $url->pass,
+ 'joshua',
+ '$url->pass'
+);
+
+# Second URL: a path that is not an index file must be kept, without the leading slash
+$url = new it_url('HTTP://falcon:joshua@www.Relog.CH:80/foo/bar.html');
+
+is(
+ $url->path,
+ 'foo/bar.html',
+ '$url->path'
+);
+
+?>
diff --git a/url.class b/url.class
index e7aeb57..575f8ca 100644
--- a/url.class
+++ b/url.class
@@ -4,25 +4,27 @@
**
** ITools - the Internet Tools Library
**
-** Copyright (C) 1995-2003 by the ITools Authors.
+** Copyright (C) 1995-2007 by the ITools Authors.
** This program is free software; you can redistribute it and/or
** modify it under the terms of either the GNU General Public License
** or the GNU Lesser General Public License, as published by the Free
** Software Foundation. See http://www.gnu.org/licenses/ for details.
**
-** url.class - Create an URL object and canonize it
+** url.class - URL parsing, retrieval and caching functions
*/
class it_url
{
- /* E.g. HTTP://www.Relog.CH.:80/default.asp */
+ /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
var $url; /* E.g. http://www.relog.ch/ */
var $protocol; /* E.g. http */
var $hostname; /* E.g. relog.ch */
var $realhostname; /* E.g. www.relog.ch */
var $port; /* E.g. 80 */
var $path; /* E.g. / */
- var $rawurl; /* E.g. HTTP://www.Relog.CH.:80/default.asp */
+ var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
+ var $user; /* E.g. falcon */
+ var $pass; /* E.g. joshua */
var $page; /* Page or empty */
var $page_read; /* true if page read */
@@ -42,10 +44,12 @@ function it_url($url, $options = array())
{
$this->rawurl = $url;
- if (eregi('^([a-z]+):/+(.*)$', $url, $regs))
+ if (preg_match('#^([a-z]+):/+(?:([^:]*):([^@]*)@)?(.*)$#i', $url, $regs))
{
$this->protocol = strtolower($regs[1]);
- $url = $regs[2];
+ $this->user = strtolower($regs[2]);
+ $this->pass = strtolower($regs[3]);
+ $url = $regs[4];
}
else if (ereg('^[a-z]:', $url) || ereg('^/', $url))
{
@@ -60,7 +64,7 @@ function it_url($url, $options = array())
else if ($this->protocol == 'https')
$protoport = 443;
- $this->port = $protoport;
+ $this->port = intval($protoport);
if (class_exists('Net_IDNA'))
$idn = Net_IDNA::getInstance();
@@ -77,23 +81,15 @@ function it_url($url, $options = array())
$this->realhostname = strtolower($hostname);
if ($port)
- $this->port = $port;
+ $this->port = intval($port);
$url = $regs[2];
}
- if (ereg('^www\.(.*)$', $this->realhostname, $regs))
- $this->hostname = $regs[1];
- else
- $this->hostname = $this->realhostname;
-
- $index_files = array('index.html', 'index.htm', 'index.phtml', 'index.shtml', 'index.php3', 'index.php', 'default.asp');
+ $this->hostname = preg_replace('/^www\./', '', $this->realhostname);
- for ($i = 0; $i < count($index_files); $i++)
- {
- $url = eregi_replace("^$index_files[$i]\$", '', $url);
- $url = eregi_replace("/$index_files[$i]\$", '', $url);
- }
+ # Get rid of common index file names
+ $url = preg_replace('#(^|/)(index\.[ps]?html?|index\.php[34]?|default\.aspx?)$#', '', $url);
$this->path = ereg_replace('^/$', '', $url);
@@ -245,14 +241,7 @@ function get($p=null, $timeout=5000)
{
# urlencode data pairs if is array
if (is_array($p['data']))
- {
- $data_pairs = array();
-
- foreach ($p['data'] as $key => $value)
- $data_pairs[] = "$key=".urlencode($value);
-
- $data = implode('&', $data_pairs);
- }
+ $data = it_url::params($p['data']);
$p['headers'] = (array)$p['headers'] + array(
'Host' => $url->realhostname,
@@ -271,6 +260,9 @@ function get($p=null, $timeout=5000)
else
$method = "GET";
+ if ($url->user || $url->pass)
+ $p['headers'] += array('Authorization' => 'Basic ' . base64_encode($url->user . ':' . $url->pass));
+
foreach ($p['headers'] as $header => $value)
$headers .= "$header: $value\r\n";