diff options
-rwxr-xr-x | tests/url.t | 70 |
-rw-r--r-- | url.class | 46 |
2 files changed, 89 insertions, 27 deletions
diff --git a/tests/url.t b/tests/url.t new file mode 100755 index 0000000..bc63bcb --- /dev/null +++ b/tests/url.t @@ -0,0 +1,70 @@ +#!/www/server/bin/php -qC +<?php + +# Tests for url.class, currently only constructor's parser + +require 'searchlib/search_test.class'; + +# Create object and parse url +$url = new it_url('HTTP://falcon:joshua@www.Relog.CH:80/default.asp'); + +plan(9); + +is( + $url->url, + 'http://www.relog.ch/', + '$url->url' +); + +is( + $url->protocol, + 'http', + '$url->protocol' +); + +is( + $url->hostname, + 'relog.ch', + '$url->hostname' +); + +is( + $url->realhostname, + 'www.relog.ch', + '$url->realhostname' +); + +is( + $url->port, + 80, + '$url->port' +); + +is( + $url->path, + '', + '$url->path' +); + +is( + $url->user, + 'falcon', + '$url->user' +); + +is( + $url->pass, + 'joshua', + '$url->pass' +); + +# and now check for path +$url = new it_url('HTTP://falcon:joshua@www.Relog.CH:80/foo/bar.html'); + +is( + $url->path, + 'foo/bar.html', + '$url->path' +); + +?> @@ -4,25 +4,27 @@ ** ** ITools - the Internet Tools Library ** -** Copyright (C) 1995-2003 by the ITools Authors. +** Copyright (C) 1995-2007 by the ITools Authors. ** This program is free software; you can redistribute it and/or ** modify it under the terms of either the GNU General Public License ** or the GNU Lesser General Public License, as published by the Free ** Software Foundation. See http://www.gnu.org/licenses/ for details. ** -** url.class - Create an URL object and canonize it +** url.class - URL parsing, retrieval and caching functions */ class it_url { - /* E.g. HTTP://www.Relog.CH.:80/default.asp */ + /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */ var $url; /* E.g. http://www.relog.ch/ */ var $protocol; /* E.g. http */ var $hostname; /* E.g. relog.ch */ var $realhostname; /* E.g. www.relog.ch */ var $port; /* E.g. 80 */ var $path; /* E.g. / */ - var $rawurl; /* E.g. HTTP://www.Relog.CH.:80/default.asp */ + var $rawurl; /* E.g. 
HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */ + var $user; /* E.g. falcon */ + var $pass; /* E.g. joshua */ var $page; /* Page or empty */ var $page_read; /* true if page read */ @@ -42,10 +44,12 @@ function it_url($url, $options = array()) { $this->rawurl = $url; - if (eregi('^([a-z]+):/+(.*)$', $url, $regs)) + if (preg_match('#^([a-z]+):/+(?:([^:]*):([^@]*)@)?(.*)$#i', $url, $regs)) { $this->protocol = strtolower($regs[1]); - $url = $regs[2]; + $this->user = strtolower($regs[2]); + $this->pass = strtolower($regs[3]); + $url = $regs[4]; } else if (ereg('^[a-z]:', $url) || ereg('^/', $url)) { @@ -60,7 +64,7 @@ function it_url($url, $options = array()) else if ($this->protocol == 'https') $protoport = 443; - $this->port = $protoport; + $this->port = intval($protoport); if (class_exists('Net_IDNA')) $idn = Net_IDNA::getInstance(); @@ -77,23 +81,15 @@ function it_url($url, $options = array()) $this->realhostname = strtolower($hostname); if ($port) - $this->port = $port; + $this->port = intval($port); $url = $regs[2]; } - if (ereg('^www\.(.*)$', $this->realhostname, $regs)) - $this->hostname = $regs[1]; - else - $this->hostname = $this->realhostname; - - $index_files = array('index.html', 'index.htm', 'index.phtml', 'index.shtml', 'index.php3', 'index.php', 'default.asp'); + $this->hostname = preg_replace('/^www\./', '', $this->realhostname); - for ($i = 0; $i < count($index_files); $i++) - { - $url = eregi_replace("^$index_files[$i]\$", '', $url); - $url = eregi_replace("/$index_files[$i]\$", '', $url); - } + # Get rid of common index file names + $url = preg_replace('#(^|/)(index\.[ps]?html?|index\.php[34]?|default\.aspx?)$#', '', $url); $this->path = ereg_replace('^/$', '', $url); @@ -245,14 +241,7 @@ function get($p=null, $timeout=5000) { # urlencode data pairs if is array if (is_array($p['data'])) - { - $data_pairs = array(); - - foreach ($p['data'] as $key => $value) - $data_pairs[] = "$key=".urlencode($value); - - $data = implode('&', $data_pairs); - } + 
$data = it_url::params($p['data']); $p['headers'] = (array)$p['headers'] + array( 'Host' => $url->realhostname, @@ -271,6 +260,9 @@ function get($p=null, $timeout=5000) else $method = "GET"; + if ($url->user || $url->pass) + $p['headers'] += array('Authorization' => 'Basic ' . base64_encode($url->user . ':' . $url->pass)); + foreach ($p['headers'] as $header => $value) $headers .= "$header: $value\r\n"; |