summary | refs | log | tree | commit | diff
path: root/it_url.class
diff options
context:
space:
mode:
Diffstat (limited to 'it_url.class')
-rw-r--r-- it_url.class 86
1 files changed, 15 insertions, 71 deletions
diff --git a/it_url.class b/it_url.class
index c2116f5..405397e 100644
--- a/it_url.class
+++ b/it_url.class
@@ -1,6 +1,6 @@
<?php
/*
-** Copyright (C) 1995-2007 by the ITools Authors.
+** Copyright (C) 1995-2016 by the ITools Authors.
** This file is part of ITools - the Internet Tools Library
**
** ITools is free software; you can redistribute it and/or modify
@@ -27,7 +27,7 @@ class it_url
var $hostname; /* E.g. relog.ch */
var $realhostname; /* E.g. www.relog.ch */
var $port; /* E.g. 80 */
- var $explicitport; /* E.g. 80, explicitly set in rawurl */
+ var $explicitport; /* E.g. :80, explicitly set in rawurl */
var $path; /* E.g. / */
var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
var $user; /* E.g. falcon */
@@ -42,78 +42,22 @@ class it_url
/**
* Constructor: canonicalize an URL
* @param $url URL this object represents
- * @param $options['encoding'] encoding of hostname ('utf-8', 'iso-8859-1' etc.)
*/
-function it_url($url = null, $options = array())
+function it_url($url = null)
{
$this->rawurl = $url;
-
- if (preg_match('#^([a-z]+):/+(?:([^:]*):([^@]*)@)?(.*)$#is', $url, $regs))
- {
- $this->protocol = strtolower($regs[1]);
- $this->user = $regs[2];
- $this->pass = $regs[3];
- $url = $regs[4];
- }
- else if (preg_match('/^[a-z]:/', $url) || preg_match('#^/#', $url))
- {
- $this->protocol = 'file';
- }
- else
- $this->protocol = 'http';
-
- /* Default port */
- if ($this->protocol == 'http')
- $protoport = 80;
- else if ($this->protocol == 'https')
- $protoport = 443;
-
- $this->port = intval($protoport);
-
- if (class_exists('Net_IDNA', false))
- $idn = Net_IDNA::getInstance();
-
- if ($idn)
- $pattern = '^([^/]+)/*(.*)$';
- else
- $pattern = '^([a-z0-9_:\.-]+)/*(.*)$';
-
- $this->explicitport = '';
- if (preg_match("#$pattern#is", $url, $regs))
- {
- list($hostname, $port) = explode(':', $regs[1]);
-
- $this->realhostname = strtolower($hostname);
-
- if ($port) {
- $this->port = intval($port);
- $this->explicitport = ":" . $port;
- }
-
- $url = $regs[2];
- }
-
+ $comp = parse_url($url); # components used below: scheme, port, user, pass, host, path, query. NOTE(review): a scheme-less url like "www.relog.ch/x" presumably ends up entirely in 'path' with no 'host' - confirm callers always pass a scheme
+ $this->protocol = strtolower($comp['scheme']) ?: "http"; # lowercased scheme, "http" when the url has none
+ $protoport = $this->protocol == 'https' ? 443 : 80; # default port for the protocol: 443 for https, 80 for everything else
+ $this->port = intval($comp['port'] ?: $protoport); # always set: explicit port from the url if present, protocol default otherwise
+ $this->explicitport = $comp['port'] ? ':' . $comp['port'] : ''; # ":<port>" including the leading colon, only when the url specified a port; '' otherwise
+ $this->user = $comp['user'];
+ $this->pass = $comp['pass'];
+ $this->realhostname = strtolower($comp['host']); # host as written in the url, lowercased; a leading www. is kept here
$this->hostname = preg_replace('/^www\./', '', $this->realhostname);
-
- $this->path = preg_replace('#^/$#', '', $url);
-
- if ($this->port != $protoport)
- $this->url = "$this->protocol://$this->realhostname:$this->port/$this->path";
- else
- $this->url = "$this->protocol://$this->realhostname/$this->path";
-
- if ($idn)
- {
- $realhostname = $this->realhostname;
-
- if (!preg_match('/^utf-?8$/i', $options['encoding']))
- $realhostname = utf8_encode($realhostname);
-
- $encoded = $idn->encode($realhostname);
-
- if ($encoded != $realhostname)
- $this->realhostname = $encoded;
- }
+ $this->path = ltrim($comp['path'] . ($comp['query'] ? '?' . $comp['query'] : ''), '/'); # despite its name, $this->path holds path plus "?query", with all leading slashes stripped
+ $this->url = "$this->protocol://$this->realhostname" . ($this->port != $protoport ? $this->explicitport : '') . "/$this->path"; # rebuilt url; the port is included only when it differs from the protocol default
+ $this->realhostname = idn_to_ascii($this->realhostname) ?: $this->realhostname; # punycode form when idn_to_ascii() yields one, otherwise left unchanged
}
@@ -445,7 +389,7 @@ function get_multi($p=null)
foreach ($urls as $key => $url)
{
$handle = curl_init();
- curl_setopt($handle, CURLOPT_URL, $url['url']);
+ curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], $url['url']));
curl_setopt_array($handle, $opts);
curl_multi_add_handle($mh, $handle);
$keys[$handle] = $key;