diff options
Diffstat (limited to 'it_url.class')
-rw-r--r-- | it_url.class | 86 |
1 files changed, 15 insertions, 71 deletions
diff --git a/it_url.class b/it_url.class
index c2116f5..405397e 100644
--- a/it_url.class
+++ b/it_url.class
@@ -1,6 +1,6 @@
 <?php
 /*
-** Copyright (C) 1995-2007 by the ITools Authors.
+** Copyright (C) 1995-2016 by the ITools Authors.
 ** This file is part of ITools - the Internet Tools Library
 **
 ** ITools is free software; you can redistribute it and/or modify
@@ -27,7 +27,7 @@ class it_url
 	var $hostname; /* E.g. relog.ch */
 	var $realhostname; /* E.g. www.relog.ch */
 	var $port; /* E.g. 80 */
-	var $explicitport; /* E.g. 80, explicitly set in rawurl */
+	var $explicitport; /* E.g. :80, explicitly set in rawurl */
 	var $path; /* E.g. / */
 	var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
 	var $user; /* E.g. falcon */
@@ -42,78 +42,22 @@ class it_url
 /**
  * Constructor: canonicalize an URL
  * @param $url URL this object represents
- * @param $options['encoding'] encoding of hostname ('utf-8', 'iso-8859-1' etc.)
  */
-function it_url($url = null, $options = array())
+function it_url($url = null)
 {
 	$this->rawurl = $url;
-
-	if (preg_match('#^([a-z]+):/+(?:([^:]*):([^@]*)@)?(.*)$#is', $url, $regs))
-	{
-		$this->protocol = strtolower($regs[1]);
-		$this->user = $regs[2];
-		$this->pass = $regs[3];
-		$url = $regs[4];
-	}
-	else if (preg_match('/^[a-z]:/', $url) || preg_match('#^/#', $url))
-	{
-		$this->protocol = 'file';
-	}
-	else
-		$this->protocol = 'http';
-
-	/* Default port */
-	if ($this->protocol == 'http')
-		$protoport = 80;
-	else if ($this->protocol == 'https')
-		$protoport = 443;
-
-	$this->port = intval($protoport);
-
-	if (class_exists('Net_IDNA', false))
-		$idn = Net_IDNA::getInstance();
-
-	if ($idn)
-		$pattern = '^([^/]+)/*(.*)$';
-	else
-		$pattern = '^([a-z0-9_:\.-]+)/*(.*)$';
-
-	$this->explicitport = '';
-	if (preg_match("#$pattern#is", $url, $regs))
-	{
-		list($hostname, $port) = explode(':', $regs[1]);
-
-		$this->realhostname = strtolower($hostname);
-
-		if ($port) {
-			$this->port = intval($port);
-			$this->explicitport = ":" . $port;
-		}
-
-		$url = $regs[2];
-	}
-
+	$comp = parse_url($url);
+	$this->protocol = strtolower($comp['scheme']) ?: "http";
+	$protoport = $this->protocol == 'https' ? 443 : 80; # port according to protocol
+	$this->port = intval($comp['port'] ?: $protoport); # this is set even in default case
+	$this->explicitport = $comp['port'] ? ':' . $comp['port'] : ''; # only set if explicitly specified in url, contains leading :
+	$this->user = $comp['user'];
+	$this->pass = $comp['pass'];
+	$this->realhostname = strtolower($comp['host']);
 	$this->hostname = preg_replace('/^www\./', '', $this->realhostname);
-
-	$this->path = preg_replace('#^/$#', '', $url);
-
-	if ($this->port != $protoport)
-		$this->url = "$this->protocol://$this->realhostname:$this->port/$this->path";
-	else
-		$this->url = "$this->protocol://$this->realhostname/$this->path";
-
-	if ($idn)
-	{
-		$realhostname = $this->realhostname;
-
-		if (!preg_match('/^utf-?8$/i', $options['encoding']))
-			$realhostname = utf8_encode($realhostname);
-
-		$encoded = $idn->encode($realhostname);
-
-		if ($encoded != $realhostname)
-			$this->realhostname = $encoded;
-	}
+	$this->path = ltrim($comp['path'] . ($comp['query'] ? '?' . $comp['query'] : ''), '/'); # $this->path is named poorly, it includes path and query
+	$this->url = "$this->protocol://$this->realhostname" . ($this->port != $protoport ? $this->explicitport : '') . "/$this->path";
+	$this->realhostname = idn_to_ascii($this->realhostname) ?: $this->realhostname; # punycode or original
 }
 
 
@@ -445,7 +389,7 @@ function get_multi($p=null)
 	foreach ($urls as $key => $url)
 	{
 		$handle = curl_init();
-		curl_setopt($handle, CURLOPT_URL, $url['url']);
+		curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], $url['url']));
 		curl_setopt_array($handle, $opts);
 		curl_multi_add_handle($mh, $handle);
 		$keys[$handle] = $key;