diff options
Diffstat (limited to 'it_url.class')
-rw-r--r-- | it_url.class | 97 |
1 files changed, 20 insertions, 77 deletions
diff --git a/it_url.class b/it_url.class index ab1f90f..1a8d171 100644 --- a/it_url.class +++ b/it_url.class @@ -1,6 +1,6 @@ <?php /* -** Copyright (C) 1995-2007 by the ITools Authors. +** Copyright (C) 1995-2016 by the ITools Authors. ** This file is part of ITools - the Internet Tools Library ** ** ITools is free software; you can redistribute it and/or modify @@ -27,7 +27,7 @@ class it_url var $hostname; /* E.g. relog.ch */ var $realhostname; /* E.g. www.relog.ch */ var $port; /* E.g. 80 */ - var $explicitport; /* E.g. 80, explicitly set in rawurl */ + var $explicitport; /* E.g. :80, explicitly set in rawurl */ var $path; /* E.g. / */ var $rawurl; /* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */ var $user; /* E.g. falcon */ @@ -42,78 +42,22 @@ class it_url /** * Constructor: canonicalize an URL * @param $url URL this object represents - * @param $options['encoding'] encoding of hostname ('utf-8', 'iso-8859-1' etc.) */ -function it_url($url = null, $options = array()) +function it_url($url = null) { $this->rawurl = $url; - - if (preg_match('#^([a-z]+):/+(?:([^:]*):([^@]*)@)?(.*)$#is', $url, $regs)) - { - $this->protocol = strtolower($regs[1]); - $this->user = $regs[2]; - $this->pass = $regs[3]; - $url = $regs[4]; - } - else if (preg_match('/^[a-z]:/', $url) || preg_match('#^/#', $url)) - { - $this->protocol = 'file'; - } - else - $this->protocol = 'http'; - - /* Default port */ - if ($this->protocol == 'http') - $protoport = 80; - else if ($this->protocol == 'https') - $protoport = 443; - - $this->port = intval($protoport); - - if (class_exists('Net_IDNA', false)) - $idn = Net_IDNA::getInstance(); - - if ($idn) - $pattern = '^([^/]+)/*(.*)$'; - else - $pattern = '^([a-z0-9_:\.-]+)/*(.*)$'; - - $this->explicitport = ''; - if (preg_match("#$pattern#is", $url, $regs)) - { - list($hostname, $port) = explode(':', $regs[1]); - - $this->realhostname = strtolower($hostname); - - if ($port) { - $this->port = intval($port); - $this->explicitport = ":" . $port; - } - - $url = $regs[2]; - } - + $comp = parse_url($url); + $this->protocol = strtolower($comp['scheme']) ?: "http"; + $protoport = $this->protocol == 'https' ? 443 : 80; # port according to protocol + $this->port = intval($comp['port'] ?: $protoport); # this is set even in default case + $this->explicitport = $comp['port'] ? ':' . $comp['port'] : ''; # only set if explicitly specified in url, contains leading : + $this->user = $comp['user']; + $this->pass = $comp['pass']; + $this->realhostname = strtolower($comp['host']); $this->hostname = preg_replace('/^www\./', '', $this->realhostname); - - $this->path = preg_replace('#^/$#', '', $url); - - if ($this->port != $protoport) - $this->url = "$this->protocol://$this->realhostname:$this->port/$this->path"; - else - $this->url = "$this->protocol://$this->realhostname/$this->path"; - - if ($idn) - { - $realhostname = $this->realhostname; - - if (!preg_match('/^utf-?8$/i', $options['encoding'])) - $realhostname = utf8_encode($realhostname); - - $encoded = $idn->encode($realhostname); - - if ($encoded != $realhostname) - $this->realhostname = $encoded; - } + $this->path = ltrim($comp['path'] . ($comp['query'] ? '?' . $comp['query'] : ''), '/'); # $this->path is named poorly, it includes path and query + $this->url = "$this->protocol://$this->realhostname" . ($this->port != $protoport ? $this->explicitport : '') . "/$this->path"; + $this->realhostname = idn_to_ascii($this->realhostname) ?: $this->realhostname; # punycode or original } @@ -349,10 +293,8 @@ static function curl_opts($p=array()) CURLOPT_FOLLOWLOCATION => false, CURLOPT_HTTPHEADER => $headers, - CURLOPT_SSL_VERIFYPEER => true, - CURLOPT_SSL_VERIFYHOST => 2, CURLOPT_CAPATH => '/etc/ssl/certs/', - CURLOPT_SSL_VERIFYPEER => 1, + CURLOPT_SSL_VERIFYPEER => true, CURLOPT_SSL_VERIFYHOST => 2, CURLINFO_HEADER_OUT => 1, @@ -445,7 +387,7 @@ function get_multi($p=null) foreach ($urls as $key => $url) { $handle = curl_init(); - curl_setopt($handle, CURLOPT_URL, $url['url']); + curl_setopt($handle, CURLOPT_URL, it::replace([ '^//' => "http://" ], $url['url'])); curl_setopt_array($handle, $opts); curl_multi_add_handle($mh, $handle); $keys[$handle] = $key; @@ -541,6 +483,7 @@ function get_cache_filename($p) * @param $p['keepfailed'] keep old versions of files if download fails (sending alerts conservatively) * @param $p['returnheaders'] Return array($path, $headers) instead of simply $path * @param $p['it_error'] parameters for it::error() + * @return Cache filename or false if fetch failed */ function get_cache($p = array()) { @@ -645,11 +588,11 @@ function get_cache($p = array()) } # cache cleanup at night - if ((date('H')*3600 + date('i')*60 < $p['cleanbefore']) && (time()-@filemtime($p['cachedir'] . "/cleaned") > 80000)) + if (date('H') >= 1 && (date('H')*3600 + date('i')*60 < $p['cleanbefore']) && (time()-@filemtime($p['cachedir'] . "/cleaned") > 80000)) { touch($p['cachedir'] . "/cleaned"); $maxagemin = intval($p['maxage']/60); - exec("nohup bash -c 'cd {$p['cachedir']} && sleep 10 && find ?? -mmin +$maxagemin -print0 | xargs -0 -r rm' </dev/null >/dev/null 2>&1 &"); + exec("nohup bash -c 'cd {$p['cachedir']} && for i in ??; do sleep 20; ionice -c 3 find \$i -mmin +$maxagemin -delete; done' </dev/null >/dev/null 2>&1 &"); } if (EDC('getcachelog')) @@ -665,7 +608,7 @@ function get_cache($p = array()) */ function get_cache_contents($p) { - return ($fn = self::get_cache($p)) ? file_get_contents($fn) : ($p['safety'] === 0 ? null : it::error(array('title' => "failed getting " . $p['url'], 'body' => var_export($p, true)))); + return ($fn = self::get_cache($p)) ? file_get_contents($fn) : ($p['safety'] === 0 ? null : it::error(array('title' => "failed getting " . it_url::absolute($p['url']), 'body' => var_export($p, true)))); } /** |