1 files changed, 98 insertions, 25 deletions
diff --git a/it_url.class b/it_url.class
index 7ed0bce..4d3c70b 100644
--- a/it_url.class
+++ b/it_url.class
@@ -27,6 +27,7 @@ class it_url
 	var $hostname;		/* E.g. relog.ch */
 	var $realhostname;	/* E.g. www.relog.ch */
 	var $port;		/* E.g. 80 */
+	var $explicitport; /* E.g. ":80" (with leading colon) if a port is given in rawurl, otherwise '' */
 	var $path;		/* E.g. / */
 	var $rawurl;		/* E.g. HTTP://falcon:joshua@www.Relog.CH.:80/default.asp */
 	var $user;		/* E.g. falcon */
@@ -81,14 +82,17 @@ function it_url($url = null, $options = array())
 	else
 		$pattern = '^([a-z0-9_:\.-]+)/*(.*)$';
 
+	$this->explicitport = '';
 	if (preg_match("#$pattern#is", $url, $regs))
 	{
 		list($hostname, $port) = explode(':', $regs[1]);
 
 		$this->realhostname = strtolower($hostname);
 
-		if ($port)
+		if ($port) {
 			$this->port = intval($port);
+			$this->explicitport = ":" . $port;
+		}
 
 		$url = $regs[2];
 	}
@@ -251,20 +255,16 @@ function get($p=null, $timeout=5)
 		$url = new it_url($p['url']);
 
 	if ($url->protocol == 'http')
-	{
 		$result = $url->request($p);
-		if ($url->headers['Location'] && preg_match('#^(https?://[^/]*)?(/)?(.*)$#i', $url->headers['Location'], $parts) && ($parts[1] != $url->url)) # Handle redirects (supports relative and global)
-		{
-			unset($p['url'], $p['headers']['Host']);
-			$url->it_url($parts[1] ? $parts[1].$parts[2].$parts[3] : $url->protocol.'://'.$url->realhostname.($parts[2] ? $parts[2].$parts[3] : '/'.dirname($url->path).'/'.$parts[3]));
-			if (++$url->redir <= 4)  /* Avoid infinite redirects */
-				return $url->get($p);
-		}
-	}
 	else
+		$result = $url->request_curl($p);
+
+	if ($url->headers['Location'] && preg_match('#^(https?://[^/]*)?(/)?(.*)$#i', $url->headers['Location'], $parts) && ($parts[1] != $url->url)) # Handle redirects (supports relative and global)
 	{
-		$results = self::get_multi(array('urls' => array('one' => $p['url'])) + $p);
-		$result = $results['one'];
+		unset($p['url'], $p['headers']['Host']);
+		$url->it_url($parts[1] ? $parts[1].$parts[2].$parts[3] : $url->protocol.'://'.$url->realhostname.($parts[2] ? $parts[2].$parts[3] : '/'.dirname($url->path).'/'.$parts[3]));
+		if (++$url->redir <= 4)  /* Avoid infinite redirects */
+			return $url->get($p);
 	}
 
 	if (!$result && $p['retries'] > 0 && $url->result < 400)
@@ -310,7 +310,7 @@ function request($p=array())
 			$data = $p['data'];
 
 		$p['headers'] = (array)$p['headers'] + array(
-			'Host' => $url->realhostname . ($url->port != 80 ? ":" . $url->port : ''),
+			'Host' => $url->realhostname . $url->explicitport,
 			'User-Agent' => "Mozilla/5.0 (compatible; MSIE 9.0; ITools)",
 			'Accept-Language' => $p['headers']['Accept-Language'] ?: T_lang(), # can prevent loading of it_text
 		);
@@ -390,6 +390,86 @@ function request($p=array())
 	return $result;
 }
 
+/**
+ * Build the curl option array shared by request_curl() and get_multi()
+ * @param $p['headers']      Assoc array of request headers, name => value (optional)
+ * @param $p['maxlength']    If set, abort the transfer once this many bytes were downloaded
+ * @param $p['timeout']      Seconds the transfer may stay below 5 bytes/s (default 5)
+ * @param $p['totaltimeout'] Timeout in seconds for the whole transfer (default 999999)
+ * @return Array of curl option => value; callers override entries with the + operator
+ */
+static function curl_opts($p=array())
+{
+	$p += array('totaltimeout' => "999999", 'timeout' => 5);
+	$headers = $add = array();	# always arrays: CURLOPT_HTTPHEADER must not be null
+	foreach ((array)$p['headers'] as $header => $value)
+		$headers[] = "$header: $value";
+	if ($maxlength = $p['maxlength'])	# a progress callback returning non-zero aborts the transfer
+		$add = [CURLOPT_NOPROGRESS => false, CURLOPT_PROGRESSFUNCTION => function ($dummy0, $dummy1, $size, $dummy2, $dummy3) use ($maxlength) { return $size < $maxlength ? 0 : 1; }];
+	return $add + [
+		CURLOPT_HEADER => false,
+		CURLOPT_RETURNTRANSFER => true,
+		CURLOPT_TIMEOUT => $p['totaltimeout'],
+		CURLOPT_LOW_SPEED_LIMIT => 5,
+		CURLOPT_LOW_SPEED_TIME => $p['timeout'],
+		CURLOPT_FOLLOWLOCATION => false,
+		CURLOPT_HTTPHEADER => $headers,
+		CURLOPT_SSL_VERIFYPEER => 0,	# NOTE(review): certificate verification is switched off - confirm this is intended
+		CURLOPT_SSL_VERIFYHOST => 0,
+		CURLINFO_HEADER_OUT => 1,
+	];
+}
+
+/**
+ * Drop in replacement for request() using curl, redirects are not followed. Stores the raw
+ * response header in $this->header, the body in $this->data and calls parse_http_header()
+ * @param $p['url']        URL to fetch, re-initializes this object if given
+ * @param $p['headers']    Array of HTTP headers to send; Host, User-Agent and Accept-Language get defaults
+ * @param $p['maxlength']  Return false if more than this many body bytes were received
+ * @param $p['safety']     If 1, report a failed transfer with it::error()
+ * @param $p['it_error']   Extra parameters merged into the it::error() call
+ * @return Response body or false on failure
+ *
+ * todo:
+ * @param $p['filemtime']    Add HTTP header to only fetch when newer than this, otherwise return true instead of data
+ * @param $p['data']         POST data array with key-value pairs
+ * @param $p['method']       different HTTP method
+ */
+function request_curl($p=array())
+{
+	$url = $this;
+	if ($p['url'])
+		$this->it_url($p['url']);
+
+	$p['headers'] = (array)$p['headers'] + array(
+		'Host' => $url->realhostname . $url->explicitport,
+		'User-Agent' => "Mozilla/4.0 (compatible; MSIE 7.0; ITools)",
+		'Accept-Language' => $p['headers']['Accept-Language'] ?: T_lang(), # can prevent loading of it_text
+	);
+
+	$opts = [CURLOPT_FOLLOWLOCATION => false, CURLOPT_HEADER => 1] + self::curl_opts($p);
+	$curl = curl_init($url->rawurl);
+	curl_setopt_array($curl, $opts);
+	$got = curl_exec($curl);
+	EDC('curlinfo', curl_getinfo($curl));
+
+	if ($got === false && $p['safety'] == 1)
+		it::error(array('title' => "problem getting $url->url with curl: " . curl_error($curl)) + (array)$p['it_error']);
+	curl_close($curl);	# release the handle; curl_error() above still needed it open
+
+	if (!$got)
+		return false;
+
+	# CURLOPT_HEADER => 1 prepends the response header; pad in case the separator is missing
+	list($url->header, $url->data) = array_pad(explode("\r\n\r\n", $got, 2), 2, '');
+	$url->parse_http_header($url->header);
+
+	if ($p['maxlength'] && (strlen($url->data) > $p['maxlength']))
+		return false;
+	return $url->data;
+}
+
+
 
 /**
  * Get multiple URL in parallel with timeout. Needs to be called statically
@@ -403,22 +483,15 @@ function request($p=array())
  */
 function get_multi($p=null)
 {
-	$p += array('totaltimeout' => "999999", 'timeout' => 5, 'retries' => 1);
+	$p += array('retries' => 1);
+
 	$p['headers'] = (array)$p['headers'] + array(
 		'User-Agent' => "Mozilla/4.0 (compatible; MSIE 7.0; ITools)",
 		'Accept-Language' => T_lang(),
 	);
-	foreach ($p['headers'] as $header => $value)
-		$headers[] = "$header: $value";
-	$opts = array(
-		CURLOPT_HEADER => false,
-		CURLOPT_RETURNTRANSFER => true,
-		CURLOPT_TIMEOUT => $p['totaltimeout'],
-		CURLOPT_LOW_SPEED_LIMIT => 5,
-		CURLOPT_LOW_SPEED_TIME => $p['timeout'],
-		CURLOPT_FOLLOWLOCATION => true,
-		CURLOPT_HTTPHEADER => $headers,
-	);
+
+	$opts = [CURLOPT_FOLLOWLOCATION => true] + self::curl_opts($p);
+
 	$mh = curl_multi_init();
 
 	$urls = array();