summaryrefslogtreecommitdiff
path: root/urlcache/urlcache.class
diff options
context:
space:
mode:
Diffstat (limited to 'urlcache/urlcache.class')
-rw-r--r--urlcache/urlcache.class217
1 files changed, 0 insertions, 217 deletions
diff --git a/urlcache/urlcache.class b/urlcache/urlcache.class
deleted file mode 100644
index b058a1c..0000000
--- a/urlcache/urlcache.class
+++ /dev/null
@@ -1,217 +0,0 @@
-<?php
-/*
-** $Id$
-**
-** Relog Internet Tools 3 Library ("ITOOLS3")
-**
-** it_urlcache.class - Interface to URL content cache in DB
-**
-** This class relies on a DB table with the following schema:
-** url CHAR(255) NOT NULL,
-** updateinterval INT NOT NULL,
-** lastupdate INT NOT NULL,
-** nextupdate INT NOT NULL,
-** contentmd5 CHAR(32) NOT NULL,
-** content TEXT NOT NULL,
-** PRIMARY KEY (url),
-** KEY (nextupdate)
-*/
-
-/* PRIVATE: built-in defaults used to initialise it_urlcache instances */
-define('_IT_URLCACHE_DEFAULT_INTERVAL', 300);  /* initial and minimum update interval, in seconds */
-define('_IT_URLCACHE_MAX_CONNECTIONS', 30);    /* upper bound on URLs opened at the same time */
-
-/*
- * URL content cache backed by a DB table (one record per URL, keyed by url).
- * Frontend code calls register() / get_content(); a background process calls
- * update_cache() to refetch every URL whose nextupdate time has passed.
- */
-class it_urlcache extends it_db_record
-{
-    /* PRIVATE */
-    var $default_interval = _IT_URLCACHE_DEFAULT_INTERVAL;  /* initial and minimum update interval (seconds) */
-    var $max_connections = _IT_URLCACHE_MAX_CONNECTIONS;    /* max. number of URLs kept open at the same time */
-
-/*
- * Set the update interval (in seconds) given to newly created records. It is
- * also the lower bound the interval of a changing URL is clamped to.
- */
-function set_default_interval($default_interval)
-{
-    $this->default_interval = $default_interval;
-}
-
-
-/*
- * Set the maximum number of URLs _fetch_urls() keeps open simultaneously.
- */
-function set_max_connections($max_connections)
-{
-    $this->max_connections = $max_connections;
-}
-
-
-/*
- * Register a URL to be fetched by the background process.
- * Does nothing if the URL already has a record.
- * Note: get_content() will return an empty string until the URL is fetched
- * for the first time
- */
-function register($url)
-{
-    if (!$this->read($url))
-    {
-        $tags = array("url" => $url, "updateinterval" => $this->default_interval);
-        $this->create($tags);
-    }
-}
-
-
-/*
- * Removes the URL from the cache. It won't be fetched again until register()
- * is called again.
- */
-function unregister($url)
-{
-    if ($this->read($url))
-        $this->delete();
-}
-
-
-/*
- * Get content of URL. Returns empty string if the URL is not in the cache
- * or was never successfully fetched.
- */
-function get_content($url)
-{
-    /* Without this check a failed read() would hand back whatever */
-    /* $this->data still holds instead of the documented empty string */
-    if (!$this->read($url) || !isset($this->data['content']))
-        return "";
-
-    return $this->data['content'];
-}
-
-
-/*
- * Try to refetch all URLs in the database whose nextupdate time has passed.
- * Note: This is intended for background process use only as it may take
- * a while to finish...
- */
-function update_cache()
-{
-    $now = time();
-    $pages = array();
-
-    /* $now is an integer from time(), so embedding it in the query is safe */
-    $result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url");
-
-    while (list($url) = $this->table->db->fetch_array($result))
-        $pages[] = $url;
-
-    $this->table->db->free($result);
-
-    $this->_fetch_urls($pages, $now);
-}
-
-
-/*
- * PRIVATE
- * Store freshly fetched $content for $url and schedule the next fetch.
- *
- * The update interval adapts to how often the content changes:
- * - content unchanged (same md5): if it has been stable for more than two
- *   intervals, the interval grows by factor 1.5; only the schedule is stored.
- * - content changed: if the change came within half an interval, the
- *   interval shrinks by factor 1.5 but never below default_interval;
- *   content, md5 and lastupdate are stored.
- * lastupdate therefore holds the time of the last content change.
- * A URL without a record gets one created with the default interval.
- * Intervals are truncated to integers to match the INT columns.
- */
-function _update_cache($url, $content, $now)
-{
-    $contentmd5 = md5($content);
-
-    if ($this->read($url))
-    {
-        $interval = (int)$this->data['updateinterval'];
-        $age = $now - $this->data['lastupdate'];
-
-        if ($this->data['contentmd5'] == $contentmd5)
-        {
-            if ($age > ($interval * 2))
-                $interval = (int)($interval * 1.5);
-
-            $tags = array("updateinterval" => $interval, "nextupdate" => $now + $interval);
-        }
-        else
-        {
-            if ($age < ($interval / 2))
-                $interval = (int)($interval / 1.5);
-
-            if ($interval < $this->default_interval)
-                $interval = $this->default_interval;
-
-            $tags = array("updateinterval" => $interval, "lastupdate" => $now, "nextupdate" => $now + $interval, "contentmd5" => $contentmd5, "content" => $content);
-        }
-
-        $this->update($tags);
-    }
-    else
-    {
-        $tags = array("url" => $url, "updateinterval" => $this->default_interval, "lastupdate" => $now, "nextupdate" => $now + $this->default_interval, "contentmd5" => $contentmd5, "content" => $content);
-        $this->create($tags);
-    }
-}
-
-
-/*
- * PRIVATE
- * Open $url for reading and switch the stream to non-blocking mode.
- * Returns the stream, or false if the URL could not be opened.
- */
-function _open_url($url)
-{
-    debug("Opening $url", 10);
-
-    if ($fd = fopen($url, "r"))
-    {
-        socket_set_blocking($fd, false);
-        debug("Opened $url", 10);
-    }
-
-    return $fd;
-}
-
-
-/*
- * PRIVATE
- * Fetch all URLs in array $pages (a list indexed 0..n-1) and update the cache
- * database accordingly. At most max_connections URLs are open at any time;
- * whenever one finishes, its slot is reused for the next URL that can be
- * opened. URLs that cannot be opened are skipped and their records are left
- * untouched. $now is the timestamp stored with every updated record.
- */
-function _fetch_urls($pages, $now)
-{
-    $total = count($pages);
-    $pos = 0;               /* index of the next page to open */
-    $fds = array();         /* slot => open stream, or 0 if the slot is idle */
-    $urls = array();        /* slot => URL being fetched */
-    $content = array();     /* slot => data received so far */
-    $count = 0;             /* number of open streams */
-
-    /* Fill the slots; a URL that fails to open must not use one up */
-    while (($count < $this->max_connections) && ($pos < $total))
-    {
-        $url = $pages[$pos++];
-
-        if ($fd = $this->_open_url($url))
-        {
-            $fds[] = $fd;
-            $urls[] = $url;
-            $content[] = "";
-            $count++;
-        }
-    }
-
-    while ($count > 0)
-    {
-        $read = 0;  /* bytes received during this pass over all slots */
-
-        for ($i = 0; $i < count($fds); $i++)
-        {
-            if ($fds[$i])
-            {
-                if (!feof($fds[$i]))
-                {
-                    $data = fread($fds[$i], 4096);
-                    $content[$i] .= $data;
-                    $read += strlen($data);
-                    debug("Read " . strlen($data) . " from $urls[$i]", 10);
-                }
-
-                if (feof($fds[$i]))
-                {
-                    fclose($fds[$i]);
-                    $fds[$i] = 0;
-                    $count--;
-                    $this->_update_cache($urls[$i], $content[$i], $now);
-                    debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10);
-
-                    /* Schedule next url for retrieval; keep trying further */
-                    /* pages until one opens so the slot is not lost */
-                    while (!$fds[$i] && ($pos < $total))
-                    {
-                        $url = $pages[$pos++];
-
-                        if ($fd = $this->_open_url($url))
-                        {
-                            $fds[$i] = $fd;
-                            $urls[$i] = $url;
-                            $content[$i] = "";
-                            $count++;
-                        }
-                    }
-                }
-            }
-        }
-
-        /* We are in non-blocking mode, be nice */
-        if (($count > 0) && ($read == 0))
-        {
-            sleep(1);
-            debug("Sleeping...", 10);
-        }
-    }
-}
-
-} /* End class it_urlcache */