diff options
Diffstat (limited to 'urlcache/urlcache.class')
-rw-r--r-- | urlcache/urlcache.class | 217 |
1 files changed, 0 insertions, 217 deletions
<?php
/*
**	$Id$
**
**	Relog Internet Tools 3 Library ("ITOOLS3")
**
**	it_urlcache.class - Interface to URL content cache in DB
**
**	This class relies on a DB table with the following scheme:
**	  url CHAR(255) NOT NULL,
**	  updateinterval INT NOT NULL,
**	  lastupdate INT NOT NULL,
**	  nextupdate INT NOT NULL,
**	  contentmd5 CHAR(32) NOT NULL,
**	  content TEXT NOT NULL,
**	  PRIMARY KEY (url),
**	  KEY (nextupdate)
*/

/* PRIVATE */
define("_IT_URLCACHE_DEFAULT_INTERVAL", 300);	/* Initial / minimum update interval */
define("_IT_URLCACHE_MAX_CONNECTIONS", 30);	/* Max. simultaneously open URLs */

class it_urlcache extends it_db_record
{
	/* PRIVATE */
	var $default_interval = _IT_URLCACHE_DEFAULT_INTERVAL;
	var $max_connections = _IT_URLCACHE_MAX_CONNECTIONS;

/*
 * Set the update interval used for newly created records. It is also the
 * lower bound applied when an interval is shortened in _update_cache().
 */
function set_default_interval($default_interval)
{
	$this->default_interval = $default_interval;
}


/*
 * Set the maximum number of URLs _fetch_urls() keeps open at the same time.
 */
function set_max_connections($max_connections)
{
	$this->max_connections = $max_connections;
}


/*
 * Register an URL to be fetched by background process.
 * Does nothing if a record for the url already exists.
 * Note: get_content() will return an empty string until the url is fetched
 * for the first time
 */
function register($url)
{
	if (!$this->read($url))
	{
		$tags = array("url" => $url, "updateinterval" => $this->default_interval);
		$this->create($tags);
	}
}


/*
 * Removes the url from the cache. It won't be fetched again until register()
 * is called again.
 */
function unregister($url)
{
	if ($this->read($url))
		$this->delete();
}


/*
 * Get content of url. Returns empty string if the url is not in the cache
 * or was never successfully fetched.
 */
function get_content($url)
{
	/* Don't hand out data left over from a previously read record */
	if (!$this->read($url))
		return "";

	return $this->data['content'];
}


/*
 * Try to refetch all URLs in the database whose nextupdate time has passed.
 * Note: This is intended for background process use only as it may take
 * a while to finish...
 */
function update_cache()
{
	$now = time();
	$pages = array();

	$result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url");

	while (list($url) = $this->table->db->fetch_array($result))
		$pages[] = $url;

	$this->table->db->free($result);

	$this->_fetch_urls($pages, $now);
}


/*
 * PRIVATE
 * Store freshly fetched $content for $url and schedule the next update.
 *
 * For an existing record the update interval adapts to how often the content
 * changes ("age" is the time since lastupdate, which is only set when the
 * content changed):
 *  - content unchanged: if age exceeds twice the interval, the interval is
 *    multiplied by 1.5; only updateinterval and nextupdate are written.
 *  - content changed: if age is less than half the interval, the interval is
 *    divided by 1.5, but never drops below default_interval; the new
 *    content, its md5 and lastupdate are written as well.
 * If no record exists, one is created using default_interval.
 */
function _update_cache($url, $content, $now)
{
	$contentmd5 = md5($content);

	if ($this->read($url))
	{
		$interval = $this->data['updateinterval'];
		$age = $now - $this->data['lastupdate'];

		if ($this->data['contentmd5'] == $contentmd5)
		{
			if ($age > ($interval * 2))
				$interval = $interval * 1.5;

			$tags = array("updateinterval" => $interval, "nextupdate" => $now + $interval);
		}
		else
		{
			if ($age < ($interval / 2))
				$interval = $interval / 1.5;

			if ($interval < $this->default_interval)
				$interval = $this->default_interval;

			$tags = array("updateinterval" => $interval, "lastupdate" => $now, "nextupdate" => $now + $interval, "contentmd5" => $contentmd5, "content" => $content);
		}

		$this->update($tags);
	}
	else
	{
		$tags = array("url" => $url, "updateinterval" => $this->default_interval, "lastupdate" => $now, "nextupdate" => $now + $this->default_interval, "contentmd5" => $contentmd5, "content" => $content);
		$this->create($tags);
	}
}


/*
 * PRIVATE
 * Open the next URL of $pages, starting at index $pos, in non-blocking mode.
 * URLs that cannot be opened are skipped. $pos is advanced past every URL
 * that was tried.
 * Returns array(file descriptor, url), or false if no more URL could be opened.
 */
function _open_next_url($pages, &$pos)
{
	while ($pos < count($pages))
	{
		$url = $pages[$pos++];
		debug("Opening $url", 10);

		if ($fd = fopen($url, "r"))
		{
			stream_set_blocking($fd, false);
			debug("Opened $url", 10);
			return array($fd, $url);
		}
	}

	return false;
}


/*
 * PRIVATE
 * Fetch the URLs in array $pages and update the cache database accordingly.
 * At most max_connections URLs are open at any time; whenever one is
 * finished, its slot is reused for the next URL that can be opened.
 */
function _fetch_urls($pages, $now)
{
	$pos = 0;		/* Index of next entry in $pages to open */
	$fds = array();		/* Slot => file descriptor (0 if slot unused) */
	$urls = array();	/* Slot => url being fetched */
	$content = array();	/* Slot => data read so far */
	$count = 0;		/* Number of currently open file descriptors */

	/* Open initial batch of connections */
	while (($count < $this->max_connections) && ($conn = $this->_open_next_url($pages, $pos)))
	{
		$fds[] = $conn[0];
		$urls[] = $conn[1];
		$content[] = "";
		$count++;
	}

	while ($count > 0)
	{
		$read = 0;

		for ($i = 0; $i < count($fds); $i++)
		{
			if (!$fds[$i])
				continue;

			if (!feof($fds[$i]))
			{
				$data = fread($fds[$i], 4096);
				$content[$i] .= $data;
				$read += strlen($data);
				debug("Read " . strlen($data) . " from $urls[$i]", 10);
			}

			if (feof($fds[$i]))
			{
				fclose($fds[$i]);
				$fds[$i] = 0;
				$count--;
				$this->_update_cache($urls[$i], $content[$i], $now);
				debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10);

				/*
				 * Schedule next url for retrieval. Unopenable urls are skipped
				 * so that one failure doesn't leave the rest of $pages unfetched.
				 */
				if ($conn = $this->_open_next_url($pages, $pos))
				{
					$fds[$i] = $conn[0];
					$urls[$i] = $conn[1];
					$content[$i] = "";
					$count++;
				}
			}
		}

		/* We are in non-blocking mode, be nice */
		if (($count > 0) && ($read == 0))
		{
			sleep(1);
			debug("Sleeping...", 10);
		}
	}
}

} /* End class it_urlcache */