diff options
Diffstat (limited to 'urlcache')
-rw-r--r-- | urlcache/.cvsignore | 2 | ||||
-rw-r--r-- | urlcache/Makefile | 43 | ||||
-rw-r--r-- | urlcache/urlcache.class | 217 | ||||
-rw-r--r-- | urlcache/urlcache.php | 28 | ||||
-rw-r--r-- | urlcache/urlcache.sql | 14 |
5 files changed, 0 insertions, 304 deletions
diff --git a/urlcache/.cvsignore b/urlcache/.cvsignore deleted file mode 100644 index b8e8856..0000000 --- a/urlcache/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -*.slib -*.lib diff --git a/urlcache/Makefile b/urlcache/Makefile deleted file mode 100644 index 0cccffd..0000000 --- a/urlcache/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -## -## $Id$ -## -## Makefile for itools/urlcache.lib -## - -CPP= cpp -QUIETMAKE= $(MAKE) -s -PHPCOMPILE= /usr/local/bin/phpcompile - -MODULE= urlcache -SUBDIRS= -CLASSES= urlcache.class - -# -# Library creation rules, do not change stuff below... -# -SLIB= $(MODULE).slib -LIB= ../$(MODULE).lib - -all: $(LIB) - -$(LIB): $(SLIB) - @if [ -x $(PHPCOMPILE) ]; then (echo Compiling $(SLIB) to $(LIB) ...) 1>&2; $(PHPCOMPILE) <$(SLIB) >$(LIB); else (echo $(PHPCOMPILE) not found, copying $(SLIB) to $(LIB) ...) 1>&2; cp $(SLIB) $(LIB); fi - -$(SLIB): $(CLASSES) DUMMY - @(echo Creating $(SLIB) from $(SUBDIRS) $(CLASSES) ...) 1>&2 - @echo "<?php" >$(SLIB) - @(for dir in DUMMY $(SUBDIRS); do (test -d $$dir && cd $$dir && $(QUIETMAKE) cat); done; for class in DUMMY $(CLASSES); do test -f $$class && cat $$class; done) | $(CPP) -P -undef | perl -ne 's/^\s+//g; print unless /^\s*$$/' | grep -v "^<?php" | grep -v "^?>" >>$(SLIB) - @echo "?>" >>$(SLIB) - -$(SUBDIRS):: - @(cd $@; $(QUIETMAKE)) - -DUMMY: - -cat: $(SLIB) - @cat $(SLIB) - -clean: - @(echo Cleaning $(SLIB) $(LIB) ...) 1>&2 - @rm -f $(SLIB) $(LIB) - @for dir in DUMMY $(SUBDIRS); do (test -d $$dir && cd $$dir && $(QUIETMAKE) $@) || :; done diff --git a/urlcache/urlcache.class b/urlcache/urlcache.class deleted file mode 100644 index b058a1c..0000000 --- a/urlcache/urlcache.class +++ /dev/null @@ -1,217 +0,0 @@ -<?php -/* -** $Id$ -** -** Relog Internet Tools 3 Library ("ITOOLS3") -** -** it_urlcache.class - Interface to URL content cache in DB -** -** This class relies on a DB table with the following scheme: -** url CHAR(255) NOT NULL, -** updateinterval INT NOT NULL, -** lastupdate INT NOT NULL, -** nextupdate INT NOT NULL, -** contentmd5 CHAR(32) NOT NULL, -** content TEXT NOT NULL, -** PRIMARY KEY (url), -** KEY (nextupdate) -*/ - -/* PRIVATE */ -define("_IT_URLCACHE_DEFAULT_INTERVAL", 300); -define("_IT_URLCACHE_MAX_CONNECTIONS", 30); - -class it_urlcache extends it_db_record -{ - /* PRIVATE */ - var $default_interval = _IT_URLCACHE_DEFAULT_INTERVAL; - var $max_connections = _IT_URLCACHE_MAX_CONNECTIONS; - -function set_default_interval($default_interval) -{ - $this->default_interval = $default_interval; -} - - -function set_max_connections($max_connections) -{ - $this->max_connections = $max_connections; -} - - -/* - * Register an URL to be fetched by background process. - * Note: get() will return an empty string until the url is fetched for the - * first time - */ -function register($url) -{ - if (!$this->read($url)) - { - $tags = array("url" => $url, "updateinterval" => $this->default_interval); - $this->create($tags); - } -} - - -/* - * Removes the url from the cache. It won't be fetched again until register() - * is called again. - */ -function unregister($url) -{ - if ($this->read($url)) - $this->delete(); -} - - -/* - * Get content of url. Returns empty string if it was never successfully - * fetched. - */ -function get_content($url) -{ - $this->read($url); - return $this->data['content']; -} - - -/* - * Try to refetch all URLs in the database. - * Note: This is intented for background process use only as it may take - * a while to finish... - */ -function update_cache() -{ - $now = time(); - $pages = array(); - - $result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url"); - - while (list($url) = $this->table->db->fetch_array($result)) - $pages[] = $url; - - $this->table->db->free($result); - - $this->_fetch_urls($pages, $now); -} - - -/* PRIVATE */ -function _update_cache($url, $content, $now) -{ - $contentmd5 = md5($content); - - if ($this->read($url)) - { - $interval = $this->data['updateinterval']; - $age = $now - $this->data['lastupdate']; - - if ($this->data['contentmd5'] == $contentmd5) - { - if ($age > ($interval * 2)) - $interval = $interval * 1.5; - - $tags = array("updateinterval" => $interval, "nextupdate" => $now + $interval); - } - else - { - if ($age < ($interval / 2)) - $interval = $interval / 1.5; - - if ($interval < $this->default_interval) - $interval = $this->default_interval; - - $tags = array("updateinterval" => $interval, "lastupdate" => $now, "nextupdate" => $now + $interval, "contentmd5" => $contentmd5, "content" => $content); - } - - $this->update($tags); - } - else - { - $tags = array("url" => $url, "updateinterval" => $this->default_interval, "lastupdate" => $now, "nextupdate" => $now + $this->default_interval, "contentmd5" => $contentmd5, "content" => $content); - $this->create($tags); - } -} - - -/* - * PRIVATE - * Fetch URL in array $urls and store update cache database accordingly - */ -function _fetch_urls($pages, $now) -{ - $pos = 0; - $fds = array(); - $urls = array(); - $content = array(); - $count = 0; - - for ($pos = 0; ($pos < count($pages)) && ($pos < $this->max_connections); $pos++) - { - debug("Opening $pages[$pos]", 10); - - if ($fd = fopen($pages[$pos], "r")) - { - socket_set_blocking($fd, false); - $fds[] = $fd; - $urls[] = $pages[$pos]; - $content[] = ""; - $count++; - debug("Opened $pages[$pos]", 10); - } - } - - while ($count > 0) - { - $read = 0; - - for ($i = 0; $i < count($fds); $i++) - { - if ($fds[$i]) - { - if (!feof($fds[$i])) - { - $data = fread($fds[$i], 4096); - $content[$i] .= $data; - $read += strlen($data); - debug("Read " . strlen($data) . " from $urls[$i]", 10); - } - - if (feof($fds[$i])) - { - fclose($fds[$i]); - $fds[$i] = 0; - $count--; - $this->_update_cache($urls[$i], $content[$i], $now); - debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10); - - /* Schedule next url for retrieval */ - if ($pos < count($pages)) - { - if ($fd = fopen($pages[$pos], "r")) - { - socket_set_blocking($fd, false); - $fds[$i] = $fd; - $urls[$i] = $pages[$pos]; - $content[$i] = ""; - $count++; - debug("Opened $pages[$pos]", 10); - } - - $pos++; - } - } - } - } - - /* We are in non-blocking mode, be nice */ - if (($count > 0) && ($read == 0)) - { - sleep(1); - debug("Sleeping...", 10); - } - } -} - -} /* End class it_urlcache */ diff --git a/urlcache/urlcache.php b/urlcache/urlcache.php deleted file mode 100644 index fb6fd33..0000000 --- a/urlcache/urlcache.php +++ /dev/null @@ -1,28 +0,0 @@ -#!/www/server/bin/php -q -<?php -/* -** $Id$ -** -** ITools - the Internet Tools Library -** -** Copyright (C) 1995-2003 by the ITools Authors. -** This program is free software; you can redistribute it and/or -** modify it under the terms of either the GNU General Public License -** or the GNU Lesser General Public License, as published by the Free -** Software Foundation. See http://www.gnu.org/licenses/ for details. -** -** cacher.php - Script to be called from crontab to keep url cache up-to-date -*/ - -set_time_limit(4 * 60); - -require("itools/itools.lib"); -require("itools/urlcache.lib"); - -$it_debug = new it_debug(10, "weber@search.ch"); -$it_db = new it_db("urlcache", "urlcache", "JKhsad34H"); -$table = new it_db_table($it_db, "urlcache"); -$it_urlcache = new it_urlcache($table, "url"); -$it_urlcache->update_cache(); - -?> diff --git a/urlcache/urlcache.sql b/urlcache/urlcache.sql deleted file mode 100644 index fcf8659..0000000 --- a/urlcache/urlcache.sql +++ /dev/null @@ -1,14 +0,0 @@ -DROP TABLE IF EXISTS urlcache; - -CREATE TABLE urlcache -( - url CHAR(255) NOT NULL, - updateinterval INT NOT NULL, - lastupdate INT NOT NULL, - nextupdate INT NOT NULL, - contentmd5 CHAR(32) NOT NULL, - content TEXT NOT NULL, - - PRIMARY KEY (url), - KEY (nextupdate) -); |