summary refs log tree commit diff
path: root/urlcache
diff options
context:
space:
mode:
Diffstat (limited to 'urlcache')
-rw-r--r-- urlcache/.cvsignore 2
-rw-r--r-- urlcache/Makefile 43
-rw-r--r-- urlcache/urlcache.class 217
-rw-r--r-- urlcache/urlcache.php 28
-rw-r--r-- urlcache/urlcache.sql 14
5 files changed, 0 insertions, 304 deletions
diff --git a/urlcache/.cvsignore b/urlcache/.cvsignore
deleted file mode 100644
index b8e8856..0000000
--- a/urlcache/.cvsignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.slib
-*.lib
diff --git a/urlcache/Makefile b/urlcache/Makefile
deleted file mode 100644
index 0cccffd..0000000
--- a/urlcache/Makefile
+++ /dev/null
@@ -1,43 +0,0 @@
-##
-## $Id$
-##
-## Makefile for itools/urlcache.lib
-##
-
-CPP= cpp
-QUIETMAKE= $(MAKE) -s
-PHPCOMPILE= /usr/local/bin/phpcompile
-
-MODULE= urlcache
-SUBDIRS=
-CLASSES= urlcache.class
-
-#
-# Library creation rules, do not change stuff below...
-#
-SLIB= $(MODULE).slib
-LIB= ../$(MODULE).lib
-
-all: $(LIB)
-
-$(LIB): $(SLIB)
- @if [ -x $(PHPCOMPILE) ]; then (echo Compiling $(SLIB) to $(LIB) ...) 1>&2; $(PHPCOMPILE) <$(SLIB) >$(LIB); else (echo $(PHPCOMPILE) not found, copying $(SLIB) to $(LIB) ...) 1>&2; cp $(SLIB) $(LIB); fi
-
-$(SLIB): $(CLASSES) DUMMY
- @(echo Creating $(SLIB) from $(SUBDIRS) $(CLASSES) ...) 1>&2
- @echo "<?php" >$(SLIB)
- @(for dir in DUMMY $(SUBDIRS); do (test -d $$dir && cd $$dir && $(QUIETMAKE) cat); done; for class in DUMMY $(CLASSES); do test -f $$class && cat $$class; done) | $(CPP) -P -undef | perl -ne 's/^\s+//g; print unless /^\s*$$/' | grep -v "^<?php" | grep -v "^?>" >>$(SLIB)
- @echo "?>" >>$(SLIB)
-
-$(SUBDIRS)::
- @(cd $@; $(QUIETMAKE))
-
-DUMMY:
-
-cat: $(SLIB)
- @cat $(SLIB)
-
-clean:
- @(echo Cleaning $(SLIB) $(LIB) ...) 1>&2
- @rm -f $(SLIB) $(LIB)
- @for dir in DUMMY $(SUBDIRS); do (test -d $$dir && cd $$dir && $(QUIETMAKE) $@) || :; done
diff --git a/urlcache/urlcache.class b/urlcache/urlcache.class
deleted file mode 100644
index b058a1c..0000000
--- a/urlcache/urlcache.class
+++ /dev/null
@@ -1,217 +0,0 @@
-<?php
-/*
-** $Id$
-**
-** Relog Internet Tools 3 Library ("ITOOLS3")
-**
-** it_urlcache.class - Interface to URL content cache in DB
-**
-** This class relies on a DB table with the following scheme:
-** url CHAR(255) NOT NULL,
-** updateinterval INT NOT NULL,
-** lastupdate INT NOT NULL,
-** nextupdate INT NOT NULL,
-** contentmd5 CHAR(32) NOT NULL,
-** content TEXT NOT NULL,
-** PRIMARY KEY (url),
-** KEY (nextupdate)
-*/
-
-/* PRIVATE */
-define("_IT_URLCACHE_DEFAULT_INTERVAL", 300);
-define("_IT_URLCACHE_MAX_CONNECTIONS", 30);
-
-class it_urlcache extends it_db_record
-{
- /* PRIVATE */
- var $default_interval = _IT_URLCACHE_DEFAULT_INTERVAL;
- var $max_connections = _IT_URLCACHE_MAX_CONNECTIONS;
-
-function set_default_interval($default_interval)
-{
- $this->default_interval = $default_interval;
-}
-
-
-function set_max_connections($max_connections)
-{
- $this->max_connections = $max_connections;
-}
-
-
-/*
- * Register a URL to be fetched by the background process.
- * Note: get_content() will return an empty string until the URL is fetched
- * for the first time.
- */
-function register($url)
-{
- if (!$this->read($url))
- {
- $tags = array("url" => $url, "updateinterval" => $this->default_interval);
- $this->create($tags);
- }
-}
-
-
-/*
- * Removes the url from the cache. It won't be fetched again until register()
- * is called again.
- */
-function unregister($url)
-{
- if ($this->read($url))
- $this->delete();
-}
-
-
-/*
- * Get content of url. Returns empty string if it was never successfully
- * fetched.
- */
-function get_content($url)
-{
- $this->read($url);
- return $this->data['content'];
-}
-
-
-/*
- * Try to refetch all URLs in the database whose next update is due.
- * Note: This is intended for background process use only, as it may take
- * a while to finish...
- */
-function update_cache()
-{
- $now = time();
- $pages = array();
-
- $result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url");
-
- while (list($url) = $this->table->db->fetch_array($result))
- $pages[] = $url;
-
- $this->table->db->free($result);
-
- $this->_fetch_urls($pages, $now);
-}
-
-
-/* PRIVATE */
-function _update_cache($url, $content, $now)
-{
- $contentmd5 = md5($content);
-
- if ($this->read($url))
- {
- $interval = $this->data['updateinterval'];
- $age = $now - $this->data['lastupdate'];
-
- if ($this->data['contentmd5'] == $contentmd5)
- {
- if ($age > ($interval * 2))
- $interval = $interval * 1.5;
-
- $tags = array("updateinterval" => $interval, "nextupdate" => $now + $interval);
- }
- else
- {
- if ($age < ($interval / 2))
- $interval = $interval / 1.5;
-
- if ($interval < $this->default_interval)
- $interval = $this->default_interval;
-
- $tags = array("updateinterval" => $interval, "lastupdate" => $now, "nextupdate" => $now + $interval, "contentmd5" => $contentmd5, "content" => $content);
- }
-
- $this->update($tags);
- }
- else
- {
- $tags = array("url" => $url, "updateinterval" => $this->default_interval, "lastupdate" => $now, "nextupdate" => $now + $this->default_interval, "contentmd5" => $contentmd5, "content" => $content);
- $this->create($tags);
- }
-}
-
-
-/*
- * PRIVATE
- * Fetch the URLs in array $pages and update the cache database accordingly
- */
-function _fetch_urls($pages, $now)
-{
- $pos = 0;
- $fds = array();
- $urls = array();
- $content = array();
- $count = 0;
-
- for ($pos = 0; ($pos < count($pages)) && ($pos < $this->max_connections); $pos++)
- {
- debug("Opening $pages[$pos]", 10);
-
- if ($fd = fopen($pages[$pos], "r"))
- {
- socket_set_blocking($fd, false);
- $fds[] = $fd;
- $urls[] = $pages[$pos];
- $content[] = "";
- $count++;
- debug("Opened $pages[$pos]", 10);
- }
- }
-
- while ($count > 0)
- {
- $read = 0;
-
- for ($i = 0; $i < count($fds); $i++)
- {
- if ($fds[$i])
- {
- if (!feof($fds[$i]))
- {
- $data = fread($fds[$i], 4096);
- $content[$i] .= $data;
- $read += strlen($data);
- debug("Read " . strlen($data) . " from $urls[$i]", 10);
- }
-
- if (feof($fds[$i]))
- {
- fclose($fds[$i]);
- $fds[$i] = 0;
- $count--;
- $this->_update_cache($urls[$i], $content[$i], $now);
- debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10);
-
- /* Schedule next url for retrieval */
- if ($pos < count($pages))
- {
- if ($fd = fopen($pages[$pos], "r"))
- {
- socket_set_blocking($fd, false);
- $fds[$i] = $fd;
- $urls[$i] = $pages[$pos];
- $content[$i] = "";
- $count++;
- debug("Opened $pages[$pos]", 10);
- }
-
- $pos++;
- }
- }
- }
- }
-
- /* We are in non-blocking mode, be nice */
- if (($count > 0) && ($read == 0))
- {
- sleep(1);
- debug("Sleeping...", 10);
- }
- }
-}
-
-} /* End class it_urlcache */
diff --git a/urlcache/urlcache.php b/urlcache/urlcache.php
deleted file mode 100644
index fb6fd33..0000000
--- a/urlcache/urlcache.php
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/www/server/bin/php -q
-<?php
-/*
-** $Id$
-**
-** ITools - the Internet Tools Library
-**
-** Copyright (C) 1995-2003 by the ITools Authors.
-** This program is free software; you can redistribute it and/or
-** modify it under the terms of either the GNU General Public License
-** or the GNU Lesser General Public License, as published by the Free
-** Software Foundation. See http://www.gnu.org/licenses/ for details.
-**
-** urlcache.php - Script to be called from crontab to keep url cache up-to-date
-*/
-
-set_time_limit(4 * 60);
-
-require("itools/itools.lib");
-require("itools/urlcache.lib");
-
-$it_debug = new it_debug(10, "weber@search.ch");
-$it_db = new it_db("urlcache", "urlcache", "JKhsad34H");
-$table = new it_db_table($it_db, "urlcache");
-$it_urlcache = new it_urlcache($table, "url");
-$it_urlcache->update_cache();
-
-?>
diff --git a/urlcache/urlcache.sql b/urlcache/urlcache.sql
deleted file mode 100644
index fcf8659..0000000
--- a/urlcache/urlcache.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-DROP TABLE IF EXISTS urlcache;
-
-CREATE TABLE urlcache
-(
- url CHAR(255) NOT NULL,
- updateinterval INT NOT NULL,
- lastupdate INT NOT NULL,
- nextupdate INT NOT NULL,
- contentmd5 CHAR(32) NOT NULL,
- content TEXT NOT NULL,
-
- PRIMARY KEY (url),
- KEY (nextupdate)
-);