From a5a19fd672bc0b8113d620669b557f17dccd343a Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Thu, 26 Oct 2006 13:35:12 +0000 Subject: Moved itools to live branch --- urlcache/.cvsignore | 2 + urlcache/Makefile | 48 +++++++++++ urlcache/urlcache.class | 225 ++++++++++++++++++++++++++++++++++++++++++++++++ urlcache/urlcache.php | 28 ++++++ urlcache/urlcache.sql | 14 +++ 5 files changed, 317 insertions(+) create mode 100644 urlcache/.cvsignore create mode 100644 urlcache/Makefile create mode 100644 urlcache/urlcache.class create mode 100644 urlcache/urlcache.php create mode 100644 urlcache/urlcache.sql (limited to 'urlcache') diff --git a/urlcache/.cvsignore b/urlcache/.cvsignore new file mode 100644 index 0000000..b8e8856 --- /dev/null +++ b/urlcache/.cvsignore @@ -0,0 +1,2 @@ +*.slib +*.lib diff --git a/urlcache/Makefile b/urlcache/Makefile new file mode 100644 index 0000000..ef66132 --- /dev/null +++ b/urlcache/Makefile @@ -0,0 +1,48 @@ +## +## $Id$ +## +## Makefile for itools/urlcache.lib +## +## $Log$ +## Revision 1.1 2000/01/28 02:37:54 cschneid +## Added URL cache class and simple script to be put into crontab +## +## + +CPP= cpp +QUIETMAKE= $(MAKE) -s +PHPCOMPILE= /usr/local/bin/phpcompile + +MODULE= urlcache +SUBDIRS= +CLASSES= urlcache.class + +# +# Library creation rules, do not change stuff below... +# +SLIB= $(MODULE).slib +LIB= ../$(MODULE).lib + +all: $(LIB) + +$(LIB): $(SLIB) + @if [ -x $(PHPCOMPILE) ]; then (echo Compiling $(SLIB) to $(LIB) ...) 1>&2; $(PHPCOMPILE) <$(SLIB) >$(LIB); else (echo $(PHPCOMPILE) not found, copying $(SLIB) to $(LIB) ...) 1>&2; cp $(SLIB) $(LIB); fi + +$(SLIB): $(CLASSES) DUMMY + @(echo Creating $(SLIB) from $(SUBDIRS) $(CLASSES) ...) 
1>&2 + @echo "$(SLIB) + @(for dir in DUMMY $(SUBDIRS); do (test -d $$dir && cd $$dir && $(QUIETMAKE) cat); done; for class in DUMMY $(CLASSES); do test -f $$class && cat $$class; done) | $(CPP) -P -undef | perl -ne 's/^\s+//g; print unless /^\s*$$/' | grep -v "^" >>$(SLIB) + @echo "?>" >>$(SLIB) + +$(SUBDIRS):: + @(cd $@; $(QUIETMAKE)) + +DUMMY: + +cat: $(SLIB) + @cat $(SLIB) + +clean: + @(echo Cleaning $(SLIB) $(LIB) ...) 1>&2 + @rm -f $(SLIB) $(LIB) + @for dir in DUMMY $(SUBDIRS); do (test -d $$dir && cd $$dir && $(QUIETMAKE) $@) || :; done diff --git a/urlcache/urlcache.class b/urlcache/urlcache.class new file mode 100644 index 0000000..790efa4 --- /dev/null +++ b/urlcache/urlcache.class @@ -0,0 +1,225 @@ +default_interval = $default_interval; +} + + +function set_max_connections($max_connections) +{ + $this->max_connections = $max_connections; +} + + +/* + * Register a URL to be fetched by the background process. + * Note: get_content() will return an empty string until the url is fetched for the + * first time + */ +function register($url) +{ + if (!$this->read($url)) + { + $tags = array("url" => $url, "updateinterval" => $this->default_interval); + $this->create($tags); + } +} + + +/* + * Removes the url from the cache. It won't be fetched again until register() + * is called again. + */ +function unregister($url) +{ + if ($this->read($url)) + $this->delete(); +} + + +/* + * Get content of url. Returns empty string if it was never successfully + * fetched. + */ +function get_content($url) +{ + $this->read($url); + return $this->data['content']; +} + + +/* + * Try to refetch all URLs in the database. + * Note: This is intended for background process use only as it may take + * a while to finish... 
+ */ +function update_cache() +{ + $now = time(); + $pages = array(); + + $result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url"); + + while (list($url) = $this->table->db->fetch_array($result)) + $pages[] = $url; + + $this->table->db->free($result); + + $this->_fetch_urls($pages, $now); +} + + +/* PRIVATE */ +function _update_cache($url, $content, $now) +{ + $contentmd5 = md5($content); + + if ($this->read($url)) + { + $interval = $this->data['updateinterval']; + $age = $now - $this->data['lastupdate']; + + if ($this->data['contentmd5'] == $contentmd5) + { + if ($age > ($interval * 2)) + $interval = $interval * 1.5; + + $tags = array("updateinterval" => $interval, "nextupdate" => $now + $interval); + } + else + { + if ($age < ($interval / 2)) + $interval = $interval / 1.5; + + if ($interval < $this->default_interval) + $interval = $this->default_interval; + + $tags = array("updateinterval" => $interval, "lastupdate" => $now, "nextupdate" => $now + $interval, "contentmd5" => $contentmd5, "content" => $content); + } + + $this->update($tags); + } + else + { + $tags = array("url" => $url, "updateinterval" => $this->default_interval, "lastupdate" => $now, "nextupdate" => $now + $this->default_interval, "contentmd5" => $contentmd5, "content" => $content); + $this->create($tags); + } +} + + +/* + * PRIVATE + * Fetch the URLs in array $pages and update the cache database accordingly + */ +function _fetch_urls($pages, $now) +{ + $pos = 0; + $fds = array(); + $urls = array(); + $content = array(); + $count = 0; + + for ($pos = 0; ($pos < count($pages)) && ($pos < $this->max_connections); $pos++) + { + debug("Opening $pages[$pos]", 10); + + if ($fd = fopen($pages[$pos], "r")) + { + socket_set_blocking($fd, false); + $fds[] = $fd; + $urls[] = $pages[$pos]; + $content[] = ""; + $count++; + debug("Opened $pages[$pos]", 10); + } + } + + while ($count > 0) + { + $read = 0; + + for ($i = 0; $i < count($fds); $i++) + { + if ($fds[$i]) + { + if (!feof($fds[$i])) + { + 
$data = fread($fds[$i], 4096); + $content[$i] .= $data; + $read += strlen($data); + debug("Read " . strlen($data) . " from $urls[$i]", 10); + } + + if (feof($fds[$i])) + { + fclose($fds[$i]); + $fds[$i] = 0; + $count--; + $this->_update_cache($urls[$i], $content[$i], $now); + debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10); + + /* Schedule next url for retrieval */ + if ($pos < count($pages)) + { + if ($fd = fopen($pages[$pos], "r")) + { + socket_set_blocking($fd, false); + $fds[$i] = $fd; + $urls[$i] = $pages[$pos]; + $content[$i] = ""; + $count++; + debug("Opened $pages[$pos]", 10); + } + + $pos++; + } + } + } + } + + /* We are in non-blocking mode, be nice */ + if (($count > 0) && ($read == 0)) + { + sleep(1); + debug("Sleeping...", 10); + } + } +} + +} /* End class it_urlcache */ diff --git a/urlcache/urlcache.php b/urlcache/urlcache.php new file mode 100644 index 0000000..fb6fd33 --- /dev/null +++ b/urlcache/urlcache.php @@ -0,0 +1,28 @@ +#!/www/server/bin/php -q +update_cache(); + +?> diff --git a/urlcache/urlcache.sql b/urlcache/urlcache.sql new file mode 100644 index 0000000..fcf8659 --- /dev/null +++ b/urlcache/urlcache.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS urlcache; + +CREATE TABLE urlcache +( + url CHAR(255) NOT NULL, + updateinterval INT NOT NULL, + lastupdate INT NOT NULL, + nextupdate INT NOT NULL, + contentmd5 CHAR(32) NOT NULL, + content TEXT NOT NULL, + + PRIMARY KEY (url), + KEY (nextupdate) +); -- cgit v1.2.3