From a5a19fd672bc0b8113d620669b557f17dccd343a Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Thu, 26 Oct 2006 13:35:12 +0000 Subject: Moved itools to live branch --- urlcache/urlcache.class | 225 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 urlcache/urlcache.class (limited to 'urlcache/urlcache.class') diff --git a/urlcache/urlcache.class b/urlcache/urlcache.class new file mode 100644 index 0000000..790efa4 --- /dev/null +++ b/urlcache/urlcache.class @@ -0,0 +1,225 @@ +default_interval = $default_interval; +}
+
+
+/*
+ * Set the maximum number of URLs that _fetch_urls() keeps open at the
+ * same time.
+ */
+function set_max_connections($max_connections)
+{
+    $this->max_connections = $max_connections;
+}
+
+
+/*
+ * Register an URL to be fetched by background process.
+ * Does nothing if the URL is already registered.
+ * Note: get_content() will return an empty string until the url is fetched
+ * for the first time
+ */
+function register($url)
+{
+    if (!$this->read($url))
+    {
+        /* New entries start out with the default update interval */
+        $tags = array("url" => $url, "updateinterval" => $this->default_interval);
+        $this->create($tags);
+    }
+}
+
+
+/*
+ * Removes the url from the cache. It won't be fetched again until register()
+ * is called again. Unknown URLs are silently ignored.
+ */
+function unregister($url)
+{
+    if ($this->read($url))
+        $this->delete();
+}
+
+
+/*
+ * Get content of url. Returns empty string if the url is not registered or
+ * was never successfully fetched.
+ */
+function get_content($url)
+{
+    /*
+     * A failed read() means there is no record for this URL. Return early
+     * instead of handing out whatever $this->data still holds.
+     */
+    if (!$this->read($url))
+        return "";
+
+    /* Registered but not fetched yet: content is not set, report it as "" */
+    return isset($this->data['content']) ? $this->data['content'] : "";
+}
+
+
+/*
+ * Try to refetch all URLs in the database whose next update is due.
+ * Note: This is intended for background process use only as it may take
+ * a while to finish...
+ */
+function update_cache()
+{
+    $now = time();
+    $pages = array();
+
+    /* Only entries whose scheduled update time lies in the past are due */
+    $result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url");
+
+    while (list($url) = $this->table->db->fetch_array($result))
+        $pages[] = $url;
+
+    $this->table->db->free($result);
+
+    /* Every entry fetched in this run is stamped with the same $now */
+    $this->_fetch_urls($pages, $now);
+}
+
+
+/*
+ * PRIVATE
+ * Store freshly fetched $content for $url and schedule its next update.
+ *
+ * Unchanged content (same md5) only reschedules the entry; the interval is
+ * lengthened by factor 1.5 once the content has been stable for more than
+ * two intervals. Changed content is stored together with its md5; the
+ * interval is shortened by factor 1.5 if the previous change happened less
+ * than half an interval ago, but it never drops below default_interval.
+ * URLs without a record are created with default_interval.
+ *
+ * $now is the timestamp to use as "time of this fetch".
+ */
+function _update_cache($url, $content, $now)
+{
+    $contentmd5 = md5($content);
+
+    if ($this->read($url))
+    {
+        $interval = $this->data['updateinterval'];
+
+        /* lastupdate is only written when content changes, so this is the age of the content */
+        $age = $now - $this->data['lastupdate'];
+
+        if ($this->data['contentmd5'] == $contentmd5)
+        {
+            /* Stable content: back off */
+            if ($age > ($interval * 2))
+                $interval = $interval * 1.5;
+
+            $tags = array("updateinterval" => $interval, "nextupdate" => $now + $interval);
+        }
+        else
+        {
+            /* Content changes quickly: poll more often */
+            if ($age < ($interval / 2))
+                $interval = $interval / 1.5;
+
+            if ($interval < $this->default_interval)
+                $interval = $this->default_interval;
+
+            $tags = array("updateinterval" => $interval, "lastupdate" => $now, "nextupdate" => $now + $interval, "contentmd5" => $contentmd5, "content" => $content);
+        }
+
+        $this->update($tags);
+    }
+    else
+    {
+        $tags = array("url" => $url, "updateinterval" => $this->default_interval, "lastupdate" => $now, "nextupdate" => $now + $this->default_interval, "contentmd5" => $contentmd5, "content" => $content);
+        $this->create($tags);
+    }
+}
+
+
+/*
+ * PRIVATE
+ * Open $url for reading and switch the stream to non-blocking mode.
+ * Returns the stream, or false if the URL could not be opened.
+ */
+function _open_url($url)
+{
+    debug("Opening $url", 10);
+
+    if (!($fd = fopen($url, "r")))
+        return false;
+
+    stream_set_blocking($fd, false);
+    debug("Opened $url", 10);
+
+    return $fd;
+}
+
+
+/*
+ * PRIVATE
+ * Fetch the URLs in array $pages, keeping at most max_connections streams
+ * open at a time, and update the cache database via _update_cache() as
+ * each one finishes. $now is passed through as the fetch timestamp.
+ *
+ * URLs that cannot be opened are skipped without using up a connection
+ * slot, so one unreachable URL never keeps the remaining ones from being
+ * fetched. Their cache records are left untouched.
+ */
+function _fetch_urls($pages, $now)
+{
+    $total = count($pages);
+    $pos = 0;
+    $fds = array();
+    $urls = array();
+    $content = array();
+    $count = 0;    /* Number of currently open streams */
+
+    /* Initial fill: keep going until enough streams are open or the queue is empty */
+    while (($count < $this->max_connections) && ($pos < $total))
+    {
+        $url = $pages[$pos++];
+
+        if ($fd = $this->_open_url($url))
+        {
+            $fds[] = $fd;
+            $urls[] = $url;
+            $content[] = "";
+            $count++;
+        }
+    }
+
+    /* Slots are reused below, so the number of slots stays fixed from here on */
+    $slots = count($fds);
+
+    while ($count > 0)
+    {
+        $read = 0;
+
+        for ($i = 0; $i < $slots; $i++)
+        {
+            if (!$fds[$i])
+                continue;
+
+            if (!feof($fds[$i]))
+            {
+                $data = fread($fds[$i], 4096);
+                $content[$i] .= $data;
+                $read += strlen($data);
+                debug("Read " . strlen($data) . " from $urls[$i]", 10);
+            }
+
+            /* Checked again on purpose: the read above may have hit the end of the stream */
+            if (feof($fds[$i]))
+            {
+                fclose($fds[$i]);
+                $fds[$i] = 0;
+                $count--;
+                $this->_update_cache($urls[$i], $content[$i], $now);
+                debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10);
+
+                /* Schedule next url for retrieval, skipping the ones that fail to open */
+                while ($pos < $total)
+                {
+                    $url = $pages[$pos++];
+
+                    if ($fd = $this->_open_url($url))
+                    {
+                        $fds[$i] = $fd;
+                        $urls[$i] = $url;
+                        $content[$i] = "";
+                        $count++;
+                        break;
+                    }
+                }
+            }
+        }
+
+        /* We are in non-blocking mode, be nice */
+        if (($count > 0) && ($read == 0))
+        {
+            debug("Sleeping...", 10);
+            sleep(1);
+        }
+    }
+}
+
+} /* End class it_urlcache */
-- cgit v1.2.3