default_interval = $default_interval; }

    /*
     * Set the maximum number of URLs that _fetch_urls() keeps open at the
     * same time.
     */
    function set_max_connections($max_connections)
    {
        $this->max_connections = $max_connections;
    }

    /*
     * Register a URL to be fetched by the background process. Does nothing
     * if a record for the url already exists.
     * Note: get_content() will return an empty string until the url is
     * fetched for the first time
     */
    function register($url)
    {
        if (!$this->read($url)) {
            $tags = array("url" => $url, "updateinterval" => $this->default_interval);
            $this->create($tags);
        }
    }

    /*
     * Removes the url from the cache. It won't be fetched again until
     * register() is called again. Unknown urls are silently ignored.
     */
    function unregister($url)
    {
        if ($this->read($url))
            $this->delete();
    }

    /*
     * Get content of url. Returns empty string if the url is not in the
     * cache or was never successfully fetched.
     */
    function get_content($url)
    {
        /*
         * read() fails for unknown urls; bail out instead of handing back
         * whatever record happens to be loaded in $this->data.
         */
        if (!$this->read($url))
            return "";

        return $this->data['content'];
    }

    /*
     * Try to refetch all URLs in the database whose nextupdate time has
     * passed.
     * Note: This is intended for background process use only as it may take
     * a while to finish...
     */
    function update_cache()
    {
        $now = time();
        $pages = array();

        /* Collect every url that is due ($now is an integer timestamp) */
        $result = $this->table->safe_sql_select("WHERE nextupdate < $now", "url");
        while (list($url) = $this->table->db->fetch_array($result))
            $pages[] = $url;
        $this->table->db->free($result);

        $this->_fetch_urls($pages, $now);
    }

    /*
     * PRIVATE
     * Store freshly fetched $content for $url and schedule the next fetch.
     *
     * The update interval adapts to how often the content changes:
     * - content unchanged for more than two intervals: interval grows by 1.5
     * - content changed after less than half an interval: interval shrinks
     *   by 1.5, but never below default_interval
     * lastupdate is only written when the content changed (or the record is
     * new), so it records the time of the last change.
     *
     * $url      url that was fetched
     * $content  data read from the url
     * $now      timestamp used for lastupdate/nextupdate
     */
    function _update_cache($url, $content, $now)
    {
        $contentmd5 = md5($content);

        if ($this->read($url)) {
            $interval = $this->data['updateinterval'];
            $age = $now - $this->data['lastupdate'];

            /*
             * Strict string comparison: with == two different digests that
             * both look like numbers (e.g. "0e...") would compare as equal.
             */
            if ((string)$this->data['contentmd5'] === $contentmd5) {
                if ($age > ($interval * 2))
                    $interval = $interval * 1.5;

                $tags = array("updateinterval" => $interval,
                              "nextupdate" => $now + $interval);
            } else {
                if ($age < ($interval / 2))
                    $interval = $interval / 1.5;

                if ($interval < $this->default_interval)
                    $interval = $this->default_interval;

                $tags = array("updateinterval" => $interval,
                              "lastupdate" => $now,
                              "nextupdate" => $now + $interval,
                              "contentmd5" => $contentmd5,
                              "content" => $content);
            }

            $this->update($tags);
        } else {
            /* Not in the cache yet: create the record with default interval */
            $tags = array("url" => $url,
                          "updateinterval" => $this->default_interval,
                          "lastupdate" => $now,
                          "nextupdate" => $now + $this->default_interval,
                          "contentmd5" => $contentmd5,
                          "content" => $content);
            $this->create($tags);
        }
    }

    /*
     * PRIVATE
     * Fetch the URLs in array $pages in parallel (at most max_connections
     * open at once) using non-blocking reads, and update the cache database
     * for every URL that was read to the end.
     *
     * URLs that cannot be opened are skipped; the next URL in $pages is
     * tried in their place so that the remaining URLs are still fetched.
     *
     * $pages  list of urls to fetch
     * $now    timestamp passed on to _update_cache()
     */
    function _fetch_urls($pages, $now)
    {
        $total = count($pages);
        $pos = 0;           /* index of the next entry of $pages to open */
        $fds = array();     /* slot => open stream, or 0 if the slot is idle */
        $urls = array();    /* slot => url being read */
        $content = array(); /* slot => data read so far */
        $count = 0;         /* number of currently open streams */

        /* Open the first batch; failed opens do not use up a slot */
        while (($pos < $total) && ($count < $this->max_connections)) {
            $url = $pages[$pos];
            $pos++;

            debug("Opening $url", 10);
            if ($fd = fopen($url, "r")) {
                stream_set_blocking($fd, false);
                $fds[] = $fd;
                $urls[] = $url;
                $content[] = "";
                $count++;
                debug("Opened $url", 10);
            }
        }

        /* Slots are only reused below, never added, so this stays constant */
        $slots = count($fds);

        while ($count > 0) {
            $read = 0;  /* bytes read during this pass over all slots */

            for ($i = 0; $i < $slots; $i++) {
                if (!$fds[$i])
                    continue;

                if (!feof($fds[$i])) {
                    $data = fread($fds[$i], 4096);
                    $content[$i] .= $data;
                    $read += strlen($data);
                    debug("Read " . strlen($data) . " from $urls[$i]", 10);
                }

                if (feof($fds[$i])) {
                    fclose($fds[$i]);
                    $fds[$i] = 0;
                    $count--;
                    $this->_update_cache($urls[$i], $content[$i], $now);
                    debug("Finished $urls[$i]: " . strlen($content[$i]) . " bytes", 10);

                    /*
                     * Schedule next url for retrieval in the freed slot.
                     * Keep going past urls that fail to open, otherwise the
                     * slot would be lost and later urls might never be tried.
                     */
                    while ($pos < $total) {
                        $url = $pages[$pos];
                        $pos++;

                        if ($fd = fopen($url, "r")) {
                            stream_set_blocking($fd, false);
                            $fds[$i] = $fd;
                            $urls[$i] = $url;
                            $content[$i] = "";
                            $count++;
                            debug("Opened $url", 10);
                            break;
                        }
                    }
                }
            }

            /* We are in non-blocking mode, be nice */
            if (($count > 0) && ($read == 0)) {
                sleep(1);
                debug("Sleeping...", 10);
            }
        }
    }
} /* End class it_urlcache */