<?php
/*
 * Search-engine referrer report.
 *
 * Reads the tail of the web server access log, picks out the requests whose
 * referrer matches one of the configured search engines, extracts the search
 * terms from that referrer and prints one line per hit: the search terms
 * (linked to the referrer) followed by the requested URI (linked to itself).
 *
 * Request parameter:
 *   kb - number of kB to read from the end of the log; -1 reads the whole
 *        file; anything below 1 (other than -1) falls back to $def_kb.
 *
 * Everything taken from the log (referrer, URI, client address) is supplied
 * by the client, so it is HTML-escaped before being echoed.
 */
require 'app.inc.php';
require 'appmenu.inc.php';

# configure the stuff below

# your logfile
$todaylog = "/var/log/apache2/access.log";
$def_kb = 1024; // Number of kB to seek from the end of the file

# One entry per rule: display name, regex matched against the referrer, and
# either a list of query-string parameter names to try in order, or the index
# of the regex capture group that holds the search terms.
$engines = array(
    array('Google', '{\\.google\\..+/(search|imgres)\\?}i', array('q', 'as_q', 'prev')),
    array('Yahoo!', '{search\\.yahoo\\.com/search\\?}i', array('p')),
    array('technorati', '{technorati.com/(tags?|search)/(.+)}i', 2),
    array('Altavista', '{altavista\\.}i', array('q')),
    array('a9', '{a9\\.com/-/search}i', array('q')),
    array('a9', '{a9\\.com/(.+)\\??}i', 1),
    array('Jux2', '{jux2\\.com/}i', array('q')),
    array('Wikipedia', '{wikipedia\\.org/wiki/(.+)}i', 1),
    array('search.ch', '{www\\.search\\.ch/search\\.html}i', array('q'))
);
# end of config

/**
 * Split the query string of a URI into a name => value map.
 *
 * Values are returned exactly as they appear in the URI (still URL-encoded).
 * A parameter without '=' maps to an empty string; empty segments (as in
 * "a=1&&b=2") are ignored.
 *
 * @param string $uri URI or path, with or without a query string
 * @return array parameter name => raw value; empty when there is no '?'
 */
function queryargs($uri)
{
    $args = array();
    // Limit 2: a literal '?' inside the query string stays part of it.
    $parts = explode('?', $uri, 2);
    if (sizeof($parts) > 1) {
        foreach (explode('&', $parts[1]) as $pair) {
            if ($pair === '') {
                continue;
            }
            // Limit 2: a literal '=' inside the value stays part of the value.
            $kv = explode('=', $pair, 2);
            $args[$kv[0]] = isset($kv[1]) ? $kv[1] : '';
        }
    }
    return $args;
}

// error_reporting(1);

$searches = array();
$kb = $def_kb;
if (isset($_GET['kb'])) {
    $kb = intval($_GET['kb']);
}
if ($kb < 1 && $kb != -1) {
    $kb = $def_kb;
}
$ofs = $kb * 1024;

# open and position the log file
# (an unreadable log ends the script silently, without rendering the page)
if (!posix_access($todaylog, POSIX_R_OK)) {
    return;
}
$fd = fopen($todaylog, "r");
if (!$fd) {
    return;
}
if ($kb != -1) {
    # seek backwards from EOF
    fseek($fd, 0, SEEK_END);
    if (ftell($fd) <= $ofs) {
        // The whole file fits in the window: start at the top and keep the
        // first line, which is complete.
        fseek($fd, 0, SEEK_SET);
    } else {
        fseek($fd, -$ofs, SEEK_END);
        fgets($fd); // Discard partial line (no length cap, so all of it goes)
    }
}

# read the remainder
// No length limit on fgets(): a capped read would split a long log line and
// the tail would be parsed as a separate, bogus entry.
while (($x = fgets($fd)) !== false) {
    // 64.68.82.204 - - [01/Mar/2004:07:26:34 +0100] "GET /robots.txt HTTP/1.0" rc size "referrer" "user agent"
    $fields = explode(' ', rtrim($x, "\r\n"));
    if (count($fields) < 11) {
        continue; // too short to carry a referrer field
    }
    $ip = $fields[0];
    $datetime = $fields[3];
    $tz = $fields[4];
    $log_uri = $fields[6];
    $referrer = substr($fields[10], 1, -1); // drop the quote characters

    // The referrer becomes a link target below; accept only web URLs so that
    // a forged "javascript:" referrer can never end up in an href.
    if (!preg_match('{^https?://}i', $referrer)) {
        continue;
    }

    // The first rule whose pattern matches decides how the terms are extracted.
    $srch = null;
    $matches = array();
    foreach ($engines as $e) {
        if (preg_match($e[1], $referrer, $matches)) {
            $srch = $e[2];
            break;
        }
    }
    if ($srch === null) {
        continue;
    }

    $query = '';
    if (is_array($srch)) {
        // Take the first listed parameter that is present and non-empty.
        $args = queryargs($referrer);
        foreach ($srch as $s) {
            if (!isset($args[$s]) || $args[$s] === '') {
                continue;
            }
            $query = $args[$s];
            // Special treatment for google image searches: 'prev' holds the
            // URL-encoded path of the originating search; take the terms from
            // its 'q' parameter and link to that search instead.
            if ($s == 'prev') {
                $imgq = urldecode($query);
                $imgargs = queryargs($imgq);
                $query = isset($imgargs['q']) ? $imgargs['q'] : '';
                $host = explode('/imgres', $referrer);
                $referrer = $host[0] . $imgq;
            }
            break;
        }
    } elseif (is_int($srch)) {
        $query = isset($matches[$srch]) ? $matches[$srch] : '';
    }

    // Compare against '' rather than testing truthiness, so "0" is kept.
    if ($query !== '') {
        // "[01/Mar/2004:07:26:34" and "+0100]" -> "01/Mar/2004 07:26:34 +0100"
        $t = substr($datetime, 1, 11) . ' ' . substr($datetime, -8) . ' ' . substr($tz, 0, -1);
        $searches[] = array($query, $referrer, "at $t from $ip", $log_uri);
    }
}
fclose($fd);

$app = new Application("Referrers from search engines", $menu);
$app->head_add = "<meta name=\"robots\" content=\"noindex,nofollow\" />\n";
$app->header();

// List each engine name once, even when several rules share a name.
$names = array();
foreach ($engines as $e) {
    if (!in_array($e[0], $names, true)) {
        $names[] = $e[0];
    }
}
echo "<p>Searches from ";
$lastname = array_pop($names);
if ($names) {
    echo implode(', ', $names) . ' and ' . $lastname;
} else {
    echo (string) $lastname;
}
echo " in ";
if ($kb != -1) {
    echo "the last {$kb}kB of ";
}
echo "my access log:</p>\n<p>";

if (sizeof($searches) == 0) {
    echo "None.";
}
foreach ($searches as $s) {
    // Visible text is URL-decoded for readability; link targets keep their
    // original encoding. All of it is escaped for the HTML context.
    $txt0 = htmlspecialchars(urldecode($s[0]), ENT_QUOTES);
    $uri1 = htmlspecialchars($s[1], ENT_QUOTES);
    $title = htmlspecialchars($s[2], ENT_QUOTES);
    $href3 = htmlspecialchars($s[3], ENT_QUOTES);
    $uri3 = htmlspecialchars(urldecode($s[3]), ENT_QUOTES);
    echo "<a href=\"$uri1\" title=\"$title\">$txt0</a>: <a href=\"$href3\">{$uri3}</a><br />\n";
}
echo "</p>\n";
$app->footer(1);