<?php

require 'app.inc.php';
require 'appmenu.inc.php';

# configure the stuff below

# your logfile
$todaylog = "/var/log/apache2/access.log";
$def_kb = 1024;	// Number of kB to seek from the end of the file

# Search engines recognised in the referrer field of each log line.
# Every entry is array(display name, regex, query source), where the regex
# is matched against the referrer and the query source is either
#   - an array of query-string parameter names, tried in order, or
#   - an integer: the number of the regex capture group holding the terms.
# Entries are tried from top to bottom and the first matching regex wins,
# so a specific pattern has to be listed before a more general one
# (see the two a9 entries).
$engines = array(
    array('Google', '{\\.google\\..+/(search|imgres)\\?}i', array('q', 'as_q', 'prev')),
    array('Yahoo!', '{search\\.yahoo\\.com/search\\?}i', array('p')),
    // The dot is escaped so that only a literal "technorati.com" matches.
    array('technorati', '{technorati\\.com/(tags?|search)/(.+)}i', 2),
    array('Altavista', '{altavista\\.}i', array('q')),
    array('a9', '{a9\\.com/-/search}i', array('q')),
    array('a9', '{a9\\.com/(.+)\\??}i', 1),
    array('Jux2', '{jux2\\.com/}i', array('q')),
    array('Wikipedia', '{wikipedia\\.org/wiki/(.+)}i', 1),
    array('search.ch', '{www\\.search\\.ch/search\\.html}i', array('q'))
);

# end of config

/**
 * Split the query string of a URI into an associative array.
 *
 * Only the text between the first and the second '?' is examined. Names and
 * values are returned exactly as they appear in the URI (no URL-decoding).
 *
 * - A parameter without '=' (e.g. "?debug") maps to the empty string.
 * - Only the first '=' separates name from value, so "a=b=c" gives 'a' => 'b=c'.
 * - Empty segments ("&&", a trailing '&', or a bare '?') are ignored.
 * - When a name occurs more than once the last occurrence wins.
 *
 * @param string $uri URI or URI fragment, with or without a query string
 * @return array parameter name => raw value; empty when there is no '?'
 */
function queryargs($uri) {
    $args = array();
    $parts = explode('?', $uri);
    if (sizeof($parts) < 2)
        return $args;

    foreach (explode('&', $parts[1]) as $pair) {
        if ($pair === '')
            continue;
        // Limit of 2 keeps any further '=' characters inside the value.
        $kv = explode('=', $pair, 2);
        $args[$kv[0]] = isset($kv[1]) ? $kv[1] : '';
    }
    return $args;
}

// error_reporting(1);

// Rows collected while scanning the log; one per recognised search referrer.
$searches = array();

// How much of the log tail to scan, in kB. The value comes from ?kb=...
// when present; -1 is accepted as-is and means "do not seek, read the
// whole file". Anything else below 1 falls back to the configured default.
$kb = isset($_GET['kb']) ? intval($_GET['kb']) : $def_kb;
if ($kb != -1 && $kb < 1) {
    $kb = $def_kb;
}

// The same window expressed in bytes, used for seeking from the end.
$ofs = 1024 * $kb;

# open and position the log file
# An unreadable or unopenable log ends the script here without any output.
if (!posix_access($todaylog, POSIX_R_OK))
    return;
$fd = fopen($todaylog, "r");
if (!$fd)
    return;
if ($kb != -1) {
    # seek backwards from EOF
    fseek($fd, 0, SEEK_END);
    if (ftell($fd) <= $ofs) {
        // The whole file fits into the window (including the case where it
        // is exactly $ofs bytes long): start at the first line and keep it.
        fseek($fd, 0, SEEK_SET);
    }
    else {
        fseek($fd, -$ofs, SEEK_END);
        // We most likely landed in the middle of a line. Discard up to and
        // including the next newline; no length cap, so an over-long partial
        // line cannot leave a fragment behind.
        fgets($fd);
    }
}

# read the remainder
// Each line is read in full: a capped read would split long lines and the
// tail would then be parsed as if it were a separate log entry. Comparing
// against false keeps a falsy line from ending the loop early.
while (($x = fgets($fd)) !== false) {

// 64.68.82.204 - - [01/Mar/2004:07:26:34 +0100] "GET /robots.txt HTTP/1.0" rc size "referrer" "user agent"
    $fields = explode(" ", rtrim($x, "\r\n"));
    if (sizeof($fields) < 11)
        continue;	// malformed or truncated line: there is no referrer field

    $ip = $fields[0];
    $datetime = $fields[3];
    $tz = $fields[4];
    $log_uri = $fields[6];
    $referrer = (string) substr($fields[10], 1, -1);	// drop the quote characters

    // Find the first configured engine whose pattern matches the referrer.
    $srch = null;
    $matches = array();
    foreach ($engines as $e) {
        if (preg_match($e[1], $referrer, $matches)) {
            $srch = $e[2];
            break;
        }
    }
    if ($srch === null)
        continue;

    $query = '';
    if (is_array($srch)) {
        // Query source is a list of parameter names: take the first one
        // that is present and non-empty.
        $args = queryargs($referrer);
        foreach ($srch as $s) {
            if (!isset($args[$s]) || $args[$s] === '')
                continue;
            $query = $args[$s];
            // Special treatment for google image searches: 'prev' holds the
            // URL-encoded path and query of the originating search page.
            if ($s === 'prev') {
                $imgq = urldecode($query);
                $args = queryargs($imgq);
                $query = isset($args['q']) ? $args['q'] : '';
                $host = explode('/imgres', $referrer);
                $referrer = $host[0] . $imgq;
            }
            break;
        }
    }
    elseif (is_int($srch)) {
        // Query source is a capture group of the engine's regex.
        $query = isset($matches[$srch]) ? $matches[$srch] : '';
    }

    // Explicit comparison so that a search for "0" is not dropped.
    if ($query !== '') {
        $t = substr($datetime, 1, 11) . ' ' . substr($datetime, -8) . ' ' . substr($tz, 0, -1);
        $searches[] = array($query, $referrer, "at $t from $ip", $log_uri);
    }
}
fclose($fd);

$app = new Application("Referrers from search engines", $menu);

// Keep crawlers away from this page.
$app->head_add = "<meta name=\"robots\" content=\"noindex,nofollow\" />\n";
$app->header();

// Introductory sentence: "Searches from A, B and C in [the last NkB of] my access log:"
echo "<p>Searches from ";
foreach ($engines as $i => $e) {
    echo $e[0];
    if ($i == sizeof($engines) - 2)
	echo ' and ';
    elseif ($i < sizeof($engines) - 2)
	echo ', ';
}
echo " in ";
if ($kb != -1)
    echo "the last {$kb}kB of ";
echo "my access log:</p>\n<p>";

if (sizeof($searches) == 0)
    echo "None.";

// Every element of a search row originates from the access log, i.e. from
// request data chosen by remote clients. All of it is HTML-escaped
// (including quotes, since it is placed inside attributes) before output.
foreach ($searches as $s) {
    $txt0 = htmlspecialchars(urldecode($s[0]), ENT_QUOTES);
    $uri1 = htmlspecialchars($s[1], ENT_QUOTES);
    $title = htmlspecialchars($s[2], ENT_QUOTES);
    $href3 = htmlspecialchars($s[3], ENT_QUOTES);
    $uri3 = htmlspecialchars(urldecode($s[3]), ENT_QUOTES);

    // The engine patterns are not anchored, so the referrer may carry any
    // scheme. Only plain http(s) referrers become clickable links.
    if (preg_match('{^https?://}i', $s[1]))
        echo "<a href=\"$uri1\" title=\"$title\">$txt0</a>";
    else
        echo "<span title=\"$title\">$txt0</span>";

    // Same precaution for the requested URI: link local paths and http(s)
    // URLs, print anything else as plain text.
    if (preg_match('{^(/|https?://)}i', $s[3]))
        echo ": <a href=\"$href3\">{$uri3}</a><br />\n";
    else
        echo ": {$uri3}<br />\n";
}
echo "</p>\n";

$app->footer(1);

?>