/* weblog_digest - summarize traffic bursts in a web log file
**
** Copyright  1998 by Jef Poskanzer <jef@acme.com>. All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
** 1. Redistributions of source code must retain the above copyright
**    notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
**    notice, this list of conditions and the following disclaimer in the
**    documentation and/or other materials provided with the distribution.
** 
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
*/


#include <sys/types.h>

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>


/* Defines. */
/* Idle gap, in seconds, used when no -idle option is given. */
#define DEFAULT_IDLETIME 15


/* Globals. */
/* argv[0] as received by main(); used as the program name in messages. */
static char* argv0;
/* Non-zero when -quiet was given; bogus() then prints nothing. */
static int quiet;
/* A gap of at least this many seconds between two consecutive requests
** ends the current group and starts a new one.
*/
static long idletime;


/* Forwards. */
/* Print the usage message on stderr and exit with status 1. */
static void usage( void );
/* Complain on stderr about a line that could not be parsed. */
static void bogus( char* where, char* rest );
/* Convert a log timestamp string to seconds; (time_t) -1 on failure. */
static time_t date_parse( char* str );


/* Print the summary line for one finished group of requests.
**
** count is the number of requests in the group, total_bytes the sum of
** their byte counts, and secs the time between the first and the last
** request of the group.  Rates are only printed when there is more than
** one request and the group spans a non-zero time, which also avoids a
** division by zero.
*/
static void
print_group( int count, long long total_bytes, long secs )
    {
    if ( count == 1 || secs == 0 )
	(void) printf( "%d req, %g bytes\n", count, (float) total_bytes );
    else
	(void) printf(
	    "%d reqs, %g bytes, %ld secs, %g reqs/sec, %g bytes/sec\n",
	    count, (float) total_bytes, secs,
	    (float) count / secs,
	    (float) total_bytes / secs );
    }


/* Read web log lines from stdin and summarize them as bursts of traffic.
**
** Each line is expected to look like
**     host rfc931user authuser [date] "method file protocol" status bytes
** optionally followed by a quoted referer and a quoted user agent.  The
** protocol may be missing from the request, and lines that do not fit are
** reported through bogus() and skipped.
**
** Consecutive requests less than idletime seconds apart form one group.
** For every group a summary line is printed, and between two groups the
** length of the idle gap is printed.
**
** Options:  -quiet         do not report unparsable lines
**           -idle secs     gap that separates groups (default
**                          DEFAULT_IDLETIME)
*/
int
main( int argc, char** argv )
    {
    int argn;
    char line[10000];
    char* cp;
    char* host_str;
    char* rfc931user_str;
    char* authuser_str;
    char* date_str;
    char* method_str;
    char* file_str;
    char* protocol_str;
    char* status_str;
    char* bytes_str;
    char* referer_str;
    char* useragent_str;
    size_t i;
    time_t date, prev_date, group_start;
    long bytes;
    long long group_bytes;
    int group_count;

    argv0 = argv[0];

    /* Parse args. */
    argn = 1;
    quiet = 0;
    idletime = DEFAULT_IDLETIME;
    while ( argn < argc && argv[argn][0] == '-' && argv[argn][1] != '\0' )
	{
	if ( strcmp( argv[argn], "-quiet" ) == 0 )
	    quiet = 1;
	else if ( strcmp( argv[argn], "-idle" ) == 0 )
	    {
	    /* -idle needs a value; without this check atol() would be
	    ** handed the null pointer that terminates argv.
	    */
	    ++argn;
	    if ( argn >= argc )
		usage();
	    idletime = atol( argv[argn] );
	    }
	else
	    usage();
	++argn;
	}
    if ( argn != argc )
	usage();

    /* Read the input file. */
    prev_date = (time_t) -1;	/* -1 means "no line accepted yet" */
    group_start = (time_t) -1;
    group_count = 0;
    group_bytes = 0;
    while ( fgets( line, sizeof(line), stdin ) != (char*) 0 )
	{
	/* Parse the line.  Each field is terminated in place by
	** overwriting its delimiter with a NUL.
	*/
	host_str = line;
	cp = strchr( host_str, ' ' );
	if ( cp == (char*) 0 )
	    { bogus( "host", host_str ); continue; }
	*cp = '\0';
	rfc931user_str = cp + 1;
	cp = strchr( rfc931user_str, ' ' );
	if ( cp == (char*) 0 )
	    { bogus( "rfc931user", rfc931user_str ); continue; }
	*cp = '\0';
	authuser_str = cp + 1;
	cp = strchr( authuser_str, ' ' );
	if ( cp == (char*) 0 )
	    { bogus( "authuser", authuser_str ); continue; }
	*cp = '\0';
	date_str = cp + 1;
	if ( *date_str != '[' )
	    { bogus( "date", date_str ); continue; }
	++date_str;
	cp = strchr( date_str, ']' );
	if ( cp == (char*) 0 )
	    { bogus( "date", date_str ); continue; }
	*cp = '\0';
	method_str = cp + 1;
	if ( *method_str != ' ' )
	    { bogus( "method", method_str ); continue; }
	++method_str;
	if ( *method_str != '"' )
	    { bogus( "method", method_str ); continue; }
	++method_str;
	cp = strchr( method_str, ' ' );
	if ( cp == (char*) 0 )
	    { bogus( "method", method_str ); continue; }
	*cp = '\0';
	file_str = cp + 1;
	i = strcspn( file_str, " \"" );
	if ( file_str[i] == ' ' )
	    {
	    /* A protocol follows the file name. */
	    file_str[i] = '\0';
	    protocol_str = &(file_str[i + 1]);
	    cp = strchr( protocol_str, '"' );
	    if ( cp == (char*) 0 )
		{ bogus( "protocol", protocol_str ); continue; }
	    *cp = '\0';
	    status_str = cp + 1;
	    }
	else if ( file_str[i] == '"' )
	    {
	    /* The request ends right after the file name, no protocol. */
	    file_str[i] = '\0';
	    status_str = &(file_str[i + 1]);
	    }
	else
	    { bogus( "file", file_str ); continue; }
	if ( *status_str != ' ' )
	    { bogus( "status", status_str ); continue; }
	++status_str;
	cp = strchr( status_str, ' ' );
	if ( cp == (char*) 0 )
	    { bogus( "status", status_str ); continue; }
	*cp = '\0';
	bytes_str = cp + 1;
	cp = strchr( bytes_str, ' ' );
	if ( cp == (char*) 0 )
	    {
	    /* No referer / user agent; bytes is the last field. */
	    cp = strchr( bytes_str, '\n' );
	    if ( cp != (char*) 0 )
		*cp = '\0';
	    }
	else
	    {
	    /* The referer and user agent are not used in the summary,
	    ** but they are still checked so that malformed lines are
	    ** rejected.
	    */
	    *cp = '\0';
	    referer_str = cp + 1;
	    if ( *referer_str != '"' )
		{ bogus( "referer", referer_str ); continue; }
	    ++referer_str;
	    cp = strchr( referer_str, '"' );
	    if ( cp == (char*) 0 )
		{ bogus( "referer", referer_str ); continue; }
	    *cp = '\0';
	    useragent_str = cp + 1;
	    if ( *useragent_str != ' ' )
		{ bogus( "useragent", useragent_str ); continue; }
	    ++useragent_str;
	    if ( *useragent_str != '"' )
		{ bogus( "useragent", useragent_str ); continue; }
	    ++useragent_str;
	    cp = strchr( useragent_str, '"' );
	    if ( cp == (char*) 0 )
		{ bogus( "useragent", useragent_str ); continue; }
	    *cp = '\0';
	    }

	/* Convert date & bytes. */
	date = date_parse( date_str );
	if ( date == (time_t) -1 )
	    { bogus( "date", date_str ); continue; }
	bytes = atol( bytes_str );

	/* Is this the first line, a new group, or a continuation of the
	** current group?
	*/
	if ( prev_date == (time_t) -1 ||
	     date - prev_date >= idletime )
	    {
	    if ( prev_date != (time_t) -1 )
		{
		/* Close the previous group and report the gap. */
		print_group(
		    group_count, group_bytes,
		    (long) ( prev_date - group_start ) );
		(void) printf(
		    "%ld idle secs\n", (long) ( date - prev_date ) );
		}
	    group_start = date;
	    group_count = 1;
	    group_bytes = bytes;
	    }
	else
	    {
	    ++group_count;
	    group_bytes += bytes;
	    }

	prev_date = date;
	}

    /* Do the last group - but only if at least one line was accepted. */
    if ( prev_date != (time_t) -1 )
	print_group(
	    group_count, group_bytes, (long) ( prev_date - group_start ) );

    /* Done. */
    return 0;
    }


/* Write the command-line synopsis to stderr, then terminate the program
** with exit status 1.  Does not return.
*/
static void
usage( void )
    {
    fprintf( stderr, "usage:  %s [-quiet] [-idle idletime]\n", argv0 );
    exit( 1 );
    }

/* Report a log line that could not be parsed.
**
** where names the field that was being parsed when the problem was found;
** rest is the not-yet-parsed remainder of the line.  Nothing is printed
** (and rest is left untouched) when the quiet flag is set.  Otherwise a
** trailing newline in rest is removed in place before the message is
** written to stderr.
*/
static void
bogus( char* where, char* rest )
    {
    size_t len;

    if ( quiet )
	return;

    len = strlen( rest );
    /* rest can be empty, e.g. when the line ends right after a separator;
    ** only look at the last character when there is one.
    */
    if ( len > 0 && rest[len - 1] == '\n' )
	rest[len - 1] = '\0';
    (void) fprintf( stderr, "%s: bogus line at %s - '%s'\n", argv0, where, rest );
    }


/* Stripped-down version of date_parse. */


/* One entry of a name -> value lookup table. */
struct strlong {
    const char* s;	/* key; compared with strcmp(), never written to */
    long l;		/* value handed back when the key matches */
    };


/* Convert every upper-case character of str to lower case, in place.
** Other characters are left as they are.
*/
static void
pound_case( char* str )
    {
    for ( ; *str != '\0'; ++str )
	{
	/* The <ctype.h> functions require a value representable as
	** unsigned char; a plain char may be negative.
	*/
	unsigned char ch = (unsigned char) *str;

	if ( isupper( ch ) )
	    *str = (char) tolower( ch );
	}
    }

static int
strlong_compare( v1, v2 )
    char* v1;
    char* v2;
    {
    return strcmp( ((struct strlong*) v1)->s, ((struct strlong*) v2)->s );
    }


/* Binary search for str in tab, which must hold n entries sorted by
** strcmp() order of their s member.
**
** Returns 1 and stores the matching entry's value in *lP when str is
** found.  Returns 0 and leaves *lP alone otherwise, including when the
** table is empty.
*/
static int
strlong_search( char* str, struct strlong* tab, int n, long* lP )
    {
    int lo, hi;

    lo = 0;
    hi = n - 1;
    /* Testing the bounds first keeps an empty table from being read. */
    while ( lo <= hi )
	{
	int mid = lo + ( hi - lo ) / 2;
	int r = strcmp( str, tab[mid].s );

	if ( r == 0 )
	    {
	    *lP = tab[mid].l;
	    return 1;
	    }
	if ( r < 0 )
	    hi = mid - 1;
	else
	    lo = mid + 1;
	}
    return 0;
    }


/* Translate a month name into a month number.
**
** str_mon is lower-cased in place and then looked up; both three-letter
** abbreviations and full English names are known.  On a match the month
** number (0 for January through 11 for December) is stored in *tm_monP
** and 1 is returned; otherwise 0 is returned.
*/
static int
scan_mon( char* str_mon, long* tm_monP )
    {
    static struct strlong mon_tab[] = {
	{ "jan", 0 }, { "january", 0 },
	{ "feb", 1 }, { "february", 1 },
	{ "mar", 2 }, { "march", 2 },
	{ "apr", 3 }, { "april", 3 },
	{ "may", 4 },
	{ "jun", 5 }, { "june", 5 },
	{ "jul", 6 }, { "july", 6 },
	{ "aug", 7 }, { "august", 7 },
	{ "sep", 8 }, { "september", 8 },
	{ "oct", 9 }, { "october", 9 },
	{ "nov", 10 }, { "november", 10 },
	{ "dec", 11 }, { "december", 11 },
	};
    static int sorted = 0;
    const size_t mon_count = sizeof(mon_tab) / sizeof(mon_tab[0]);

    /* The table above is written in calendar order; the search needs it
    ** in strcmp() order, so sort it the first time through.
    */
    if ( sorted == 0 )
	{
	qsort( mon_tab, mon_count, sizeof(mon_tab[0]), strlong_compare );
	sorted = 1;
	}

    pound_case( str_mon );
    return strlong_search( str_mon, mon_tab, (int) mon_count, tm_monP );
    }


/* Translate a time zone designation into an offset from GMT in seconds.
**
** Two forms are understood:
**   - numeric, HHMM with an optional leading '+' or '-' (hours 00-23,
**     minutes 00-59), e.g. "+0100", "-0800", "+0530";
**   - one of the zone names in the table below, in any letter case.
**
** For a zone name str_gmtoff is lower-cased in place before the lookup.
** On success the offset is stored in *gmtoffP and 1 is returned; 0 is
** returned for anything that is not recognized.
*/
static int
scan_gmtoff( char* str_gmtoff, long* gmtoffP )
    {
    /* NOTE(review): the signs of "bst" and of the single-letter zones
    ** are kept exactly as they were; they look inverted relative to
    ** common usage - confirm before relying on them.
    */
    static struct strlong gmtoff_tab[] = {
	{ "gmt", 0L }, { "utc", 0L }, { "ut", 0L },
	{ "cet", 3600L }, { "ced", 7200L },	/* Central European time */
	{ "mez", 3600L }, { "mesz", 7200L },	/* Mitteleuropaeische Zeit */
	{ "jst", 32400L }, { "jdt", 36000L },	/* Japan time */
	{ "bst", -3600L },
	{ "nst", -12600L },
	{ "ast", -14400L }, { "adt", -10800L },
	{ "est", -18000L }, { "edt", -14400L },
	{ "cst", -21600L }, { "cdt", -18000L },
	{ "mst", -25200L }, { "mdt", -21600L },
	{ "pst", -28800L }, { "pdt", -25200L },
	{ "yst", -32400L }, { "ydt", -28800L },
	{ "hst", -36000L }, { "hdt", -32400L },
	{ "a", -3600L }, { "b", -7200L }, { "c", -10800L }, { "d", -14400L },
	{ "e", -18000L }, { "f", -21600L }, { "g", -25200L }, { "h", -28800L },
	{ "i", -32400L }, { "k", -36000L }, { "l", -39600L }, { "m", -43200L },
	{ "n", 3600L }, { "o", 7200L }, { "p", 10800L }, { "q", 14400L },
	{ "r", 18000L }, { "s", 21600L }, { "t", 25200L }, { "u", 28800L },
	{ "v", 32400L }, { "w", 36000L }, { "x", 39600L }, { "y", 43200L },
	{ "z", 0L },
	};
    static int sorted = 0;
    const size_t tab_count = sizeof(gmtoff_tab) / sizeof(gmtoff_tab[0]);
    const char* digits;
    long sign;

    /* Numeric form first: computing the offset handles every HHMM value,
    ** including half-hour zones and offsets beyond twelve hours.
    */
    digits = str_gmtoff;
    sign = 1L;
    if ( *digits == '+' )
	++digits;
    else if ( *digits == '-' )
	{
	sign = -1L;
	++digits;
	}
    if ( strlen( digits ) == 4 && strspn( digits, "0123456789" ) == 4 )
	{
	long hours = ( digits[0] - '0' ) * 10L + ( digits[1] - '0' );
	long mins = ( digits[2] - '0' ) * 10L + ( digits[3] - '0' );

	if ( hours > 23L || mins > 59L )
	    return 0;
	*gmtoffP = sign * ( hours * 3600L + mins * 60L );
	return 1;
	}

    /* Otherwise it has to be a zone name.  The table is not written in
    ** strcmp() order, so sort it before the first search.
    */
    if ( ! sorted )
	{
	qsort(
	    gmtoff_tab, tab_count, sizeof(gmtoff_tab[0]), strlong_compare );
	sorted = 1;
	}
    pound_case( str_gmtoff );
    return strlong_search( str_gmtoff, gmtoff_tab, (int) tab_count, gmtoffP );
    }


/* Tell whether a year is a leap year in the Gregorian calendar.
**
** Besides full years, year may be given in shortened form: values below
** 70 are taken to be 2000 + year, other values below 1900 are taken to be
** 1900 + year.  Returns 1 for a leap year, 0 otherwise.
*/
static int
is_leap( int year )
    {
    int full_year = year;

    if ( full_year < 70 )
	full_year += 2000;
    else if ( full_year < 1900 )
	full_year += 1900;

    if ( full_year % 400 == 0 )
	return 1;
    if ( full_year % 100 == 0 )
	return 0;
    return full_year % 4 == 0;
    }


/* Turn broken-down time fields into a count of seconds, much like
** mktime() but without any time zone or DST handling.
**
** Only tm_year (years since 1900), tm_mon (0-11, used unchecked as a
** table index), tm_mday (1-based), tm_hour, tm_min and tm_sec are read.
** Leap days of earlier years are counted with a plain divide-by-four, so
** the century rule is applied to the current year only.
*/
static time_t
tm_to_time( struct tm* tmP )
    {
    /* Days that precede the first of each month in a non-leap year. */
    static int days_before_month[12] = {
	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
    time_t days;

    /* Whole years since 1970, as days. */
    days = ( tmP->tm_year - 70 ) * 365;
    /* One extra day for each leap year already completed. */
    days += ( tmP->tm_year - 69 ) / 4;
    /* Whole months of the current year. */
    days += days_before_month[tmP->tm_mon];
    /* From March on, this year's own leap day has passed as well. */
    days += ( tmP->tm_mon >= 2 && is_leap( tmP->tm_year ) ) ? 1 : 0;
    /* Whole days of the current month; tm_mday starts at 1. */
    days += tmP->tm_mday - 1;

    /* Fold in hours, minutes and seconds. */
    return ( ( days * 24 + tmP->tm_hour ) * 60 + tmP->tm_min ) * 60
	   + tmP->tm_sec;
    }


/* Parse a log timestamp of the form
**     DD/mth/YYYY:HH:MM:SS zone
** (leading blanks and tabs are skipped) and return it as a count of
** seconds, adjusted by the zone offset.
**
** The year may have four digits or two; two-digit years below 70 are
** taken to be 20xx.  Returns (time_t) -1 when the string does not have
** the expected shape, when the month or zone is unknown, or when a
** numeric field is out of range.  str itself is not modified.
*/
static time_t
date_parse( char* str )
    {
    struct tm tm;
    char* cp;
    char str_mon[500], str_gmtoff[500];
    int tm_sec, tm_min, tm_hour, tm_mday, tm_year;
    long tm_mon, gmtoff;

    /* Skip initial whitespace ourselves - sscanf is clumsy at this. */
    for ( cp = str; *cp == ' ' || *cp == '\t'; ++cp )
	;

    /* DD/mth/YY:HH:MM:SS zone
    ** The field widths keep the two string conversions inside their
    ** 500-byte buffers no matter how long the input is.
    */
    if ( sscanf( cp, "%d/%499[a-zA-Z]/%d:%d:%d:%d %499[^: \t\n]",
	      &tm_mday, str_mon, &tm_year, &tm_hour, &tm_min, &tm_sec,
	      str_gmtoff ) != 7 )
	return (time_t) -1;
    if ( ! scan_mon( str_mon, &tm_mon ) ||
	 ! scan_gmtoff( str_gmtoff, &gmtoff ) )
	return (time_t) -1;

    /* Reject values no real timestamp can have before doing arithmetic
    ** on them.
    */
    if ( tm_mday < 1 || tm_mday > 31 ||
	 tm_hour < 0 || tm_hour > 23 ||
	 tm_min < 0 || tm_min > 59 ||
	 tm_sec < 0 || tm_sec > 60 ||
	 tm_year < 0 || tm_year > 9999 )
	return (time_t) -1;

    (void) memset( &tm, 0, sizeof(tm) );
    tm.tm_mday = tm_mday;
    tm.tm_mon = (int) tm_mon;
    tm.tm_year = tm_year;
    tm.tm_hour = tm_hour;
    tm.tm_min = tm_min;
    tm.tm_sec = tm_sec;

    /* struct tm counts years from 1900. */
    if ( tm.tm_year > 1900 )
	tm.tm_year -= 1900;
    else if ( tm.tm_year < 70 )
	tm.tm_year += 100;

    /* The fields are local to the given zone; subtract its offset. */
    return tm_to_time( &tm ) - gmtoff;
    }
