#!/usr/bin/perl -w

=head1 NAME

asql - Provide an SQL interface to Apache logfiles.

=cut


=head1 SYNOPSIS

  asql [options]

  General Options:

   --help       Show brief help intstructions.
   --manual     Show more complete help.
   --version    Show the version of the software.

  Scripting Options:

   --load       Load the named file, or glob.
   --execute    Execute a single query then exit.

  Options:

   --file       Rather than running as a shell read commands from a named file.
   --quiet      Don't show the banner at startup.

=cut


=head1 DESCRIPTION

  asql provides a simple console interface to allow a user to
 query the contents of an Apache logfile via an SQL interface.

  The shell features include:

=over 8

=item Persistent alias definitions.

=item Command line completion

=item Command history

=item Simple scriptability

=back

=cut


=head1 INTRODUCTION

  The asql shell will create a temporary SQLite database based upon
 any number of Apache logfiles.  This temporary database may then
 be interactively queried using common SQL syntax.

  To get started you should load your logfiles into the database:

=for example begin

   load /var/log/apache2/acces*

=for example end


  (The tool will automatically decompress files which have been
 compressed with gzip or bzip2.)

  Once you've loaded at least one file you may run queries, for
 example:

=for example begin

  SELECT source,SUM(size) AS Number FROM logs GROUP BY source ORDER BY Number DESC, ip

=for example end

  This example shows all the clients connecting to your webserver
 and the size of files/requests that they have downloaded in total.

  As you can see we've selected two columns "source" and "SUM(size)".
 To see which other columns are available you may execute the "show"
 command.

  Because parsing the Apache logfile(s) specified might be quite
 slow there is the option of dumping the temporary SQLite database
 to a known filename with the 'save' command.  The analog to the
 save command is the 'restore' command, which will read in an
 existing SQLite database and allow future queries to be executed
 against it.

=cut


=head1 FILES

 When the shell starts up it will read and intepret the initialisation
 file of ~/.asqlrc if it exists.  Any commands present in that file
 will be executed prior to the launch of the interactive session.

 All aliases will be read and written to the file ~/.asql.aliases.

 All interactive history will be written to the file ~/.asql.

=cut

=head1 AUTHOR

 Steve
 --
 http://www.steve.org.uk/

=cut


=head1 LICENSE

Copyright (c) 2007,2008 by Steve Kemp.  All rights reserved.

This module is free software;
you can redistribute it and/or modify it under
the same terms as Perl itself.
The LICENSE file contains the full text of the license.

=cut



use strict;
use warnings;
use Data::Dumper;
use DBI;
use English;
use Getopt::Long;
use File::Copy qw/copy/;
use File::Temp qw/tempfile/;
use Pod::Usage;


#
#  Release number of this script.
#
my $RELEASE = '0.8';


#
#  Holder for values read from the configuration file, and the
# default values.
#
my %CONFIG;

#
#  Name of default save database
#
$CONFIG{ 'savedb' } = $ENV{ 'HOME' } . "/.asql.db";

#
#  Name of alias filename.
#
$CONFIG{ 'aliases' } = $ENV{ 'HOME' } . "/.asql.aliases";

#
#  Name of the RC file.
#
$CONFIG{ 'startup' } = $ENV{ 'HOME' } . "/.asqlrc";

#
#  Name of the history file
#
$CONFIG{ 'history' } = $ENV{ 'HOME' } . "/.asql";



#
#  Aliases..
#
my %ALIASES;



#
#  Dispatch table which contains the mapping between the commands
# we make available and the routine which implements that behaviour.
#
#  This table also contains both the long and the short form of each
# commands help text.
#
#
#  START_COMMAND_TABLE
#
my %dispatch = (
    "alias" => {
        sub  => \&do_alias,
        args => "[name sql...]",
        help =>
"Define a persistent alias, or list those aliases currently defined.\n\nIf no arguments are given all current aliases, as loaded from the\nalias file are displayed.\n\nAliases persist by default and may be created by running something\nlike this:\n\n  alias agents SELECT distinct(agents) FROM logs\n\nOnce this has been entered the new command 'agents' will run the given\nquery.\n\nTo remove the alias run:\n\n  alias agents\n\n",
        info => "Define, or view, persistent aliases.",
    },
    "alter" => {
        sub => sub {my ($args) = (@_); do_sql( "alter", $args );},
        help => "Run an ALTER query against the database.\n",
        info => "Run an ALTER query against the database.",
    },
    "create" => {
        sub => sub {my ($args) = (@_); do_sql( "create", $args );},
        help => "Run a CREATE query against the database.\n",
        info => "Run a CREATE query against the database.",
    },
    "delete" => {
        sub => sub {my ($args) = (@_); do_sql( "delete", $args );},
        help => "Run a DELETE query against the database.\n",
        info => "Run a DELETE query against the database.",
    },
    "drop" => {
        sub => sub {my ($args) = (@_); do_sql( "drop", $args );},
        help => "Run a DROP query against the database.\n",
        info => "Run a DROP query against the database.",
    },
    "insert" => {
        sub => sub {my ($args) = (@_); do_sql( "insert", $args );},
        help => "Run an INSERT query against the database.\n",
        info => "Run an INSERT query against the database.",
    },
    "load" => {
        sub  => \&do_load,
        args => "[file|glob] [--label=name",
        help =>
"Load an Apache logfile into the currently open table.\n\nYou may either specify a single file, or a glob pattern.\n\nFiles with a .gz, or .bz2 suffix will be automtically decompressed and loaded.\n\nTo save time parsing the logfile(s) specified you may save the database once\nit has been populated via 'save' and 'restore'.",
        info => "Load an Apache logfile.",
    },
    "exit" => {
                sub  => \&do_exit,
                help => "Exit the shell.",
                info => "Exit the shell.",
    },
    "help" => {
        sub  => \&do_help,
        args => "[command]",
        help =>
"If a command is given then show help about that command.\n\nIf no command is specified give an overview of all available commands.\n",
        info => "Show general, or command-specific, help information.",
    },
    "quit" =>    # Dupe: exit
      {
        sub  => \&do_exit,
        help => "Exit this shell.",
        info => "Exit this shell.",
      },
    "restore" => {
        sub  => \&do_restore,
        args => '[filename]',
        help =>
"Load a SQLite database which was previously saved via 'save'.\n\nThis immediately makes any previously saved records available, without the need to reload the logile(s).\n",
        info => "Load a previously save'd temporary database.",
    },
    "save" => {
        sub  => \&do_save,
        args => '[filename]',
        help =>
"Save the temporary SQLite database which was create at startup time.\n\nThis means you won't need to wait for the relatively slow logfile parsing\nat startup.  Use the 'restore' command to reload this database in the future.",
        info => "Save the temporary database.",
    },
    "select" => {
        sub => sub {my ($args) = (@_); do_sql( "select", $args );},
        help =>
"Run a SELECT query against the database.\nExample queries\n\n  SELECT distinct(source) FROM logs\n\nSELECT referer,COUNT(referer) AS number from logs GROUP BY referer ORDER BY number DESC,referer\n\netc.",
        info => "Run a SELECT query against the database.",
    },
    "show" => {
                sub  => \&do_show,
                help => "Show the structure of the database.",
                info => "Show the structure of the database.",
    },
    "update" => {
        sub => sub {my ($args) = (@_); do_sql( "update", $args );},
        help => "Run an UPDATE query against the database.\n",
        info => "Run an UPDATE query against the database.",
    },
);

#
#  END_COMMAND_TABLE
#



my %months;
$months{ 'jan' } = 1;
$months{ 'feb' } = 2;
$months{ 'mar' } = 3;
$months{ 'apr' } = 4;
$months{ 'may' } = 5;
$months{ 'jun' } = 6;
$months{ 'jul' } = 7;
$months{ 'aug' } = 8;
$months{ 'sep' } = 9;
$months{ 'oct' } = 10;
$months{ 'nov' } = 11;
$months{ 'dec' } = 11;

####
#
#  Start of code
#
####


#
#  Parse any command line arguments which might be present.
#
#  Do this first so that --help, etc, works.
#
parseCommandLineArguments();


#
# Sanity check our perl module availability.
#
sanityCheck();


#
#  Load any aliases
#
loadAliases();


#
# Create a temporary database file
#
my ( undef, $DBIFILE ) = File::Temp::tempfile();


#
# Show our banner.
#
showBanner();


#
#  Setup a signal handler to make sure we cleanup appropriately,
# specifically so that we save our aliases.
#
$SIG{ INT } = "do_exit";


#
#  Create the readline interface, and save the attributes away
# so that we may use them in our command completion code.
#
my $term    = createTerminal();
my $attribs = $term->Attribs;



#
#  Load any command history which might be present.
#
loadHistory($term);


#
#  If the user has a startup-file then load that prior to running
# any interactive/scripted session.
#
if ( -e $CONFIG{ 'startup' } )
{
    processFile( $CONFIG{ 'startup' } );
}


#
#  Load data?
#
if ( $CONFIG{ 'load' } )
{
    do_load( $CONFIG{ 'load' } );
}

#
#  Execute directly?
#
if ( $CONFIG{ 'execute' } )
{
    processLine( $CONFIG{ 'execute' } );
    do_exit();
}

#
#  Run the contents of a named file?
#
if ( $CONFIG{ 'file' } )
{

    # Run the scripted file, and save our history.
    processFile( $CONFIG{ 'file' } );
    do_exit();
}


#
#  OK interactive usage.
#  NOTE: this never returns.
#
runMainLoop($term);



#
#  Never reached
#
exit;



=begin doc

  Parse any command line options which might be present.

=end doc

=cut

sub parseCommandLineArguments
{
    my $SHOW_HELP    = 0;
    my $SHOW_MANUAL  = 0;
    my $SHOW_VERSION = 0;

    #
    #  Parse options.
    #
    exit
      if (
           !GetOptions(
                        "help",      \$SHOW_HELP,
                        "manual",    \$SHOW_MANUAL,
                        "version",   \$SHOW_VERSION,
                        "file=s",    \$CONFIG{ 'file' },
                        "load=s",    \$CONFIG{ 'load' },
                        "execute=s", \$CONFIG{ 'execute' },
                        "quiet",     \$CONFIG{ 'quiet' },
           )
      );

    pod2usage(1) if $SHOW_HELP;
    pod2usage( -verbose => 2 ) if $SHOW_MANUAL;

    if ($SHOW_VERSION)
    {
        print "asql v$RELEASE\n";
        exit;
    }
}



=begin doc

  Sanity check that we can load the Perl modules we require.

=end doc

=cut

sub sanityCheck
{

    #
    #  Test we have the perl modules we need.
    #
    BEGIN
    {
        eval {
            require Term::ReadLine;
            require Term::ReadLine::Gnu;
        };
    }
    if ($@)
    {
        print "Package 'Term::ReadLine::Gnu' not installed.\n";
        print "Aborting\n";
        sleep 5;
        exit;
    }

}



=begin doc

  Load any persistent aliases which might be present.

=end doc

=cut

sub loadAliases
{

    # no file == no aliases
    return if ( !-e $CONFIG{ 'aliases' } );

    # read the file into this buffer.
    my $aliases = '';

    open( INPUT, "<", $CONFIG{ 'aliases' } )
      or die "Failed to read aliases file $CONFIG{'aliases'} : $!";

    foreach my $line (<INPUT>)
    {
        $aliases .= $line if ( defined($line) );
    }
    close(INPUT);

    #
    #  Evaluate and store any aliases
    #
    my $VAR1 = undef;
    my $f    = eval($aliases);
    %ALIASES = %$f if ($f);

}



=begin doc

  Dump out any saved aliases.

=end doc

=cut

sub saveAliases
{
    open( INPUT, ">", $CONFIG{ 'aliases' } )
      or die "Failed to write aliases file $CONFIG{'aliases'} : $!";
    print INPUT Dumper( \%ALIASES );
    close(INPUT);
}



=begin doc

  This routine is the core of the shell, it should parse evaluate
 and display the result(s) of the specified query.

=end doc

=cut

sub runQuery
{
    my ($sql) = (@_);

    #
    #  Make sure we have a database handle
    #
    my $dbh = $CONFIG{ 'dbi' };
    if ( !$dbh || !$dbh->ping() )
    {
        print "Database handle closed - trying to reopen!\n";

        $dbh = DBI->connect_cached( "dbi:SQLite:dbname=$DBIFILE", "", "" );

        if ( !$dbh )
        {
            print "Failed\n";
            return;
        }
        else
        {
            print "Re-connected\n";
            $CONFIG{ 'dbi' } = $dbh;
        }
    }

    #
    #  Prepare the statement
    #
    my $sth = $dbh->prepare($sql);
    if ($sth)
    {

        #
        #  Execute it and show the results.
        #
        $sth->execute();
        print "@$_\n" while $_ = $sth->fetchrow_arrayref();
        $sth->finish();
        print "\n";
    }
    else
    {

        #
        #  Failed to compile - the user will already see
        # an error, but we'll reiterate it.
        #
        print "Failed to compile SQL\n";
    }
}



=begin doc

 Show the startup banner for the shell.

=end doc

=cut

sub showBanner
{
    return if (    ( $CONFIG{ 'quiet' } )
                || ( $CONFIG{ 'execute' } ) );

    print "asql v$RELEASE - type 'help' for help.\n";
}



=begin doc

  Create the terminal interface, complete with command completion.

  Rather than hard-wiring the commands which are available we take them
 from our global dispatch table.

=end doc

=cut

sub createTerminal
{
    my $term = new Term::ReadLine 'asql';

    #
    # Process our dispatch table to determine which commands
    # are available.
    #
    my @cmds = ();

    #
    #  Add all commands.
    #
    push @cmds, ( keys %dispatch );
    push @cmds, ( keys %ALIASES );

    #
    #  Add completion
    #
    my $attribs = $term->Attribs;
    $attribs->{ completion_entry_function } =
      $attribs->{ list_completion_function };
    $attribs->{ completion_word }               = \@cmds;
    $attribs->{ attempted_completion_function } = \&completion;

    #
    #  Return it
    #
    return ($term);
}



=begin doc

  Here we perform filename completion for the second arg - if the first
 argument is "load" or "restore".

  If not we complete based upon our command names, and any loaded aliases.

=end doc

=cut

sub completion
{
    my ( $text, $line, $start, $end ) = @_;

    if (    ( substr( $line, 0, $start ) =~ /^load([ \t]+)$/i )
         || ( substr( $line, 0, $start ) =~ /^restore([ \t]+)$/i ) )
    {
        return
          $term->completion_matches( $text,
                                 $attribs->{ 'filename_completion_function' } );
    }
    else
    {
        return ( grep( /\Q^$text/, keys %dispatch, keys %ALIASES ) );
    }
}



=begin doc

  If the user has a history present in ~/.asql load it up.

=end doc

=cut

sub loadHistory
{
    my ($term) = (@_);

    #
    #  Load the file, if it exists, from the home directory.
    #
    my $file = $CONFIG{ 'history' };
    if ( -e $file )
    {

        #
        #  Load the history if we can.
        #
        if ( UNIVERSAL::can( $term, 'ReadHistory' ) )
        {
            $term->ReadHistory($file);
        }
    }
}



=begin doc

  Read and intepret the contents of the given file, if it exists.

=end doc

=cut

sub processFile
{
    my ($file) = (@_);

    if ( !-e $file )
    {
        print "File not found: $file\n";
        return;
    }

    open( INPUTFILE, "<", $file )
      or die "Failed to open $file - $!";

    while ( my $line = <INPUTFILE> )
    {
        chomp($line);
        processLine($line) if ( defined($line) && length($line) );
    }
    close(INPUTFILE);
}



=begin doc

  Run the input reading + dispatching loop.   We use the dispatch
 table already defined to handle input.

  Parsing of command line input is extremely minimal - we break the
 input line into "word" which is the first whitespace deliminated
 token on the line and "args" which is the remainder of the line.

  This is sufficient for our purposes.

=end doc

=cut

sub runMainLoop
{
    my ($term) = (@_);

    #
    #  Prompt
    #
    my $prompt = "asql> ";

    #
    #  Command loop.
    #
    while ( defined( my $line = $term->readline($prompt) ) )
    {

        # Ignore empty lines.
        next if ( !length($line) );

        # Strip leading and trailing whitespace.
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # The line is empty now?
        next if ( !length($line) );

        # actually process the input
        processLine($line);
    }

    #
    #  Save history on exit.
    #
    do_exit();
}



=begin doc

  Process a single line which has been read from the console, or from
 a script file.

=end doc

=cut

sub processLine
{
    my ($line) = (@_);

    # If we have arguments then split them up.
    my ( $word, @args ) = split( /[ \t]/, $line );

    # make sure we did receive a command
    return if ( !$word );

    # Lookup command in our dispatch table.
    my $cmd = $dispatch{ lc($word) };

    if ($cmd)
    {

        # Call the function with any arguments we might have.
        $cmd->{ 'sub' }->( join( " ", @args ) );

        # Add a successful line to our history, if we can.
        if ( UNIVERSAL::can( $term, 'add_history' ) )
        {
            $term->add_history($line);
        }
    }
    else
    {

        #
        #  Catch any alias definitions we might have present.
        #
        if ( $ALIASES{ $word } )
        {

            #
            #  Call ourself to process the line.
            #
            processLine( $ALIASES{ $word } );

            #
            # Add a successful line to our history, if we can.
            #
            if ( UNIVERSAL::can( $term, 'add_history' ) )
            {
                $term->add_history($line);
            }

        }
        else
        {
            if ( defined($word) && length($word) )
            {
                print "Unknown command: '$word' - type 'help' for help.\n";
            }
        }
    }
}



=begin doc

  Show, or define a new alias

=end doc

=cut

sub do_alias
{
    my ($line) = (@_);

    if ( !defined($line) || !length($line) )
    {
        my $count = 0;
        foreach my $name ( keys %ALIASES )
        {
            print "ALIAS $name $ALIASES{$name}\n";
            $count += 1;
        }
        print "No aliases are currently defined.\n" unless ($count);
        return;
    }

    #
    #  Define a new one
    #
    my ( $name, @rest ) = split( / /, $line );
    my $value = join( " ", @rest );

    print "ALIAS $name $value\n";

    #
    #  Delete the current alias of that name, if it exists.
    #
    delete $ALIASES{ $name };
    $ALIASES{ $name } = $value if ( defined($value) && ( length($value) ) );
}



=begin doc

 Exit this shell, first saving any command history.

=end doc

=cut

sub do_exit
{

    #
    #  The history file
    #
    my $file = $CONFIG{ 'history' };

    #
    #  Save the history if the term module can.
    #
    if ( UNIVERSAL::can( $term, 'WriteHistory' ) )
    {
        $term->WriteHistory($file);
    }

    #
    #  Disconnect from the database
    #
    if (    $CONFIG{ 'dbi' }
         && $CONFIG{ 'dbi' }->ping() )
    {
        $CONFIG{ 'dbi' }->disconnect();
    }

    #
    #  Remove it
    #
    unlink($DBIFILE) if ( defined($DBIFILE) && ( -e $DBIFILE ) );

    #
    #  Save any aliases
    #
    saveAliases();


    exit;
}



=begin doc

  Show the user some help.

  When called with no arguments it will display all supported commands.

  If called with arguments then they we will show only help for the
 specified command(s).

=end doc

=cut

sub do_help
{
    my ($term) = (@_);

    #
    #  Help on a single command
    #
    if ( ( defined($term) ) && ( length($term) ) )
    {
        foreach my $cmd ( split( /[ \t]/, $term ) )
        {

            # Lookup command in our dispatch table.
            my $c = $dispatch{ lc($cmd) };
            if ($c)
            {

                # arguments for the command?
                my $args = $c->{ 'args' } || '';

                print "\nCommand: $cmd $args\n\n";
                print $c->{ 'help' } . "\n";
            }
            else
            {
                print "Unknown command '$cmd' - no help text available\n";
            }
        }
        return;
    }


    print "asql v$RELEASE\n";
    print "The following commands are available within this shell:\n\n";

    #
    #  Build up the short-help, indented it nicely.
    #
    foreach my $entry ( sort keys %dispatch )
    {
        my $hash = $dispatch{ $entry };

        print sprintf( "%10s - %s\n", $entry, $hash->{ 'info' } );
    }

    #
    #  Footer.
    #
    print "\nFor command-specific help run \"help command\".\n\n";

}



=begin doc

  Load the specified files into our tables.

=end doc

=cut


sub do_load
{
    my (@files) = (@_);

    #
    #  Create the tables if we've not already done so.
    #
    if ( !$CONFIG{ 'loaded' } )
    {

        #
        # create a database handle
        #
        $CONFIG{ 'dbi' } =
          DBI->connect_cached( "dbi:SQLite:dbname=$DBIFILE", "", "" );

        #
        #  Delete the table if it already exists.
        #
        eval {
            local $CONFIG{ 'dbi' }->{ PrintError } = 0;
            $CONFIG{ 'dbi' }->do("DROP TABLE logs");
        };
        $CONFIG{ 'dbi' }->do(
"CREATE TABLE logs (id INTEGER PRIMARY KEY, source, request, status, size, method, referer, agent, version, date, user, label);"
        );

        $CONFIG{ 'loaded' } = 1;
    }

    #
    #  Did we get a label?
    #
    my $label = '';
    my @FILES;
    foreach my $arg (@files)
    {
        if ( $arg =~ /(.*)([ \t]+)--label=(.*)/ )
        {
            $arg   = $1;
            $label = $3;
        }
        push( @FILES, $arg );
    }


    #
    #  Now load each file.
    #
    foreach my $file (@FILES)
    {

        # skip arguments
        next if ( $file =~ /^--/ );

        # file exists - load it.
        if ( -e $file )
        {
            loadFile( $file, $label );
        }
        else
        {

            # could be a glob?
            foreach my $f ( glob($file) )
            {
                if ( -e $f )
                {
                    loadFile( $f, $label );
                }
                else
                {

                    # not found
                    print "Ignoring file which doesn't exist: $file\n";
                }
            }
        }
    }
}



=begin doc

  reconnect to a named databae.

=end doc

=cut

sub do_restore
{
    my ($file) = (@_);

    $file = $CONFIG{ 'savedb' }
      if ( ( !defined($file) ) || ( !length($file) ) );

    if ( !-e $file )
    {
        print "The restore file dosen't exist: $file\n";
        return;
    }

    #
    #  Disconnect
    #
    if (    $CONFIG{ 'dbi' }
         && $CONFIG{ 'dbi' }->ping() )
    {
        $CONFIG{ 'dbi' }->disconnect();
    }

    #
    #  Re-connect
    #
    $CONFIG{ 'dbi' } = DBI->connect_cached( "dbi:SQLite:dbname=$file", "", "" );

    #
    #  fake a load - so that we may seleect.
    #
    $CONFIG{ 'loaded' } += 1;
}



=begin doc

  Save the current temporary SQLite database to a file

=end doc

=cut

sub do_save
{
    my ($file) = (@_);

    $file = $CONFIG{ 'savedb' }
      if ( ( !defined($file) ) || ( !length($file) ) );

    print "Saving to : $file\n";

    #
    #  Disconnect
    #
    if (    $CONFIG{ 'dbi' }
         && $CONFIG{ 'dbi' }->ping() )
    {
        $CONFIG{ 'dbi' }->disconnect();
    }

    #
    #  Copy the file
    #
    File::Copy::copy( $DBIFILE, $file );

    #
    #  Reconnect
    #
    $CONFIG{ 'dbi' } =
      DBI->connect_cached( "dbi:SQLite:dbname=$DBIFILE", "", "" );
}



=begin doc

  This routine is called to invoke an SQL operation, the first argument
 is an SQL keyword, the additional arguments are the rest of the query.

  See the dispatch table for details of the different primary SQL commands
 we accept.

=end doc

=cut


sub do_sql
{
    my ( $command, $query ) = (@_);


    #
    #  Make sure we received a query
    #
    if ( !length($query) )
    {
        print "Missing arguments\n";
        return;
    }

    #
    #  Make sure we've loaded at least one file.
    #
    if ( !$CONFIG{ 'loaded' } )
    {
        print "No files loaded yet!\n";
        return;
    }


    runQuery( $command . " " . $query );
}



=begin doc

  Show the structure of our table.

=end doc

=cut

sub do_show
{
    print <<EOF;

   The log table has the following columns:

  id      - ID of the request
  source  - IP, or hostname, which made the request.
  request - The HTTP request
  status  - The HTTP status-code returned
  size    - The size of the response served, in bytes.
  method  - The HTTP method invoked (GET, PUT, POST etc).
  referer - The HTTP referer (sic).
  agent   - The User-Agent which made the request.
  version - The HTTP version used by this client.
  date    - The date and time at which the request was made.
  label   - Any label applied when the logfile was read.
  user    - The remote (authenticated) user, if any.

EOF
}



=begin doc

  Load the specified filename, or glob pattern, into our temporary
 SQLite database.

=end doc

=cut

sub loadFile
{
    my ( $filename, $label ) = (@_);

    # ensure we received a logfile.
    die "No filename...." if ( !defined($filename) );

    # did we get a label?
    $label = $filename unless ( defined($label) );

    # status.
    print "Loading: $filename\n" unless ( $CONFIG{ 'load' } );

    #
    #  Get the database handle and prepare the insertion query.
    #
    my $dbh = $CONFIG{ 'dbi' };
    my $sth = $dbh->prepare(
"INSERT INTO logs( source, request, status, size, method, referer, agent, version, date, user, label ) VALUES( ?,?,?,?,?,?,?,?,?,?,? )"
    );

    #
    #  Open the named logfile, using a pipe for .bz2/.gz files.
    #
    if ( $filename =~ /gz$/i )
    {

        #
        # Will fail if there is no zcat installed.
        #
        open( INPUT, "zcat $filename|" )
          or die "Cannot read piped file - $filename : $! ";
    }
    elsif ( $filename =~ /bz2$/i )
    {

        #
        # Will fail if there is no bzcat installed.
        #
        open( INPUT, "bzcat $filename|" )
          or die "Cannot read piped file - $filename : $! ";
    }
    else
    {

        # Open file normally.
        open( INPUT, "<", $filename )
          or die "Cannot open file - $filename : $!";
    }


    #
    #  Cache of parsed dates + times.
    #
    my %cache;


    #
    #  Parse each line in a minimal fashion.
    #
    foreach my $line (<INPUT>)
    {

        #
        #  Parse.
        #
        my $results = parseApacheLogLine($line);

        #
        #  Get the results, if they are present.
        #
        my $host     = $results->{ 'host' }  || "";
        my $size     = $results->{ 'bytes' } || "";
        my $version  = $results->{ 'proto' } || "";
        my $time     = $results->{ 'time' }  || "";
        my $date     = $results->{ 'date' }  || "";
        my $path     = $results->{ 'file' }  || "";
        my $protocol = $results->{ 'rtype' } || "";
        my $agent    = $results->{ 'agent' } || "";
        my $user     = $results->{ 'user' }  || "";
        my $refer    = $results->{ 'refer' } || "";
        my $code     = $results->{ 'code' }  || "";

        if ( $date =~ /^([^\/]+)\/([^\/]+)\/(.*)/ )
        {

            #
            #  If cached then use that value..
            #
            if ( $cache{ $date } )
            {
                $date = $cache{ $date };
            }
            else
            {

                #
                #  Otherwise we'll convert manually.
                #
                my $day  = $1;
                my $mon  = $2;
                my $year = $3;

                $mon = $months{ lc($mon) };

                #
                #  Update cache - and use it.
                #
                $cache{ $date } = $year . "-" . $mon . "-" . $day;
                $date = $cache{ $date };
            }

            $date = $date . 'T' . $time;
        }


        #
        #  HTTP version is of the form HTTP/N.N
        #
        $version = $1 if ( $version =~ /HTTP\/([0-9\.]+)/ );

        #
        #  Insert row.
        #
        $sth->execute(
                       $host,     $path,  $code,  $size,
                       $protocol, $refer, $agent, $version,
                       $date,     $user,  $label
        );
    }

    #
    #  All done
    #
    close(INPUT);
    $sth->finish();
}



=begin doc

  Parse a single line of Apache logfile into a hash-reference
 we can work with.

  This code was taken directly from the CPAN module
 Parse::AccessLogEntry by Marc Slagle.

=end doc

=cut

sub parseApacheLogLine
{
    my $Line = shift;
    my $Ref;
    my $Rest;
    my $R2;
    ( $Ref->{ host }, $Ref->{ user }, $Ref->{ date }, $Rest ) =
      $Line =~ m,^([^\s]+)\s+-\s+([^ ]+)\s+\[(.*?)\]\s+(.*),;
    my @Dsplit = split( /\s+/, $Ref->{ date } );
    $Ref->{ diffgmt } = $Dsplit[1];
    my @Ds2 = split( /\:/, $Dsplit[0], 2 );
    $Ref->{ date } = $Ds2[0];
    $Ref->{ time } = $Ds2[1];

    if ($Rest)
    {
        (
           $Ref->{ rtype },
           $Ref->{ file },
           $Ref->{ proto },
           $Ref->{ code },
           $Ref->{ bytes },
           $R2
        ) = split( /\s/, $Rest, 6 );
        $Ref->{ rtype } =~ tr/\"//d;
        $Ref->{ proto } =~ tr/\"//d;
        if ($R2)
        {
            my @Split = split( /\"/, $R2 );
            $Ref->{ refer } = $Split[1];
            $Ref->{ agent } = $Split[3];
        }
    }
    return $Ref;
}



#
#  Print a newline or two on termination, just to make things prettier.
#
END
{
    print "\n\n" unless( $CONFIG{'execute'} );
}



=head1 AUTHOR

 Steve
 --
 http://www.steve.org.uk/

=cut

=head1 LICENSE

Copyright (c) 2007-2008 by Steve Kemp.  All rights reserved.

This module is free software;
you can redistribute it and/or modify it under
the same terms as Perl itself.
The LICENSE file contains the full text of the license.

=cut
