#! /usr/bin/perl -w

# vim:syntax=perl

use strict;
use lib '/usr/share/perl5';
use Lire::Apache  qw( combined2dlf common2dlf referer2dlf );
use Lire::Program qw( :msg :dlf );

# Map each supported log type to the Lire::Apache routine that parses one
# line of that type.  Keeping this in a single table means the argument
# validation and the per-line dispatch below can never disagree about which
# types exist.
my %converter_for = (
    combined => \&combined2dlf,
    common   => \&common2dlf,
    referer  => \&referer2dlf,
);

# The first command line argument selects the log type; any remaining
# arguments are left in @ARGV and are read as input files by <> below
# (STDIN when there are none).
my $log = shift or lr_err "please give type as arg";

# NOTE(review): lr_err is expected not to return -- confirm against
# Lire::Program.
lr_err( "invalid type '$log'; should be 'combined', 'common' or 'referer'" )
  unless exists $converter_for{$log};

my $convert = $converter_for{$log};

init_dlf_converter( "www" );

my $lines      = 0;    # input lines read
my $dlflines   = 0;    # lines successfully converted and printed
my $errorlines = 0;    # lines that could not be converted

# A lexical is used for the current line instead of the global $_, so that
# nothing called from inside the loop can clobber the text we report in the
# "cannot convert" notice.
while ( defined( my $line = <> ) ) {
    chomp $line;
    $lines++;

    # The converters signal an unparsable line by dying.  The eval block
    # returns a true sentinel on success; testing that rather than $@ is
    # reliable even when $@ gets reset while the stack unwinds (possible on
    # perls older than 5.14), so a failed line is always counted.
    my $converted = eval {
        my $dlf = $convert->( $line );
        print join( " ", @$dlf ), "\n";
        $dlflines++;
        1;
    };
    next if $converted;

    # Capture the error immediately: $@ is a global that later calls may
    # overwrite.
    my $error = $@ || "unknown error";
    lr_warn( $error );
    lr_notice( qq{cannot convert line $. "$line" to www dlf, skipping} );
    $errorlines++;
}

# Hand the totals (read, converted, failed) to Lire::Program.
end_dlf_converter( $lines, $dlflines, $errorlines );

__END__

=pod

=head1 NAME

apache-access_log2dlf - convert apache access logs to dlf format

=head1 SYNOPSIS

B<apache-access_log2dlf> I<combined>B<|>I<common>B<|>I<referer>

=head1 DESCRIPTION

This script reads Apache log files in several formats. The supported
formats, as defined in e.g. Apache's httpd.conf, are: "combined", "common"
and "referer".

"common" is used to parse the Common Logfile Format
that several WWW servers use.  It is documented on
http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format.

"combined" is defined in httpd.conf as:

 LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" 
   \"%{User-Agent}i\"" combined

This generates lines in the logfile like e.g.

 gelfand.mdcc.cx - - [31/Jul/2000:22:01:13 +0200] "GET 
   /~vanbaal/poetry.html HTTP/1.0" 200 2554 
   "http://mdcc.cx/~vanbaal/" "w3m/0.1.9"

The logs are converted to the DLF format:

 gelfand.mdcc.cx mdcc.cx - 200 3138 GET /howto.html html 
   /howto.html HTTP/1.0HTTP/1.0 31/Jul/200022:08:12

On apache servers, running various virtual servers, logs like

 1.2.6.241 3.4.3.21 - - [13/Oct/2000:10:57:13 +0200] 
   "GET / HTTP/1.0" 200 250

are frequently seen. These are 'common' logs with an added first field which
indicates the virtual server being visited. A quick and dirty trick to
handle these logs is doing:

 (while read virt rest; do echo $rest >> common.log-$virt; \
   done) < common.log

Since boa ( http://www.boa.org/ ) access logs are in exactly the same format 
as apache combined logs, this script can be used to process these too.


=head1 AUTHORS

Joost van Baal <joostvb@logreport.org>, Francis J. Lacoste
<flacoste@logreport.org> and Egon Willighagen <egonw@logreport.org>, based on
an idea by Edwin Groothuis

=head1 VERSION

$Id: apache-access_log2dlf.in,v 1.22 2001/11/10 22:23:30 flacoste Exp $

=head1 COPYRIGHT

Copyright (C) 2000-2001 Stichting LogReport Foundation LogReport@LogReport.org
 
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=cut

# Local Variables:
# mode: cperl
# End:
