#! /usr/bin/perl -w

# vim:syntax=perl

use strict;
use lib '/usr/share/perl5';
use Lire::DlfSchema;
use Lire::Email qw/sanitize splitemailadress splitrelay/;
use Lire::Syslog;
use Lire::Program qw/ :msg :dlf /;

use vars qw/ @accept_queue %deferred $error_on_msg $error_envid %error_headers
	     $start_time $dlf_maker $dlflines /;


sub print_dlf {
    my ( $msg ) = @_;

    foreach my $to ( split ",", $msg->{rcpt} ) {
	my $email;
	sanitize( "emailadress", $to, $email );
	( $msg->{to_user}, $msg->{to_domain} ) = splitemailadress( $email);

	my $rec = $dlf_maker->( $msg );
	print join( " ", @$rec ), "\n";
	$dlflines++;
    }
}

sub print_local_messages {
    my ( $time ) = @_;

    # All SMTP-Accept messages that weren't followed by a SMTP-Deliver
    # or Error-Handler event in the 10 seconds that followed its timestamp 
    # are considered delivered locally
    my $count = 0;
    foreach my $msg ( @accept_queue ) {
	last if $time - $msg->{time} < 10;
	$count++;

	$msg->{to_relay_host} = "localhost";
	$msg->{stat}	      = "sent";
	$msg->{delay}	      = 0;

	print_dlf( $msg );
    }

    # Remove from queue
    splice @accept_queue, 0, $count if $count;
}

sub print_deferred_messages {
    my ( $time ) = @_;

    foreach my $msg ( values %deferred ) {
	$msg->{delay} = $time - $msg->{time};
	print_dlf( $msg );
    }

    %deferred = ();
}

sub find_msg_in_accept_queue {
    my ( $msgid, $from ) = @_;

    for (my $i=@accept_queue; $i > 0; $i--) {
	my $msg = $accept_queue[$i-1];
	if ( $msg->{msgid} eq $msgid && $msg->{from} eq $from )
	{
	    return $i - 1;
	}
    }

    return -1;
}

#
# Since splitting the record is 81% faster than parsing it using
# m//, we use a error detection and correction schema rather than
# using the solution which is more expensive but give always the
# good result.
#
#Benchmark: timing 50000 iterations of parse_match, parse_split...
#parse_match:  3 wallclock secs ( 2.81 usr +  0.00 sys =  2.81 CPU) @ 17793.59/s (n=50000)
#parse_split:  2 wallclock secs ( 1.55 usr +  0.00 sys =  1.55 CPU) @ 32258.06/s (n=50000)
#               Rate parse_match parse_split
#parse_match 17794/s          --        -45%
#parse_split 32258/s         81%          --
sub correct_fields {
    my ( $fields, $module, $expected ) = @_;

    my $str = join ":", @$fields;
    my @fields = $str =~ m/((?:<[^>]+?>,?)+|[^:]+)/g;

    die "invalid $module record: should contains $expected fields but has ",
      scalar @fields, " after correction\n"
	if @fields != $expected;

    return \@fields;
}

sub smtp_accept_event {
    my ( $log, $fields ) = @_;

    # Fields order is
    # envelopeID msgID peerAddress peerHost mailFrom msgSize \
    # numRecipients recipientList
    # In the documentation, msgID and mailFrom are switched.
    $fields = correct_fields( $fields, "SMTP-Accept", 8 )
      if @$fields != 8;

    my $from;
    sanitize( "emailadress", $fields->[4], $from );

    my ( $user, $host ) = splitemailadress( $from );

    my $dlf = {
	       time	    => $log->{timestamp},
	       logrelay	    => $log->{hostname},
	       queueid	    => $fields->[0],
	       msgid	    => $fields->[1],
	       from	    => $fields->[4],
	       from_user    => $user,
	       from_domain  => $host,
	       size	    => $fields->[5],

	       # Non DLF mapped fields
	       nrcpt	     => $fields->[6],
	       rcpt	     => $fields->[7],
	      };
    sanitize( "relayhost", $fields->[3], $dlf->{from_relay_host} );
    sanitize( "relayip", $fields->[2], $dlf->{from_relay_ip} );

    push @accept_queue, $dlf;
}

sub smtp_deliver_event {
    my ( $log, $fields ) = @_;

    # Fields order is:
    # envelopeID msgID status destHost mailFrom msgSize \
    # numRecipients recipientList
    # In the documentation, msgID and mailFrom are switched.
    $fields = correct_fields( $fields, "SMTP-Deliver", 8 )
      if @$fields != 8;

    my $msg = $deferred{$fields->[0]};
    unless ( defined $msg ) {
	my $i = find_msg_in_accept_queue( $fields->[1], $fields->[4] );
	if ( $i >= 0 ) {
	    smtp_relayforward_event( $log, $fields, $i );
	    return;
	}

	my $email;
	sanitize( "emailadress", $fields->[4], $email );
	my ( $user, $host ) = splitemailadress( $email );

	# This deliver event doesn't have any corresponding SMTP-Accept
	# It's either a bounce or its SMTP-Accept event was in a previous
	# log file
	$msg = {
		logrelay    => $log->{hostname},
		queueid	    => $fields->[0],
		msgid	    => $fields->[1],
		from_user   => $user,
		from_domain => $host,
		size	    => $fields->[5],
		# Non DLF mapped fields
		nrcpt	    => $fields->[6],
		rcpt	    => $fields->[7],
	       };

	if ( $fields->[4] eq '<>' ) {
	    # Bounce
	    $msg->{time}	    = $log->{timestamp};
	    $msg->{from_relay_host} = "localhost";
	    $msg->{from_relay_ip}   = "127.0.0.1";
	} else {
	    # Log file rotation
	    $msg->{time}	    = $start_time;
	    # from_relay_host and from_relay_ip are UNKNOWN
	}
    }

    sanitize( "relayhost", $fields->[3], $msg->{to_relay_host} );

    if ( $fields->[2] eq 'Delivered' ) {
	$msg->{stat}  = 'sent';
	$msg->{delay} = $log->{timestamp} - $msg->{time};
	$msg->{nrcpt} = $fields->[6];
	$msg->{rcpt}  = $fields->[7];

	print_dlf( $msg );
    } else {
	$msg->{stat} = 'deferred';
	$deferred{$fields->[0]} = $msg;
    }
}

sub smtp_relayforward_event {
    my ( $log, $fields, $accept_idx ) = @_;

    my @forward = ();
    my @relay   = ();
    my $or_msg  = $accept_queue[$accept_idx];

    # Try to find each recipient in the original recipient list
    # All recipient that can't be found is a forward (alias)
    my @or_rcpt = split ",", $or_msg->{rcpt};
    foreach my $to ( split ",", $fields->[7] ) {
	if ( grep { $_ eq $to } @or_rcpt ) {
	    push @relay, $to;
	} else {
	    push @forward, $to;
	}
    }

    # Remove from accept queue if all relay recipients covers the original
    # message recipient list
    if ( @relay >= $or_msg->{nrcpt} ) {
	splice @accept_queue, $accept_idx, 1;
    } else {
	# Remove the relay recipients from the msg on the accept_queue
	# so that they aren't marked as delivered locally later on
	my @new_rcpt = ();
	foreach my $to ( split ",", $or_msg->{rcpt} ) {
	    # Destination will be equal in the case of a relay
	    next if grep { $_ eq $to } @relay;
	    push @new_rcpt, $to;
	}
	$or_msg->{rcpt} = join ",", @new_rcpt;
	$or_msg->{nrcpt} = @new_rcpt;
    }

    if ( @relay ) {
	# Create by copying
	my %dlf = %$or_msg;

	sanitize( "relayhost", $fields->[3], $dlf{to_relay_host} );
	$dlf{size} = $fields->[5];

	if ( $fields->[2] eq 'Delivered' ) {
	    $dlf{stat}  = 'sent';
	    $dlf{delay} = $log->{timestamp} - $or_msg->{time};
	    $dlf{nrcpt} = @relay;
	    $dlf{rcpt}  = join ",", @relay;

	    print_dlf( \%dlf );
	} else {
	    $dlf{stat} = 'deferred';
	    $deferred{$fields->[0]} = \%dlf;
	}
    }

    if ( @forward ) {
	my %dlf = (
		   time	    => $log->{timestamp},
		   logrelay => $log->{hostname},
		   queueid  => $fields->[0],
		   msgid    => $fields->[1],
		   from	    => $fields->[4],
		   from_user	=> $or_msg->{from_user},
		   from_domain	=> $or_msg->{from_domain},
		   size	    => $fields->[5],
		   from_relay_host  => "localhost",
		   from_relay_ip    => "127.0.0.1",

		   # Non DLF mapped fields
		   nrcpt     => scalar @forward,
		   rcpt	     => join( ",", @forward),
		  );

	sanitize( "relayhost", $fields->[3], $dlf{to_relay_host} );

	if ( $fields->[2] eq 'Delivered' ) {
	    $dlf{stat}  = 'sent';
	    $dlf{delay} = 0;

	    print_dlf( \%dlf );
	} else {
	    $dlf{stat} = 'deferred';
	    $deferred{$fields->[0]} = \%dlf;
	}
    }
}

sub smtp_deny_event {
    my ( $log, $msg ) = @_;

    my ( $host, $ip ) = $msg =~ /Denied TCP access to (\S+) \[([\d.]+)\]/
      or die "can't extract host and ip from Denied TCP event\n";

    my $dlf = $dlf_maker->( {
			     time		=> $log->{timestamp},
			     logrelay		=> $log->{hostname},
			     from_relay_host	=> $host,
			     from_relay_ip	=> $ip,
			     to_relay_host	=> "localhost",
			     stat		=> "denied tcp access",
			    } );
    print join( " ", @$dlf ), "\n";
    $dlflines++;
}

sub error_event {
    my ( $log, $fields ) = @_;

    # Fields error
    #	envelopeID mailFrom size msgID
    $fields = correct_fields( $fields, "ErrorHandler", 4 )
      if @$fields != 4;

    # Find the message to which this error is related on the
    # accept_queue
    my $i = find_msg_in_accept_queue( $fields->[3], $fields->[1] );
    die "error_event: can't find message $fields->[3] in \@accept_queue\n"
      if $i < 0;

    $error_envid    = $fields->[0];
    $error_on_msg   = $i;
}

sub error_dump_event {
    my ( $log, $header, $text ) = @_;

    # Error can't be traced to a previous error
    return unless defined $error_on_msg;

    # Handle Channel-To! form of headers
    if ($header =~ /^\s*([-\w]+!) (.*)/) {
	$header = $1;
	$text = $2 . $text if defined $2;
    }

    if ( $header =~ /^\s*$/ ) {
	process_error( $log->{timestamp} );
    } else {
	# Trim
	$header =~ s/^\s*//;
	$header =~ s/\s*$//;
	if (defined $text ) {
	    $text =~ s/^\s*//;
	    $text =~ s/\s*$//;
	}
	$header = lc $header;

	# Store multiple identical headers' values in an array
	if ( exists $error_headers{$header}) {
	    if ( ref $error_headers{$header} ) {
		push @{$error_headers{$header}}, $text;
	    } else {
		$error_headers{$header} = 
		  [ $error_headers{$header}, $text ];
	    }
	} else {
	    $error_headers{$header} = $text;
	}
    }
}

sub process_unknown_error {
    my ($time ) = @_;

    my $msg = $accept_queue[$error_on_msg];

    $msg->{delay}	    = $time - $msg->{time};
    $msg->{stat}	    = "unknown error";
    $msg->{to_relay_host}   = "localhost";

    # Mark all recipients as having an error, altough this isn't
    # necessarly the case
    print_dlf( $msg );

    splice @accept_queue, $error_on_msg, 1;

    $error_on_msg = undef;
    $error_envid  = undef;
}

sub extract_rcpt {
    return undef unless defined $_[0];
    return $_[0] =~ /^SMTP\s+(<.+>)$/;
}

sub process_error {
    my ($time ) = @_;

    my $msg = $accept_queue[$error_on_msg];

    if ( defined $error_headers{'message-id'} ) {
	# Sanity check
	die "inconsistent message ID. Expected $error_envid. Found ",
	  $error_headers{'message-id'}, "\n"
	    unless $error_headers{'message-id'} eq $error_envid;
    } else {
	process_unknown_error( $time );
	return;
    }

    my @tos;
    my @stat;
    if ( ref $error_headers{'channel-to'} ) {
	for ( my $i=0; $i < @{$error_headers{'channel-to'}}; $i++ ) {
	    my $to = extract_rcpt( $error_headers{'channel-to'}[$i] );
	    $to ||= extract_rcpt( $error_headers{'channel-to!'}[$i] );
	    $to ||= $error_headers{'account-to!'}[$i];
	    push @tos, $to;
	}
	@stat   = @{$error_headers{'diagnostic-code'}};
    } else {
	my $to = extract_rcpt( $error_headers{'channel-to'} );
	$to ||= extract_rcpt( $error_headers{'channel-to!'} );
	$to ||= $error_headers{'account-to!'};

	@tos    = ( $to );
	@stat   = ( $error_headers{'diagnostic-code'} );
    }

    if (! exists $msg->{from_relay_ip} && exists $error_headers{'host-from'})
    {
	my $relay;
	sanitize( "relay", $error_headers{'host-from'}, $relay );
	( $msg->{from_relay_host}, $msg->{from_relay_ip} ) =
	  splitrelay( $relay );
    }

    my $rcpt  = $msg->{rcpt};
    my %handled = ();
    $msg->{to_relay_host} = "localhost";
    for (my $i=0; $i < @tos; $i++ ) {
	my $to	    = $tos[$i];
	my $stat    = lc $stat[$i];
	$stat =~ s/^\d*\s*//; # Remove error code
	$msg->{stat} = $stat;
	$msg->{delay} = $time - $msg->{time};

	$msg->{nrcpt}--;
	$msg->{rcpt} = $to;
	$handled{$to} = 1;
	print_dlf( $msg );
    }

    if ( $msg->{nrcpt} <= 0 ){
	splice @accept_queue, $error_on_msg, 1
    } else {
	# Remove recipients which had an error
	my @new_rcpt = grep { ! $handled{$_} } split ",", $rcpt;
	$msg->{rcpt} = join ",", @new_rcpt;
	$msg->{nrcpt} = @new_rcpt;
    }

    $error_envid    = undef;
    $error_on_msg   = undef;
    %error_headers  = ();
}


my $schema = eval { Lire::DlfSchema::load_schema( "email" ) };
lr_err( "failed to load email schema: $@" ) if $@;
$dlf_maker =
  $schema->make_hashref2asciidlf_func( qw/time logrelay queueid msgid
		      from_user from_domain from_relay_host from_relay_ip
		      size delay
		      to_user to_domain to_relay_host
		      stat /);

my $lines	= 0;
$dlflines	= 0;
my $errorlines  = 0;
$error_on_msg   = undef;
$error_envid	= undef;
%error_headers  = ();
$start_time	= 0;
my $end_time	= 0;

my $parser = new Lire::Syslog;
init_dlf_converter( "email" );
my $failed_line = undef;
while ( <> ) {
    chomp;
    $lines++;

    # Look for ^M in the log file which fooled the logging system
    if ( /\r$/ ) {
	$failed_line .= $_;
	next;
    } elsif ( defined $failed_line) {
	$_ = $failed_line . $_;
	$failed_line = undef;
    }

    eval {
	my $log = $parser->parse( $_ );
	die "not a smtpd log line\n" unless $log->{process} eq 'smtpd';

	$start_time = $log->{timestamp} unless $start_time;
	$end_time   = $log->{timestamp} if $log->{timestamp} > $end_time;

	# Process error before flushing messages since it is possible
	# that $error_on_msg to become invalid if the order is reversed.
	process_error( $log->{timestamp} )
	  if ( defined $error_envid && $log->{content} !~ /General Error:/ );

	# Flush local messages every 50 lines, unless we are processing
	# error
	print_local_messages( $log->{timestamp} )
	  if ! ( $lines % 50 ) && $log->{content} !~ /General Error:/;


	my ( $level, $module, @fields ) = split /:/, $log->{content};

      SWITCH:
	for ( $module ) {
	    /SMTP-Accept/ && do {
		smtp_accept_event( $log, \@fields );
		last SWITCH;
	    };
	    /SMTP-Deliver/ && do {
		smtp_deliver_event( $log, \@fields );
		last SWITCH;
	    };
	    /Denied TCP/ && do {
		smtp_deny_event( $log, join( ":", $module, @fields ) );
		last SWITCH;
	    };
	    /SMTP-ProtocolPlugin/ && do {
		# Skip those messages
		last SWITCH;
	    };
	    $level =~ /General Error/ && do {
		error_dump_event( $log, $module, join( ":", @fields ) );
		last SWITCH;
	    };
	    /Error-Handler/ && do {
		error_event( $log, \@fields );
		last SWITCH;
	    };
	    $level =~ /General Information/ && do {
		# Skip informational message
		last SWITCH;
	    };
	    /Client End-Of-Stream|starting queue|ended queue|Processing queue/ && do {
		# Skip
		last SWITCH;
	    };
	    # Unknown message
	    die "unknown module: $module\n";
	};
    };
    if ($@) {
	lr_warn( $@ );
	lr_warn( "failed to parse line $. '$_'. Skipping." );
	$errorlines++;
    }
}

eval { process_error( $end_time ) if $error_envid };
if ($@) { lr_warn( $@ ) };

print_local_messages( $end_time + 10 );
print_deferred_messages( $end_time );

end_dlf_converter( $lines, $dlflines, $errorlines );

__END__

=pod

=head1 NAME

nms2dlf - convert Netscape Messaging Server log files to the email DLF

=head1 SYNOPSIS

B<nms2dlf>

=head1 DESCRIPTION

This program converts Netscape Messaging Server log file generated by
the SMTP service to the email DLF.

To process correctly the log file, you need to turn on logging of the
following modules:

    - SMTP-Accept
    - SMTP-Deliver
    - Error-Handler

=head1 LIMITATIONS

This DLF converter was developed for the Netscape Messaging Server
version 4.1. Other versions may or may not work. Contact the LogReport
developers if you have problems with that converter.

You can find information about the log format used by Netscape
Messaging Server at the following URL:

    http://developer.netscape.com/docs/manuals/messaging/41/ag/logging.htm

Not all messages are documented. And we found errors in the
documentation. The fields msgID and mailFrom are inversed in both
SMTP-Accept and SMTP-Deliver from what described the documentation.

Also, we found that there are no logs from the Mailbox-Delivery
module, so we interpret an SMTP-Accept event which isn't followed by a
SMTP-Devivery or Error-Handler event in the next 10 seconds as a
successful local delivery.

=head1 VERSION

$Id: nms2dlf.in,v 1.3 2002/02/03 17:18:18 flacoste Exp $

=head1 COPYRIGHT


Copyright (C) 2001 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=cut

# Local Variables:
# mode: cperl
# End:
