#!/usr/bin/perl -w
# Copyright (c) Dave Horsfall.
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of the University nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# 
# 
# @(#)$Id: rdnchk.e,v 1.12 2003-05-09 15:40:51+10 daveh Exp $
# $Log: rdnchk.e,v $
# Revision 1.12  2003-05-09 15:40:51+10  daveh
# Enforce single-value attributes for CI stuff.
#
# Revision 1.11  2003-05-08 16:46:47+10  daveh
# Add missing attributes when auto-fixing.
#
# Revision 1.10  2003-05-02 11:46:55+10  daveh
# Sort DNs before writing out, and minor mods.
#
# Revision 1.9  2003-04-24 10:39:19+10  daveh
# Always encode the userPassword attribute on output.
#
# Revision 1.8  2003-04-16 15:21:49+10  daveh
# Don't use non-core LDIF module; treat values as case-insensitive; binary.
#
# Revision 1.7  2003-04-02 11:16:20+10  daveh
# Delete ciComment (it's an attribute, not an objclass) and add ciLdapConfig
#
# Revision 1.6  2003-04-01 17:52:04+10  daveh
# Fix errors on request, and minor mods.
#
# Revision 1.5  2003-03-28 17:15:34+11  daveh
# More minor mods.
#
# Revision 1.4  2003-03-28 09:19:21+11  daveh
# Use reference to ARGV typeglob for input.
#
# Revision 1.3  2003-03-27 16:08:57+11  daveh
# Use Net::LDAP::LDIF, and minor speedups.
#
# Revision 1.2  2003-03-11 11:17:03+11  daveh
# Look for orphan DNs.
#
# Revision 1.1  2003-02-27 14:28:09+11  daveh
# Initial revision
#
#
# RDNCHK
#
# Given a slapcat input file, check for mismatched DN/RDN pairs etc.
# Optionally make fixes (use with care).
#
# The data structure is a hash of references to hashes of anonymous lists:
#
#   $entries{$dn} =	# $dn has been normalised
#   {
#     origDN => "original DN",
#     attr1 => [ "value1-a", "value1-b" ],
#     attr2 => [ "value2" ]
#   }
#
# which is accessed as (e.g):
#
#   @{entries{$dn}{"attr1"}}
#
# to return an array of the value(s) of $dn's attr1.
#
# Note that this structure is optimised for access to the DNs, *not*
# for searches.
#
# The DN is low-cased and leading/trailing/multiple spaces stripped
# (and the original stored for posterity).
#
# I assume that caseIgnoreMatch applies across the board, as otherwise
# it's too damned difficult.  This only fails, in practice, for encoded
# fields such as passwords, but I'm not looking at those (passwords are
# rarely, if ever, a candidate for being an RDN).  Remember: the specific
# purpose of this program is to perform a quick but reasonably thorough
# check for DN/RDN consistency, and it sorta grew from there.
#
# We can't use Perl Net::LDAP::LDIF, because it's not a core module
# (too hard to maintain our remote branches when upgrading).
#
# TODO:
#	Check custom stuff:
#
#	    ciDefPrinter is single-value per ciPrinterClass.
#	    Fundamentally difficult, because these are keys
#	    into printcap, not LDAP.
#

use Data::Dumper;
use Getopt::Long;
use MIME::Base64;

my $origDN = '.origDN';	# Attribute stores original DN

&parse_options;
$opt_write = 1 if $opt_fix;

#
# Process each entry.
# A list (returned in @_) holds each line, with the DN first.
#
while (@_ = &GetEntry)	# Loop per entry (exit on EOF)
{
    my $dn = shift @_;
    # Check if base64 encoded
    next if ! $dn =~ /^dn::? /i;
    if($dn =~ /^dn:: /i) {
      $dn =~ s/dn:: (.*)/$1/;
      $dn = decode_base64($dn);
      $dn =~ s/\s$//;
      $encoded = 1;
    } else {
      $dn =~ s/dn: (.*)/$1/;
      $encoded = 0;
    }
    my $cdn = &canon($dn);
    $entries{$cdn}{$origDN} = $dn;
    $entries{$cdn}{"encoded"} = $encoded;

    #
    # Infer the suffix.
    # Assume it's the shortest DN.
    #
    if (!$opt_suffix)
    {
	$suffix = $cdn
	    if (!defined $suffix) || (length $cdn < length $suffix);
    }

    #
    # Extract the first component (the RDN)
    # for later tests.
    #
    ($rdn, undef) = split(/,/, $cdn);
    ($rdnattr, $rdnval) = split(/=/, $rdn);

    #
    # Get the attributes/values.
    # Attributes are low-cased.
    #
    for (@_)
    {
	($attr, $val) = split(/\s/, $_, 2);	# In case of "::"
	$attr =~ s/://;
	if ($attr =~ /:/)			# Must be binary (base-64)
	{
	    $attr =~ s/://;
	    $val = &demime($val);
	}
	push @{$entries{$cdn}{lc $attr}}, $val;
    }

    #
    # Does the RDN exist?
    #
    if (!defined @{$entries{$cdn}{$rdnattr}})
    {
	print STDERR "dn: $dn\nMissing RDN";
	if ($opt_fix)
	{
	    push @{$entries{$cdn}{$rdnattr}}, $rdnval;
	    print STDERR "; inserted \"$rdnattr=$rdnval\"";
	}
	print STDERR "\n\n";
    }

    #
    # And how many?  Multiples are permitted
    # in some contexts, but not in ours.
    #
    my $attrs = $entries{$cdn}{$rdnattr};	# Actually a reference
    my $nrdn = @{$attrs};
    if ($nrdn > 1)
    {
	print STDERR "dn: $dn\nMultiple RDNs: \"@{$attrs}[0]\"";
	for (my $i = 1; $i < $nrdn; $i++)
	{
	    print STDERR ", \"@{$attrs}[$i]\"";
	}
	if ($opt_fix)
	{
	    print STDERR "; using \"$rdnval\"";
	    $entries{$cdn}{$rdnattr} = [ $rdnval ];
	}
	print STDERR "\n\n";
    }

    #
    # Do they match?
    #
    if (defined @{$attrs} && $rdnval ne &canon(@{$attrs}[0]))
    {
	print STDERR "dn: $dn\nMismatched RDN: \"$rdnattr=@{$attrs}[0]\"";
	if ($opt_fix)
	{
	    print STDERR "; using \"$rdnval\"";
	    $entries{$cdn}{$rdnattr} = [ $rdnval ];
	}
	print STDERR "\n\n";
    }

    #
    # Check single-value attributes.
    #
    foreach my $attr (@single)
    {
	my $nval = 0;
	my $attrs = $entries{$cdn}{lc $attr};
	$nval = @{$attrs} if defined @{$attrs};
	if ($nval > 1)
	{
	    print STDERR "dn: $dn\nMultiple attrs for \"$attr\": \"@{$attrs}[0]\"";
	    for (my $i = 1; $i < $nval; $i++)
	    {
		print STDERR ", \"@{$attrs}[$i]\"";
	    }
	    if ($opt_fix)
	    {
		print STDERR "; using \"@{$attrs}[0]\"";
		$entries{$cdn}{lc $attr} = [ @{$attrs}[0] ];
	    }
	    print STDERR "\n\n";
	}
    }

    #
    # Check the objectclass inheritance.
    #
    if ($opt_inheritance)	# Will soon be mandatory
    {
	foreach my $i (@{$entries{$cdn}{"objectclass"}})
	{
	    next if $i eq "top";	# top is topless :-)
	    if (!defined $sup{$i})
	    {
		print STDERR "dn: $dn\nUnknown objectclass: \"$i\"";
		if ($opt_fix)
		{
		    print STDERR "; ignored";
		    &remove($i, \@{$entries{$cdn}{"objectclass"}});
		}
		print STDERR "\n\n";
	    }
	    if (defined $sup{$i} &&
		!&present($sup{$i}, \@{$entries{$cdn}{"objectclass"}}))
	    {
		print STDERR "dn: $dn\nNo sup for \"$i\": \"$sup{$i}\"";
		if ($opt_fix)
		{
		    print STDERR "; inserted";
		    push @{$entries{$cdn}{"objectclass"}}, $sup{$i};
		}
		print STDERR "\n\n";
	    }
	} # each objectclass
    } # inheritance

    #
    # Check required attributes.
    # Can't do in above loop, because the keys
    # may have changed from inserting new classes.
    #
    foreach my $i (@{$entries{$cdn}{"objectclass"}})
    {
	&checkattrs($cdn, $i);
    }
} # main loop

#
# Make sure each entry has a parent.
# For now, we kill orphans on sight...
#
$suffix = $opt_suffix if $opt_suffix;
foreach my $thisdn (keys %entries)
{
    my $i = $thisdn;
    $i =~ s/[^,]*,//;
    if (!$entries{$i} && $thisdn ne &canon($suffix))
    {
	print STDERR "dn: $thisdn\nOrphan";
	if ($opt_fix)
	{
	    print STDERR "; deleted";
	    delete $entries{$thisdn};
	}
	print STDERR "\n\n";
    }

    # Fix up the suffix dn if it's our mess, adding a structural objectclass.
    if ($thisdn eq &canon($suffix)) {
	if (@{$entries{$thisdn}{'objectclass'}} == 1
	    && lc $entries{$thisdn}{'objectclass'}[0] eq 'dcobject')
	{
		if (defined($opt_org))
		{
			push(@{$entries{$thisdn}{'objectclass'}}, 'organization');
			push(@{$entries{$thisdn}{'o'}}, $opt_org);
		} else {
			push(@{$entries{$thisdn}{'objectclass'}}, 'domain');
		}
	}
	# check for $classes == dcObject.
    }
}

print STDERR Dumper(%entries) if $opt_dump;

#
# Write out (possibly fixed) file if requested.
#
# The DN keys are sorted by length, which ensures that
# parents come before children.
#
if ($opt_write)
{
    foreach my $dn (sort { length($a) <=> length($b) } keys %entries)
    {
	&write_out($dn)
    }
}

exit 0;

###########################################################################

#
# Canonicalise a string.
# Delete leading/trailing blanks around commas, and lowcase.
#
sub canon
{
    ($_) = @_;
    s/\s+/ /g;	# Catch tabs as well
    s/ ,/,/g;
    s/, /,/g;
    lc;
}

#
# Check required attributes, and fix up known attributes for which the
# syntax has changed
# Bad hack: the "fix up" should be replaced by something which knows
# about schemas and can fix all instances for all attributes of the
# relevant attribute syntax (1.3.6.1.4.1.1466.115.121.1.27).  For now,
# we only check the most commonly affected attributes, which we also
# happen to know are SINGLE-VALUE (with the exception of
# mailPreferenceOption and ipServiceProtocol).
#
sub checkattrs
{
    (my $dn, $class) = @_;
    foreach my $attr (@{$reqd{lc $class}})
    {
	if (!defined @{$entries{$dn}{lc $attr}})
	{
	    my $odn = $entries{$dn}{$origDN};
	    print STDERR "dn: $odn\nMissing reqd \"$class\" attr \"$attr\"";
	    if ($opt_fix)
	    {
		# Quick hack for CI
		my $fix = "UNKNOWN";
		if ($attr eq "cn" && $fix ne "")
		{
		    $fix = $entries{$dn}{"givenname"}[0];
		}
		push @{$entries{$dn}{$attr}}, $fix;
		print STDERR "; inserted \"$fix\"";
	    }
	    print STDERR "\n\n";
	}
    }
    foreach my $attr (@integer) {
	$entries{$dn}{lc $attr}[0] = 0
	    if ($entries{$dn}{lc $attr} && $entries{$dn}{lc $attr}[0] =~ /^0+$/);
    }
}

#
# Write an entry to standard output.
#
# Ought to wrap at 78 cols as well.
#
sub write_out
{
    my ($dn) = @_;
    my $odn = $entries{$dn}{$origDN};
    if ($entries{$dn}{"encoded"} == 1) {
      $encoded = encode_base64($odn,"");
      print "dn:: $encoded\n";
    } else {
      print "dn: $odn\n";
    }
    foreach my $attr (keys %{$entries{$dn}})
    {
	next if $attr eq $origDN;
	foreach my $value (@{$entries{$dn}{$attr}})
	{
	    print "$attr:";
	    if ($attr =~ /userpassword/i
		|| $value =~ /(^[ :]|[\x00-\x1f\x7f-\xff])/)
	    {
		print ": ", &enmime($value, "");
	    }
	    else
	    {
		print " $value";
	    }
	    print "\n";
	}
    }
    print "\n";
}

#
# Test for presence of element in list.
#
sub present
{
    my ($element, $list) = @_;
    my $found = 0;

    foreach my $i (@$list)
    {
        if ($i eq $element)
        {
            $found = 1;
            last;
        }
    }
    return $found;
}

#
# Remove specified element from list.
# It's a unique element, but multiple
# occurances will be removed.  It will
# change the order of the list.
#
sub remove
{
    my ($element, $list) = @_;

    for (my $i = 0; $i < @$list; $i++)
    {
        if ($element eq @$list[$i])
        {
	    @$list[$i] = @$list[$#$list];
            pop @$list;
        }
    }
}

#
# Initialise some stuff (automatically called).
#
sub INIT
{
    #
    # Initialise the superior objectclasses.
    # Ought to get this from the schema.
    #
    $sup{"dcObject"} = "top";
    $sup{"inetOrgPerson"} = "organizationalPerson";
    $sup{"organizationalPerson"} = "person";
    $sup{"organizationalRole"} = "top";
    $sup{"organizationalUnit"} = "top";
    $sup{"person"} = "top";
    $sup{"posixAccount"} = "top";
    $sup{"room"} = "top";
    $sup{"simpleSecurityObject"} = "top";

    #
    # These are incomplete/wrong/WIP.
    #
    $sup{"ciAdministrator"} = "top";
    $sup{"ciApplication"} = "top";
    $sup{"ciEmployee"} = "inetOrgPerson";
    $sup{"ciLdapConfig"} = "top";
    $sup{"ciPrinter"} = "top";
    $sup{"ciServer"} = "top";

    #
    # Required attributes.
    #
    $reqd{"person"} = [ "sn", "cn" ];	# Special - can be autofixed
    $reqd{"ciadministrator"} = [ "uid", "userPassword" ];
    $reqd{"ciapplication"} = [ "ciApp", "ciAppType", "ciHost", "ciStatus", "ciPortNum" ];
    $reqd{"ciemployee"} = [ "employeeNumber", "sn" ];
    $reqd{"cildapconfig"} = [ "ciHost" ];
    $reqd{"ciprinter"} = [ "ciPrinterName" ];
    $reqd{"ciserver"} = [ "name" ];

    #
    # Single-value attributes.
    #
    @single =
    (
	"ciAppType",
	"ciDBPath",
	"ciDomainName", 
	"ciLdapEnabled",
	"ciLdapServer",
	"ciOSType",
	"ciPortNum",
	"ciPrinterClass",
	"ciRegion",
	"ciStatus",
    );

    #
    # Integer attributes
    #
    @integer =
    (
	'uidNumber',
	'gidNumber',
	'mailPreferenceOption',
	'shadowLastChange',
	'shadowMin',
	'shadowMax',
	'shadowWarning',
	'shadowInactive',
	'shadowExpire',
	'shadowFlag',
	'ipServicePort',
	'ipServiceProtocol',
	'ipProtocolNumber',
	'oncRpcNumber',
    );

    #
    # Random stuff.
    #
    $/ = "";		# Read input in paragraph mode
}

#
# Process options.
#
sub parse_options
{
    $SIG{'__WARN__'} = sub { die $_[0] };	# Exit on bad options

    Getopt::Long::Configure("bundling");	# Old-style (-xyz, --word)
    GetOptions
    (
	"--dump"	=> \$opt_dump,		# Dump data structure
	"-D"		=> \$opt_dump,

	"--fix"		=> \$opt_fix,		# Fix errors if possible
	"-f"		=> \$opt_fix,		# (also implies "write")

	"--inheritance"	=> \$opt_inheritance,	# Check obj inheritance
	"-i"		=> \$opt_inheritance,	# (too many false alarms)

	"--suffix=s"	=> \$opt_suffix,	# Specify directory suffix
	"-s=s"		=> \$opt_suffix,

	 "--write"	=> \$opt_write,		# Write ordered file
	 "-w"		=> \$opt_write,

	 "--org=s"	=> \$opt_org,		# Organization to use for
	 "-o=s"		=> \$opt_org,		# fixing up the suffix
    )
}

#
# Get a complete entry as a list of lines.
# We use the trick of setting the input delimiter
# to "", to read a paragraph at a time, so we can
# join continued lines.
#
sub GetEntry
{
    my @a;
    do
    {
	$_ = (<>);
	return () if !defined;	# EOF
	s/$/\n/;	# In case we strip last newline below
	s/^#.*\n//g;	# Comments
	chomp;		# Always strips >= 2 newlines
	s/\n //g;	# Join lines
	@a = split /\n/;
    }
    while (@a < 2);	# Skips phantom entries (caused by comments)
    return @a;
}

#
# Given a string, return a de-mimed version.
# Can't use MIME::Base64 because it's not a core module.
# Instead, I pinched the code from it...
#
sub demime
{
    local($^W) = 0; # unpack("u",...) gives bogus warning in 5.00[123]

    my $str = shift;
    $str =~ tr|A-Za-z0-9+=/||cd;            # remove non-base64 chars
    if (length($str) % 4) {
	require Carp;
	Carp::carp("Length of base64 data not a multiple of 4")
    }
    $str =~ s/=+$//;                        # remove padding
    $str =~ tr|A-Za-z0-9+/| -_|;            # convert to uuencoded format

    return join'', map( unpack("u", chr(32 + length($_)*3/4) . $_),
	                $str =~ /(.{1,60})/gs);
}

#
# En-mime same.
# I didn't write this bletcherous code either.
#
sub enmime
{
    my $res = "";
    my $eol = $_[1];
    $eol = "\n" unless defined $eol;
    pos($_[0]) = 0;                          # ensure start at the beginning

    $res = join '', map( pack('u',$_)=~ /^.(\S*)/, ($_[0]=~/(.{1,45})/gs));

    $res =~ tr|` -_|AA-Za-z0-9+/|;               # `# help emacs
    # fix padding at the end
    my $padding = (3 - length($_[0]) % 3) % 3;
    $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
    # break encoded string into lines of no more than 76 characters each
    if (length $eol) {
	$res =~ s/(.{1,76})/$1$eol/g;
    }
    return $res;
}
