#!/usr/bin/perl
# vim: set filetype=perl :
use strict;
use warnings;
use 5.010;
use English qw( -no_match_vars);
use autodie;
use File::stat;
use Cwd;

# Modulino guard: run main() only when this file is executed directly.  When
# the file is loaded from other code, caller(0) is true and main() is skipped.
main() unless caller(0);

# Entry point: dump the directory as LDIF, write one file per entry into the
# backup directory and record the result as a commit in the Git repository
# located there.
#
# Options are taken from @ARGV (see the OPTIONS section of the POD below).
# Built-in file operations are covered by autodie; git is driven through Git.pm.
sub main {
    use Pod::Usage;
    use Getopt::Long qw( :config auto_help pass_through );
    use File::Path qw( make_path );
    use Git;

    my $ldif_cmd    = '/usr/sbin/safe-ldif';
    my $backup_dir  = '/var/backups/ldap';
    my $commit_msg  = 'ldap-git-backup';
    my $commit_date = time();
    my $gc = 1;
    my $help;
    GetOptions(
        'ldif-cmd=s'    => \$ldif_cmd,
        'backup-dir=s'  => \$backup_dir,
        'commit-msg=s'  => \$commit_msg,
        'commit-date=s' => \$commit_date,
        'gc!'           => \$gc,
        'help'          => \$help,
    );
    pod2usage('-verbose' => 2, '-exit_status' => 0) if $help;
    # pass_through leaves unrecognised arguments in @ARGV; reject them here.
    pod2usage() if @ARGV;

    # --commit-date may name an existing file; its mtime then becomes the date.
    if ( -e $commit_date ) {
        $commit_date = stat($commit_date)->mtime();
    }

    # Make a relative backup directory absolute before we chdir() into it.
    if ( $backup_dir =~ m{\A [^/]}xms ) {
        $backup_dir = getcwd() . '/' . $backup_dir;
    }

    # Read the LDIF before touching the repository.
    my $ldif_aref = LDAP::Utils::read_ldif($ldif_cmd);
    make_path($backup_dir, {mode => 0700});
    my $mode = stat($backup_dir)->mode;
    # 05 = read or execute permission for "other", 02 = write for "other".
    warn "backup directory $backup_dir is world readable\n" if $mode & 05;
    warn "backup directory $backup_dir is world writable\n" if $mode & 02;
    chdir($backup_dir);
    Git::command('init');
    my $repo = Git->repository(Directory => $backup_dir);

    # Every tracked LDIF file is a removal candidate until the current dump
    # produces a file of the same name.
    my @filelist = $repo->command('ls-files', '*.ldif');
    my %files_before = ();
    for my $f (@filelist) { $files_before{$f} = 1 }

    @filelist = ();
    for my $ldif (@$ldif_aref) {
        my $filename = LDAP::Utils::filename($ldif);
        open(my $fh, '>', "$backup_dir/$filename");
        print {$fh} $ldif;
        close($fh);
        push(@filelist, $filename);
        delete($files_before{$filename});
    }
    $repo->command('add', @filelist) if @filelist;
    $repo->command('rm', (keys %files_before)) if %files_before;

    # "git commit" exits non-zero when nothing is staged, and Git.pm turns that
    # into an exception.  An unchanged directory is not an error, so commit
    # only when the index actually differs from HEAD.  (On a repository without
    # any commit yet, "git diff --cached" lists all staged files.)
    my @staged = $repo->command('diff', '--cached', '--name-only');
    if (@staged) {
        $repo->command('commit', "--message=$commit_msg", "--date=$commit_date");
    }
    $repo->command('gc', '--quiet') if $gc;
}

package LDAP::Utils;
use strict;
use warnings;
use 5.010;
use English qw( -no_match_vars);
use Carp;
use autodie;
use MIME::Base64;

# Run the LDIF command repeatedly until two consecutive dumps contain the same
# number of entries, then return the most recent dump.
#
#   $ldif_cmd - command line handed to read_ldif_raw()
#
# Returns an array reference with one LDIF record per element.
sub read_ldif {
    my ($ldif_cmd) = @_;

    my $snapshot = [];
    my $previous_size;
    do {
        # Size of the dump we are about to replace (0 before the first read).
        $previous_size = scalar @{$snapshot};
        $snapshot      = read_ldif_raw($ldif_cmd);
    } until scalar @{$snapshot} == $previous_size;

    return $snapshot;
}

# Run $ldif_cmd once and split its standard output into records.
#
#   $ldif_cmd - command line opened as a read pipe
#
# A record ends after an empty line ("\n\n"); the separator stays attached to
# the record.  Returns an array reference of the records in output order.
# Dies if the command cannot be started or the pipe cannot be closed cleanly.
sub read_ldif_raw {
    my ($ldif_cmd) = @_;

    # Only affects reads inside this sub; restored automatically on return.
    local $INPUT_RECORD_SEPARATOR = "\n\n";

    open( my $pipe, '-|', $ldif_cmd) or die "Can't exec '$ldif_cmd': $!";
    my @records = <$pipe>;
    close($pipe) or die "$ldif_cmd exited with $?: $!";

    return \@records;
}

# Base names handed out so far, mapped to the number of repeats seen.
our %filename_list = ();

# Build the file name for one LDIF record.
#
#   $ldif - text of a single LDIF record
#
# The name is "<timestamp>-<first 7 hex digits of the MD5 of the DN>.ldif".
# When the same base name comes up again, "-1", "-2", ... is inserted before
# the extension.
sub filename {
    my ($ldif) = @_;

    use Digest::MD5 qw( md5_hex );
    my $created   = timestamp($ldif);
    my $dn_digest = substr(md5_hex(dn($ldif)), 0, 7);
    my $stem      = "$created-$dn_digest";

    # First sighting: register the base name and use it as is.
    unless (defined($filename_list{$stem})) {
        $filename_list{$stem} = 0;
        return "$stem.ldif";
    }

    # Repeat: bump the counter and append it to keep the name distinct.
    my $serial = ++$filename_list{$stem};
    return "$stem-$serial.ldif";
}

# Extract the canonicalized DN from one LDIF record.
#
#   $ldif - text of a single LDIF record
#
# Returns the canonical form of the first "dn:" attribute (matched
# case-insensitively), or '' when the record has none.
sub dn {
    my ($ldif) = @_;

    # Undo LDIF line folding: a newline followed by a space continues a line.
    (my $unfolded = $ldif) =~ s{\n }{}gs;

    my ($dn_line) = grep { m{\A dn:}xmsi } split("\n", $unfolded);
    return '' unless defined $dn_line;

    return canonicalize_dn(get_value_from_attribute($dn_line));
}

# Normalize a DN by canonicalizing each comma-separated component.
#
#   $dn - distinguished name as a string
#
# The DN is split on every ',' character (escaping is not interpreted), each
# piece goes through canonicalize_rdn() and the results are re-joined with ','.
sub canonicalize_dn {
    my ($dn) = @_;

    return join(',', map { canonicalize_rdn($_) } split(',', $dn));
}

# Normalize one "key=value" component of a DN.
#
#   $rdn - a single comma-separated piece of a DN
#
# Splits at the first '=', strips surrounding whitespace from both halves and
# lowercases them.  Returns "key=value".
#
# The caller splits DNs on every comma, so a piece may lack '=' (for instance
# the tail of a value that contained an escaped comma) or be empty.  Missing
# halves are treated as empty strings, which yields the same result as before
# but without "uninitialized value" warnings.
sub canonicalize_rdn {
    my ($rdn) = @_;

    my ($key, $value) = split('=', $rdn // '', 2);
    for my $part ($key, $value) {    # $part aliases $key / $value
        $part //= '';
        $part =~ s{\A\s+}{}xms;
        $part =~ s{\s+\Z}{}xms;
    }
    return lc($key) . '=' . lc($value);
}

# Extract the creation time stamp from one LDIF record.
#
#   $ldif - text of a single LDIF record
#
# Returns the value of the first "createtimestamp:" attribute (matched
# case-insensitively), or '00000000000000Z' when the record has none.
sub timestamp {
    my ($ldif) = @_;

    # Undo LDIF line folding: a newline followed by a space continues a line.
    (my $unfolded = $ldif) =~ s{\n }{}gs;

    my ($stamp_line) = grep { m{\A createtimestamp:}xmsi } split("\n", $unfolded);
    return '00000000000000Z' unless defined $stamp_line;

    return get_value_from_attribute($stamp_line);
}

# Return the value part of a single (already unfolded) LDIF attribute line.
#
#   $attribute - one line of the form "name: value" or "name:: base64value"
#
# A double colon marks a base64 encoded value, which is decoded before it is
# returned.  LDIF allows any number of spaces (including none) between the
# colon(s) and the value, so "dn:cn=foo" and "dn::Y249Zm9v" are accepted as
# well and surplus spaces are not treated as part of the value.
#
# Returns undef when the line contains no "name:" prefix at all.
sub get_value_from_attribute {
    my ($attribute) = @_;

    my $value;
    return $value unless defined $attribute;

    # $base64_marker is ':' for "name::" and '' for "name:".
    my ($base64_marker, $raw)
        = $attribute =~ m{\A [^:]+ : (:?) [ ]* (.*) \z}xms;

    if ($base64_marker) {
        $value = decode_base64($raw);
    }
    else {
        $value = $raw;    # stays undef when the pattern did not match
    }
    return $value;
}

1;

__END__

=head1 NAME

ldap-git-backup - check in LDIF from an LDAP server into a GIT repository

=head1 SYNOPSIS

ldap-git-backup [options]

ldap-git-backup --help

=head1 DESCRIPTION

ldap-git-backup takes an LDIF dump of an LDAP server and updates a GIT repository
that serves as a versioned backup.

ldap-git-backup splits the LDIF data into separate entries.  It constructs
unique but stable file names using a combination of the creation time stamp for
ordering and the DN as the unique key for an entry.

=head1 OPTIONS

=over 4

=item B<--ldif-cmd E<lt>dump_ldif_commandE<gt>>

Specify a command to create a complete LDIF dump of the LDAP directory suitable
for a backup.  It should contain all entries necessary to restore the LDAP
database.  By default C</usr/sbin/safe-ldif> is taken which calls
C</usr/sbin/slapcat> from OpenLDAP.

If you have access to an LDAP server over the network you can use ldapsearch as
your C<--ldif-cmd>.  Example:

=over

  --ldif-cmd 'ldapsearch -u -x -o ldif-wrap=no \
  -H ldaps://ldap.example.org -b dc=example,dc=org'

=back

If you need to log in to a certain server first you would use something along
the lines of

=over

  --ldif-cmd 'ssh host.example.org ldapsearch -u -x -o ldif-wrap=no \
  -H ldaps://ldap.example.org -b dc=example,dc=org'

=back

You can make this as complicated as you like as long as it fits into a one-line
command.  If you need more, you may want to create a script for the purpose.

=item B<--backup-dir E<lt>backup_directoryE<gt>>

Specify the directory where the GIT repository for the versioned backup is held.
Default: F</var/backups/ldap>

=item B<--commit-msg E<lt>commit_stringE<gt>>

Specify a custom commit message.  Default: ldap-git-backup

Example:

=over

  --commit-msg "Import $(date --rfc-3339=seconds)"

=back

=item B<--commit-date E<lt>date_stringE<gt>>

=item B<--commit-date E<lt>fileE<gt>>

Specify a custom commit date.  If the argument names an existing file, that
file's modification time is used.  Default: the current time.

=item B<--no-gc>

Do not perform a garbage collection (git gc) after checking in the new backup.
By default gc is done so as to keep the size of the backup down.  You may want
to skip gc for the occasional backup run but leave it on for the scheduled
backups.

=item B<--help>

Prints this page.

=back

=head1 AUTHOR

Elmar S. Heeb <elmar@heebs.ch>

=cut
