#!/usr/bin/perl

#   tarcust -- A Tar Customizer
#   Copyright (C) 1999
#   Denis Barbier <barbier@imacs.polytechnique.fr>
#
#   New versions of this program are on the TarCust home page
#         http://www.engelschall.com/sw/tarcust/
#
#   This program is a free software released under the GNU GPL License;
#   see the LICENSE file for conditions.

require 5.000;
use vars qw(
      $TarCust_VersionStr $running_under_some_shell
      $blocksize $structure_header $posix_structure_header
      $header $header_line $total_size $size $contents
      $longnames $filein $fileout
      $opt_h $opt_v $opt_d $opt_V $opt_g $opt_G $opt_u
      $opt_U @opt_x @opt_s $opt_p $new_values $dir_mode
);

use subs qw(init_block string2hash hash2string key2text print_header
            read_contents msgdebug msgverbose showversion change_values
            usage);

use strict;
no strict "refs";

#   Version number, automatically kept in synchronization with version,pl
$TarCust_VersionStr = "0.7.2 (30-Aug-1999)";

#   The tar files format is quite simple. For each file, the archive
#   contains a header line of 512 bytes containing all informations
#   about this file followed by the contents of the file. After then
#   null chars are inserted so that each file size is a multiple of 512
#   bytes.
#   For files with a null size (e.g. directories, links, devices,...),
#   just the header is put in the file.

#   Default size of header line is 512.
$blocksize = 512;

#   Structure of the header
#
#               octal? offset length    pack   printf
#
$structure_header = {
    'name'      => [ 0,     0,   100,  "a99x"] ,
    'mode'      => [ 1,   100,     8,   "A7x", "%6o \0"] ,
    'uid'       => [ 1,   108,     8,   "A7x", "%6o \0"] ,
    'gid'       => [ 1,   116,     8,   "A7x", "%6o \0"] ,
    'size'      => [ 1,   124,    12,   "A12", "%11o "] ,
    'mtime'     => [ 1,   136,    12,   "A12", "%11o "] ,
    'chksum'    => [ 1,   148,     8,    "a8", "%6o\0 "] ,
    'typeflag'  => [ 0,   156,     1,    "a1"] ,
    'linkname'  => [ 0,   157,   100,  "a99x"] ,
    'magic'     => [ 0,   257,     6,    "a6"],
    'version'   => [ 0,   263,     2,    "a2"] ,
    'uname'     => [ 0,   265,    32,  "a31x"] ,
    'gname'     => [ 0,   297,    32,  "a31x"] ,
    'devmajor'  => [ 1,   329,     8,   "A7x", "%7o\0"] ,
    'devminor'  => [ 1,   337,     8,   "A7x", "%7o\0"] ,
    'prefix'    => [ 0,   345,   155, "a154x"] ,
    'null'      => [ 0,   500,    12,   "a12"] ,
};

#   Not used yet
$posix_structure_header = {
    'name'      => [ 0,     0,   100,  "a99x"] ,
    'mode'      => [ 1,   100,     8,   "A7x", "%7o\0"] ,
    'uid'       => [ 1,   108,     8,   "A7x", "%7o\0"] ,
    'gid'       => [ 1,   116,     8,   "A7x", "%7o\0"] ,
    'size'      => [ 1,   124,    12,   "A12", "%11o "] ,
    'mtime'     => [ 1,   136,    12,   "A12", "%11o "] ,
    'chksum'    => [ 1,   148,     8,    "a8", "%7o\0"] ,
    'typeflag'  => [ 0,   156,     1,    "a1"] ,
    'linkname'  => [ 0,   157,   100,  "a99x"] ,
    'magic'     => [ 0,   257,     8,    "a6"] ,
    'version'   => [ 0,   263,     2,    "a2"] ,
    'uname'     => [ 0,   265,    32,  "a31x"] ,
    'gname'     => [ 0,   297,    32,  "a31x"] ,
    'devmajor'  => [ 1,   329,     8,   "A7x", "%7o\0"] ,
    'devminor'  => [ 1,   337,     8,   "A7x", "%7o\0"] ,
    'prefix'    => [ 0,   345,   155, "a154x"] ,
    'null'      => [ 0,   500,    12,   "a12"] ,
};

#   Initializes the structure and returns a reference to it.
#   Apart the structure above, the header line is stored in the
#   ``input'' hash reference and the list of keys in the structure is
#   stored in ``keys''
sub init_block {
    my ($input) = @_;
    my $block = {};
    my @keys  = ();

    #   keys sre sorted for user messages
    foreach (sort
      { $structure_header->{$a}->[1] <=> $structure_header->{$b}->[1] }
             keys %$structure_header) {
        $block->{$_} = {
            'octal'  => $structure_header->{$_}->[0],
            'offset' => $structure_header->{$_}->[1],
            'length' => $structure_header->{$_}->[2],
            'format' => $structure_header->{$_}->[3],
            'printf' => $structure_header->{$_}->[4] ? 
                        $structure_header->{$_}->[4] : ''
        };
        push(@keys, $_);
    }
    $block->{"input"} = $input;
    $block->{"keys"} = \@keys;
    &string2hash($block);
    return $block;
}

#   read the header line and fills in the structure
sub string2hash {
    my ($block) = @_;
    my ($name, $value, $format, $offset, $length, $text);

    foreach $name (@{$block->{"keys"}}) {
        $format = $block->{$name}->{"format"};
        $offset = $block->{$name}->{"offset"};
        $length = $block->{$name}->{"length"};
        $text   = substr $block->{"input"}, $offset, $length;
        $value  = unpack "$format", $text;

        #   remove trailing blanks
        $value  =~ s/\0*$//;
        $value  = oct($value)
          if $block->{$name}->{"octal"} == 1 and $value ne '';

        #   and update hash elements
        $block->{$name}->{"value"} = $value;
    }
}

#   build the header line from the header structure
sub hash2string {
    my ($block) = @_;
    my ($name, $input);

    $input = ' ' x $blocksize;
    foreach $name (@{$block->{"keys"}}) {
        next if $name eq 'chksum';
        $input = &key2text($name, $input, $block);
    }

    #   calculate the checksum for this new string
    $block->{"chksum"}->{"value"} = unpack ("%16C*", $input);

    #   and store this checksum and the string
    $input = &key2text("chksum", $input, $block);
    $block->{"input"} = $input;

    &msgdebug($input) if $opt_d == 1;
    return $input;
}

#   replace in $input the portion corresponding to the $name key
sub key2text {
    my ($name, $input, $block) = @_;
    my ($value, $offset, $length, $format, $printf);

    $offset = $block->{$name}->{"offset"};
    $length = $block->{$name}->{"length"};
    $format = $block->{$name}->{"format"};
    $printf = $block->{$name}->{"printf"};
    $value  = $block->{$name}->{"value"};
    if ($value eq '') {
        $value = "\0" x $length;
    }
    else {
        if ($printf ne '' and $value ne '') {
            $value = sprintf "$printf", $value;
        }
        $value = pack("$format", $value);
        #$value =~ s/^(\S.*?)(\s+)$/$2$1/;
    }
    substr($input, $offset, $length) = $value;
    return $input;
}

#   print informations stored in the header
sub print_header {
    my ($block) = @_;
    my ($key, $value, $format, $printf);

    foreach $key (@{$block->{"keys"}}) {
        $value  = $block->{$key}->{"value"};
        $format = $block->{$key}->{"format"};
        $printf = $block->{$key}->{"printf"};
        if ($value ne '') {
            if ($printf ne '') {
                $value = sprintf "$printf", $value;
                $value = pack("$format", $value);
            }
        }
        &msgdebug("$key: $value");
    }
}

#   read contents of the file. In the archive, a file fills
#   entire blocks
sub read_contents {
    my ($header) = @_;
    my ($contents, $size);

    $size = $header->{"size"}->{"value"};
    if ($size % $blocksize != 0) {
        $size += $blocksize - ($size % $blocksize);
    }
    read($filein, $contents, $size) == $size
       || die "error occurred when reading";
    return $contents;
}

sub msgdebug {
    my ($string) = @_;
    print STDERR "tarcust:debug: " . $string . "\n";
}

#   Apply changes requested by command-line arguments
#   Options are kept in a hash table.
sub change_values {
    my ($values, $block) = @_;

    if ($values->{"uname"} ne '') {
        $block->{"uname"}->{"value"} = $values->{"uname"};
        $block->{"uid"}->{"value"}   = 0;
    }
    $block->{"uid"}->{"value"} = $values->{"uid"}
        if $values->{"uid"} ne '';

    if ($values->{"gname"} ne '') {
        $block->{"gname"}->{"value"} = $values->{"gname"};
        $block->{"gid"}->{"value"}   = 0;
    }
    $block->{"gid"}->{"value"} = $values->{"gid"}
        if $values->{"gid"} ne '';
}

sub msgverbose {
    my ($string) = @_;
    print STDERR "tarcust:verbose: " . $string . "\n";
}

sub showversion {
    print STDERR <<EOT;
tarcust $TarCust_VersionStr

Copyright (C) 1999 Denis Barbier <barbier\@imacs.polytechnique.fr>
This program is a free software released under the GNU GPL License; see
the source for copying conditions.  There is NO warranty; not even for
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOT
    exit(0);
}

sub usage {
    print STDERR <<EOT;
Usage: tarcust [options] < input.tar > output.tar
Options:
   -h, --help                 show this help and exit
   -V, --version              show the version of tarcust and exit
   -v, --verbose              processes more verbosely
   -u, --user-name=NAME       change owner of all files in the archive
       --owner=NAME           same as -u
   -U, --user-number=NUMBER   numerical id of user
   -g, --group-name=NAME      set the group name of all files in the archive
       --group=NAME           same -as -g
   -G, --group-number=NUMBER  numerical id of group
   -p, --prefix=STRING[:MODE] add a prefix directory name to every file
   -s, --transform-names=EXPR apply substitutions on file and directory names
   -x, --exclude=EXPR         delete file from the archive

EOT
    exit(1);
}

#   parse arguments
$opt_h = $opt_v = $opt_d = $opt_V = 0;
$opt_g = $opt_G = $opt_u = $opt_U = $opt_p = '';
@opt_x = @opt_s = ();

$longnames = {
   'user_name' => 'u',   'owner'  => 'u',
   'group_name' => 'g',  'group' => 'g',
   'user_number' => 'U', 'group_number' => 'G',
   'exclude' => 'x',     'transform_names' => 's',
   'prefix' => 'p',
};

#   i do not use getopt.pl or Getopt::Long because i want option
#   names to be compatible with tar and tardy. For instance,
#      -u <=> --owner <=> --user-name <=> --user_name
#
for (;;) {
    my ($option, $original_option, $argument);

    last if $#ARGV < 0;
    last if $ARGV[0] eq '-' or $ARGV[0] !~ m/^-/;

    if ($ARGV[0] eq '-h' || $ARGV[0] eq '-?' || $ARGV[0] eq '--help') {
        $opt_h = 1;
        last;
    }
    elsif ($ARGV[0] eq '-V' || $ARGV[0] =~ m/^--vers/ ) {
        $opt_V = 1;
        last;
    }
    elsif ($ARGV[0] eq '-d' || $ARGV[0] =~ m/--deb/ ) {
        $opt_d = 1;
    }
    elsif ($ARGV[0] eq '-v' || $ARGV[0] =~ m/^--verb/ ) {
        $opt_v = 1;
    }
    elsif ($ARGV[0] =~ m/^-([gGuUxsp])$/ ) {
        $option = $1;
        if ($#ARGV < 1) {
            print STDERR "Error:tarcust: missing value to option: $ARGV[0]\n";
            $opt_h = 1;
            last;
        }
        shift(@ARGV);
        if ($option =~ m/^[xs]$/) {
            eval "push(\@opt_$option, \$ARGV[0])";
        }
        else {
            eval "\$opt_$option = \$ARGV[0]";
        }
    }
    elsif ($ARGV[0] =~ m/^--([^=]*)=(.*)$/) {
        $option = $original_option = $1;
        $argument = $2;
        $option =~ s/[^a-zA-Z0-9]/_/g;
        if (not defined($longnames->{$option})) {
            print STDERR "Error:tarcust: unknown option: $original_option\n";
            $opt_h = 1;
            last;
        }
        if ($option =~ m/^exclude|transform_names$/) {
            eval "push(\@opt_$longnames->{$option}, \$argument)";
        }
        else {
            eval "\$opt_$longnames->{$option} = \$argument";
        }
    }
    else {
        print STDERR "Error:tarcust: unknown option: $ARGV[0]\n";
        $opt_h = 1;
        last;
    }
    shift(@ARGV);
}

&usage if $opt_h == 1;
&showversion if $opt_V == 1;

$filein  = *STDIN{IO};
$fileout = *STDOUT{IO};
if ($#ARGV == 0) {
    if ($ARGV[0] ne '-') {
        open($filein, "< $ARGV[0]");
    }
}
elsif ($#ARGV == 1) {
    if ($ARGV[0] ne '-') {
        open($filein, "< $ARGV[0]");
    }
    if ($ARGV[1] ne '-') {
        open($fileout, "> $ARGV[1]");
    }
}
elsif ($#ARGV > 0) {
    &usage;
}

#   keep these values in a hash table
$new_values = {
    'uname' => $opt_u,
    'uid'   => $opt_U,
    'gname' => $opt_g,
    'gid'   => $opt_G,
};

#   --prefix is a synonym for --transform-names=s,^,PATH/,
#   A directory will be inserted at the beginning of the archive,
#   its mode is $dir_mode
$dir_mode = 040755;
if ($opt_p =~ m|^(.*):([0-9]+)$|) {
    $opt_p = $1;
    $dir_mode = $2;
    $dir_mode += 40000 if $dir_mode =~ m|^\d{1,4}$|;
    $dir_mode = oct($dir_mode);
}
push(@opt_s, "s,^,$opt_p/,") if $opt_p ne '';

#   main loop
$total_size = 0;

while ( read($filein, $header_line, $blocksize) == $blocksize) {
    if ($opt_p ne '') {
        #   A prefix is required, so a directory must be added in
        #   the archive.
        #   Do not forget to apply changes
        $header = &init_block($header_line);
        &change_values($new_values, $header);

        my $dirs = '';
        foreach (split('/', $opt_p)) {
            $dirs .= $_ . "/";
            #   typeflag=5 for a directory
            $header->{"typeflag"}->{"value"} = 5;
            $header->{"name"}->{"value"} = $dirs;
            $header->{"size"}->{"value"} = 0;
            $header->{"mode"}->{"value"} = $dir_mode;
            &print_header($header) if $opt_d == 1;
            print $fileout &hash2string($header);
            $total_size += $blocksize;
            &msgverbose("  Write: $header->{'name'}->{'value'}") if $opt_v == 1;
        }
        #   clear $opt_p so that this test is wrong next time
        $opt_p = '';
    }
    last if $header_line eq "\0" x $blocksize;

    $header = &init_block($header_line);
    last if $header->{"name"}->{"value"} eq '';
    if ($header->{"typeflag"}->{"value"} !~ m/^[0-6]?$/) {
        #   this file is not a regular file
        $contents = &read_contents($header);
        print STDERR "Warning:tarcust: Don't know how to handle GNU extensions\n";
        print STDERR "                 $header->{'name'}->{'value'} unchanged\n";
        print $fileout &hash2string($header);
        print $fileout $contents;
        next;
    }

    &msgverbose("File " .  $header->{"name"}->{"value"}) if $opt_v == 1;
    &print_header($header) if $opt_d == 1;

    &change_values($new_values, $header);

    $contents = &read_contents($header);

    #   check if this file has been excluded by the -x flag
    my $skip   = 0;
    foreach (@opt_x) {
        eval {$skip=1 if ($header->{'name'}->{'value'} =~ m(^$_$));};
        last if $skip;
    }
    if ($skip) {
        &msgverbose("  Skipped") if $opt_v == 1;
        next;
    }
    foreach (@opt_s) {
        eval "\$header->{'name'}->{'value'} =~ $_";
    }
    &print_header($header) if $opt_d == 1;
    print $fileout &hash2string($header);
    print $fileout $contents;
    $total_size += $blocksize + length($contents);
    &msgverbose("  Write: $header->{'name'}->{'value'}") if $opt_v == 1;
}

#   Flushes input to avoid the ``broken pipe'' message
undef $/;
$_ = <$filein>;

#   And writes null chars.
print $fileout "\0" x ($blocksize);
$total_size += $blocksize;

#   In GNU tar, the total size is a multiple of 20 512-bytes blocks
if ($total_size % (20*$blocksize) != 0 ) {
    print $fileout "\0" x (20*$blocksize - ($total_size % (20*$blocksize)));
}

exit(0);

##EOF##
