#!/usr/bin/perl
#                              -*- Mode: Perl -*-
# dirsplit ---
# Author           : Eduard Bloch ( blade@debian.org )
# Last Modified On : Sun, 07 Mar 2004 11:55:54 +0100
# Status           : Working, but use with caution!
# License: GPLv2

my $version="0.0.1"; #fixme

require v5.8.1;
use strict;
use List::Util 'shuffle';
use Getopt::Long qw(:config no_ignore_case bundling);
use File::Basename;
use Cwd 'abs_path';

my $max="4480M";
my $prefix="vol_";
my $acc=100;
my $opt_help;
my $opt_sim;
my $opt_dir;
my $opt_flat;
my $opt_cor;
my $opt_move;
my $opt_ver;
my $get_ver;

my $msg="
dirsplit -spamdvcn < directory | content-list >
 -n|--no-act   Only print the commands, no action (implies -v)
 -s|--size     NUMBER - Size of the medium, default: $max
 -m|--move     Move files to directories named after prefix instead of
               creating catalog files
 -f|--flat     don't recreate directory structure, move objects to <medium>/
 -p|--prefix   STRING - first part of catalog/directory name (default: vol_)
 -a|--accuracy NUMBER (1=faster, large number=more efficient, default: 500)
 -c|--correct  Strip directory space summaries, eg. from con. list made with du
 -h|--help     Show this option summary
 -v|--verbose  More verbosity
                   
The content list is a file (use - for STDIN) with lines with file sizes and
file/directory names. Size numbers are expected to be in KiB, append modifier
letters to recalculate them, (B,m,M for bytes, MB (10^6), MiB (2^10)).
The default mode is creating file catalogs useable with:

    mkisofs -graft-points -path-list vol_3

Examples:
du -s mp3/Collections/Rock/* mp3/Singles/Pop/* | dirsplit -s 700M -
dirsplit -m -s 700M random_data_to_backup/
";

# -d|--dirhier  Attempt to recreate the directory structure on the target medium

my %options = (
   "h|help"                => \$opt_help,
   "d|dirhier"            => \$opt_dir,
   "f|flat"            => \$opt_flat,
   "n|no-act"            => \$opt_sim,
   "m|move"            => \$opt_move,
   "v|verbose"            => \$opt_ver,
   "s|size=s"             => \$max,
   "p|prefix=s"              => \$prefix,
   "c|correct"               => \$opt_cor,
   "a|accuracy=i"            => \$acc,
   "version"                 => \$get_ver
);

die $msg unless ( GetOptions(%options));

$opt_dir = !$opt_flat;

if($opt_help) {
   print $msg;
   exit 0;
}
if($get_ver) {
   print $version;
   exit 0;
}

$opt_ver = 1 if $opt_sim;

sub fixnr {
   # args: Number, optional: default multiplier
   my $fac;
   my $nr;
   if($_[0]=~/(\d+)(\D)/) {
      $nr=$1;
      $fac=$2;
      goto mult;
   }
   elsif(defined($_[1])) {
      $nr=$_[0];
      $fac=$_[1];
      goto mult;
   }
   else {
      return $_[0];
   }
   mult:
   return $nr*1000000 if($fac eq "m");
   return $nr*1048576 if($fac eq "M");
   return $nr*1000 if($fac eq "k");
   return $nr*1024 if($fac eq "K");
   return $nr if($fac eq "b" || $fac eq "B");
   die "$fac is not a valid multiplier!";
}

sub mkdirhier { 
   #  print "mkdirhier: $_[0]\n";
   return 1 if($_[0] eq ".");
   return 1 if(-d $_[0] && -w $_[0]);
   return 0 if !mkdirhier(dirname($_[0]));
   return mkdir $_[0];
}

my $l;
my @in;
my %names;
my @indata;

$max=fixnr($max);

# parse du -s output
if(-f $ARGV[0] || (-f readlink($ARGV[0])) || $ARGV[0] eq "-") {
   open($l, "<".$ARGV[0]);
   @indata=<$l>;
}
elsif(-d $ARGV[0] || (-d readlink($ARGV[0]))) {
   $opt_cor=1;
   @indata=`du -a $ARGV[0]/`;
}
else {
   die "Directory or contents listing needed!\n";
}

my %ntos;

for(@indata) {
   chomp;
   if(/^(\w+)\s+(.+)/) {
      $ntos{$2}=fixnr($1, "K");
   }
}

# sort and kill dupes/summaries
if($opt_cor) {
   my @intmp=sort(keys %ntos);
   my @newin;

   for(my $i=0;$i<=$#intmp;$i++) {
      $_=$intmp[$i];
      chomp;
      if ("$_/" ne substr($intmp[$i+1],0,length($_)+1)) { 
         # feed the final data holders
         push(@in, $ntos{$_});
         # linked list behind the hash entry
         my $realname=$_;
         $realname=~s!^\./!!;
         push(@{$names{$ntos{$_}}}, $realname);
      }
   }
}
else {
   # copy around
   for(keys %ntos) {
      push(@in, $ntos{$_});
      # linked list behind the hash entry
      push(@{$names{$ntos{$_}}}, $_);
   }
}

for(@in) {
   die "Too large object(s) ($_) for the given max size: ".join(", ",
   @{$names{$_}})."\n" if($_>$max);
}

if(1 || $opt_ver) {
   $a=0;
   for(@in) {$a+=$_};
   syswrite(STDOUT, "Total: $a\nCalculating");
}

my $i;
my @out; #FIXME

# Parms: bin size (int), input array (arr reference), output array (arr reference)
# Returns: wasted space (int)
sub bp_bestfit {
   my $max=$_[0];
   my @in = @{$_[1]};
   my $target = $_[2];
   my @out;
   my @bel;

   my @tmp;
   push(@tmp,$in[0]);
   push(@out, \@tmp);
   $bel[0] = $in[0];
   shift @in;

   for(@in) {
      my $bestplace=$#out+1;
      my $bestwert=$max;
      for($i=0;$i<=$#out;$i++) {
         my $rest;
         $rest=$max-$bel[$i]-$_;
         if($rest>0 && $rest < $bestwert) {
            $bestplace=$i;
            $bestwert=$rest;
         };
      }
      if($bestplace>$#out) {
         my @bin;
         $bel[$bestplace]=$_;
         push(@bin, $_);
         push(@out,\@bin);
      }
      else{
         $bel[$bestplace]+=$_;
         push(  @{$out[$bestplace]}    , $_);
      }
   }
   my $ret=0;
   # bis auf den letzten Bin zaehlen
   for($i=0;$i<$#out;$i++) {
      $ret+=($max-$bel[$i]);
   }
   @{$target} = @out;
   return $ret;
}

# Parms: bin size (int), input array (arr reference), output array (arr reference)
# Returns: wasted space (int)
sub bp_firstfit {
   my $max=$_[0];
   my @in = @{$_[1]};
   my $target = $_[2];
   my @out;
   my @bel;

   foreach my $obj (@in) {
      # first fit, packe in das erste passende
      for($i=0;$i<=$#out;$i++) {
         my $newsize=($bel[$i]+$obj);
#         print $bel[$i]."\n";
         if( $newsize <= $max ) {
            #print "F: $bel[$i]+$obj=$newsize\n";
            #das passt
            $bel[$i]=$newsize;
            push(  @{$out[$i]} , $obj);
            goto weiter; # das Nächste...
         }
      }
      # neues Bin
      my @bin;
      $bel[$i]=$obj;
      push(@bin, $obj);
      push(@out,\@bin);
      weiter:
   }
   my $ret=0;
   # bis auf den letzten Bin zaehlen
   for($i=0;$i<$#out;$i++) {
      #FIXME
#      print "D: uebrig: ".($max-$bel[$i])." bei max: $max und bel:".$bel[$i]."\n";
      $ret+=($max-$bel[$i]);
   }
   @{$target} = @out;
   return $ret;
}

my @erg;
my $globwaste=$max*$#in;
for(1..$acc) {
   syswrite(STDOUT,".");
   my @tmp;
   my $waste = bp_firstfit($max, \@in, \@tmp);
#   print "D: waste - $waste\n"; #FIXME
   @in=shuffle(@in);
   if($waste < $globwaste) {
      $globwaste=$waste;
      @erg=@tmp;
   }
}
print ", calculated, using ".($#erg+1)." volumes.\n";
   
print "Wasted: $globwaste\n";

# and the real work
$i=1;
for(@erg) {
   my $o;
   open($o, ">$prefix$i.list") if(! ($opt_move | $opt_sim));
   for(@{$_}) {
      #my $file=$names{$_}[0];
      my $file=shift(@{$names{$_}});
      my $target = "$prefix$i";
      $target = "$prefix$i/".dirname($file) if($opt_dir);
      #FIXME disabled because of the current Cwd breakage
      #$target=Cwd::abs_path($target);

      if($opt_move) {
         my $hin="$prefix$i/".( $opt_dir ? $file : "" );
         print "$file -> $hin\n" if($opt_ver);
         if(!$opt_sim) {
            mkdirhier $target || die "Problems creating $target\n";
            # last check
            die "Could not create $target?\n" if(!(-d $target && -w $target));
            rename($file, $hin);
         }
#         print "D: -d:".(-d $target)." von $target rename($file, $target/$file\n";
         # move to $prefix$i, the path component comes from the file itself
#         if($opt_dir) {
#            rename($file, "$prefix$i/$file");
#         }
#         else {
#            rename($file, "$prefix$i");
#         }
      }
      else {
         print $o "/".($opt_dir?$file : basename($file))."=$file\n" if(!$opt_sim);
         print "$i: /".($opt_dir?$file : basename($file))."=$file\n" if $opt_ver;
      }
   }
   $i++;
   close($o) if($o);
}

