#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;
##
##  htmlfix -- Fixup HTML markup code
##  Copyright (c) 1997,1998 Ralf S. Engelschall, All Rights Reserved. 
##

require 5.003;

BEGIN { $^W = 0; } # get rid of nasty warnings

use lib "/usr/lib/wml/perl/lib";
use lib "/usr/lib/wml/perl/lib/m68k-linux/5.004";
use lib "/usr/local/lib/site_perl";
use lib "/usr/local/lib/site_perl/m68k-linux";

use Getopt::Long 2.13;
use Image::Size;
use IO::File 1.06;

#
#   process command line
#
sub usage {
    print STDERR "Usage: htmlfix [options] [file]\n";
    print STDERR "\n";
    print STDERR "Options:\n";
    print STDERR "  -o, --outputfile=<file>  set output file instead of stdout\n";
    print STDERR "  -v, --verbose            verbose mode\n";
    exit(1);
}
$opt_v = 0;
$opt_o = '-';
$Getopt::Long::bundling = 1;
$Getopt::Long::getopt_compat = 0;
if (not Getopt::Long::GetOptions(
    "v|verbose",
    "o|outputfile=s")) {
    &usage;
}
sub verbose {
    my ($str) = @_;
    if ($opt_v) {
        print STDERR "** HTMLfix:Verbose: $str\n";
    }
}
sub warning {
    my ($str) = @_;
    if (not $opt_q) {
        print STDERR "** HTMLfix:Warning: $str\n";
    }
}

#
#   read input file
#
if (($#ARGV == 0 and $ARGV[0] eq '-') or $#ARGV == -1) {
    $in = new IO::Handle;
    $in->fdopen(fileno(STDIN), 'r');
    local ($/) = undef;
    $buffer = <$in>;
    $in->close;
}
elsif ($#ARGV == 0) {
    $in = new IO::File;
    $in->open($ARGV[0]);
    local ($/) = undef;
    $buffer = <$in>;
    $in->close;
}
else {
    &usage;
}

#
#   processing loop
#
$bytes = 0;

#
#   FIXUP1: add WIDTH/HEIGHT/ALT attributes to <img>-tags
#
sub ProcessImgTag {
    my ($tag) = @_;
    my ($begin, $attr, $end, $image, $width, $height);

    ($begin, $attr, $end) = ($tag =~ m|^(<IMG\s+)(.+)(\s*>)$|is);

    if (   $attr =~ m|SRC\s*=\s*"([^"]*)"|is
        or $attr =~ m|SRC\s*=\s*(\S+)|is    ) {
        $image = $1;

        ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
         $atime,$mtime,$ctime,$blksize,$blocks) = stat($image);
        $bytes += $size;

        #   add an ALT="" tag to make HTML lints happy
        if (    $attr !~ m|ALT\s*=\s*"[^"]*"|is
            and $attr !~ m|ALT\s*=\s*\S+|is) {
            &verbose("adding ALT for $image");
            $attr .= ' alt=""';
        }

        #   add WIDTH and HEIGHT to speed up display
        $width  = -1;
        $height = -1;
        $scale  =  1;
        if (   $attr =~ m/WIDTH\s*=\s*([0-9%]+|\*)/is
            or $attr =~ m/WIDTH\s*=\s*"([0-9%]+|\*)"/is) {
            $width = $1;
        }
        if (   $attr =~ m/HEIGHT\s*=\s*([0-9%]+|\*)/is
            or $attr =~ m/HEIGHT\s*=\s*"([0-9%]+|\*)"/is) {
            $height = $1;
        }
        if (   $attr =~ s/SCALE\s*=\s*([0-9]+)%//is
            or $attr =~ s/SCALE\s*=\s*"([0-9]+)%"//is) {
            $scale = $1 / 100;
        }
        if (   $attr =~ s/SCALE\s*=\s*([0-9.]+)//is
            or $attr =~ s/SCALE\s*=\s*"([0-9.]+)"//is) {
            $scale = $1;
        }
        if ($width  eq '*' or $width  == -1 or
            $height eq '*' or $height == -1   ) {
            if (-f $image) {
                ($Pwidth, $Pheight) = Image::Size::imgsize($image);

                #    width given, height needs completed
                if ((not ($width  eq '*' or $width  == -1)) and 
                         ($height eq '*' or $height == -1)     ) {
                    if ($width == $Pwidth) {
                        $Nheight = $Pheight;
                    }
                    else {
                        $Nheight = int(($Pheight/$Pwidth)*$width);
                    }
                }
                #   height given, width needs completed
                elsif ((not ($height eq '*' or $height == -1)) and 
                            ($width  eq '*' or $width  == -1)     ) {
                    if ($height == $Pheight) {
                        $Nwidth = $Pwidth;
                    }
                    else {
                        $Nwidth = int(($Pwidth/$Pheight)*$height);
                    }
                }
                #   both width and height needs completed
                elsif (($height eq '*' or $height == -1) and 
                       ($width  eq '*' or $width  == -1)    ) {
                    $Nwidth  = $Pwidth;
                    $Nheight = $Pheight;
                }

                #   optionally scale the dimensions
                if ($scale != 1) {
                    $Nwidth  = int($Nwidth  * $scale);
                    $Nheight = int($Nheight * $scale);
                }

                #   now set the new values
                if ($width eq '*') {
                    $attr =~ s|(WIDTH\s*=\s*)\S+|$1$Nwidth|is;
                    &verbose("substituting width for $image: ``width=$Nwidth''");
                }
                elsif ($width == -1) {
                    $attr .= " width=$Nwidth";
                    &verbose("adding width for $image: ``width=$Nwidth''");
                }
                if ($height eq '*') {
                    $attr =~ s|(HEIGHT\s*=\s*)\S+|$1$Nheight|is;
                    &verbose("substituting height for $image: ``height=$Nheight''");
                }
                elsif ($height == -1) {
                    $attr .= " height=$Nheight";
                    &verbose("adding height for $image: ``height=$Nheight''");
                }
            }
            else {
                #   complain
                &verbose("cannot complete size of $image: file not found");
                #   and make sure the =* placeholder constructs are removed
                $attr =~ s|WIDTH\s*=\s*\*||is;
                $attr =~ s|HEIGHT\s*=\s*\*||is;
            }
        }
    }

    $tag = $begin . $attr . $end;
    return $tag;
}
$bufferN = '';
while ($buffer) {
    if ($buffer =~ m|^(.*?)(<img\s+[^>]+>)(.*)$|is) {
        ($pre, $tag, $buffer) = ($1, $2, $3);
        $bufferN .= $pre . &ProcessImgTag($tag);
        next;
    }
    else {
        $bufferN .= $buffer;
        last;
    }
}
$buffer = $bufferN;

#
#   FIXUP 2: change <center>..</center> to <div align=center>..</div>
#
$buffer =~ s|<center>|<div align=center>|isg;
$buffer =~ s|</center>|</div>|isg;

#
#   FIXUP 3: add quotations to attribute values and
#            add missing '#' char to color attributes 
#
$bufferN = '';
while (($prolog, $tag, $epilog) = 
       ($buffer =~ m|^(.*?)(<[a-zA-Z_][a-zA-Z0-9-_]*.*?>)(.*)$|s)) {
    $tag =~ s|([A-Za-z_-]+=)([^\s\f\"\'>]+)([\s\n>])|$1"$2"$3|isg;
    $tag =~ s|([A-Za-z_-]+=")([0-9A-Fa-f]{6}"[\s\n>])|$1#$2|sg;
    $bufferN .= $prolog.$tag;
    $buffer = $epilog;
}
$buffer = $bufferN.$buffer;

#
#   FIXUP 4: paragraph indentation 
#
sub ProcessIndentContainer {
    my ($attr, $data) = @_;
    my ($num, $size, $pad, $prefix);
    
    #   determine amount of padding
    $num  = 0;
    $size = 4;
    $attr =~ s/num\s*=\s*"?(\d+)"?/$num = $1, ''/ige;
    $attr =~ s/size\s*=\s*"?(\d+)"?/$size = $1, ''/ige;

    #   pad the data
    if ($num > 0) {
        $pad = ' ' x ($num * $size);
        $data =~ s/^/$pad/mg;
    }
    elsif ($num == 0) {
        ($prefix) = ($data =~ m|^\n*([ \t]+)|s);
        if (length($prefix) > 0) {
            $data =~ s/^$prefix//mg;
        }
    }
    return $data;
}
$bufferN = '';
while ($buffer) {
    if ($buffer =~ m|^(.*?)<indent(.*?)>(.*?)</indent>(.*)$|is) {
        ($pre, $attr, $data, $buffer) = ($1, $2, $3, $4);
        $bufferN .= $pre . &ProcessIndentContainer($attr, $data);
        next;
    }
    else {
        $bufferN .= $buffer;
        last;
    }
}
$buffer = $bufferN;

#
#   FIXUP 5: out-commenting tags
#
$buffer =~ s|<[a-z_][a-z0-9-]*#.*?>||isg;
$buffer =~ s|</[a-z_][a-z0-9-]*#>||isg;

#
#   FIXUP 6: tag case translation
#
sub doit_upper {
    ($prefix, $body) = @_;
    $prefix =~ s/^(.+)$/\U$1\E/;
    $body =~ s/(\s+[a-zA-Z][a-zA-Z0-9_-]*)(\s*=\s*[^"\s]+|\s*=\s*"[^"]*"|>|\s)/\U$1\E$2/sg;
    return $prefix.$body;
}
sub doit_lower {
    ($prefix, $body) = @_;
    $prefix =~ s/^(.+)$/\L$1\E/;
    $body =~ s/(\s+[a-zA-Z][a-zA-Z0-9_-]*)(\s*=\s*[^"\s]+|\s*=\s*"[^"]*"|>|\s)/\L$1\E$2/sg;
    return $prefix.$body;
}
sub ProcessTagConv {
    my ($attr, $data) = @_;
    my ($case);
    
    #   determine case translation type
    $case = 'upper';
    $attr =~ s/case\s*=\s*"?(upper|lower)"?/$case = lc($1), ''/ige;

    #   and then translate the data
    if ($case eq 'upper') {
        $data =~ s/(<[a-zA-Z][a-zA-Z0-9_-]*\s*>)/\U$1\E/sg;
        $data =~ s/(<[a-zA-Z][a-zA-Z0-9_-]*)(\s+.*?>)/&doit_upper($1,$2)/sge;
        $data =~ s/(<\/[a-zA-Z][a-zA-Z0-9_-]*\s*>)/\U$1\E/sg;
    }
    else {
        $data =~ s/(<[a-zA-Z][a-zA-Z0-9_-]*\s*>)/\L$1\E/sg;
        $data =~ s/(<[a-zA-Z][a-zA-Z0-9_-]*)(\s+.*?>)/&doit_lower($1,$2)/sge;
        $data =~ s/(<\/[a-zA-Z][a-zA-Z0-9_-]*\s*>)/\L$1\E/sg;
    }
    return $data;
}
$bufferN = '';
while ($buffer) {
    if ($buffer =~ m|^(.*?)<tagconv(.*?)>(.*?)</tagconv>(.*)$|is) {
        ($pre, $attr, $data, $buffer) = ($1, $2, $3, $4);
        $bufferN .= $pre . &ProcessTagConv($attr, $data);
        next;
    }
    else {
        $bufferN .= $buffer;
        last;
    }
}
$buffer = $bufferN;

#
#   statistic
#
&verbose("Total amount of images: $bytes bytes");

#
#   write to output file
#
if ($opt_o eq '-') {
    $out = new IO::Handle;
    $out->fdopen(fileno(STDOUT), "w");
}
else {
    $out = new IO::File;
    $out->open(">$opt_o");
}
$out->print($buffer);
$out->close;

exit(0);

##EOF##
