#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;

#========================================================================
# linklint  - a fast link checker and web site maintenance tool.
# Copyright (C) 1997 James B. Bowlin.  All rights reserved.
# This is a total rewrite of Rick Jansen's 4/15/96 version of webxref.
# Thanks to Scott Perry, Patrick Meyer, Ernst Went, Cathy Sandifer,
# and Brian Kaminer for many excellent suggestions.
# Bugs, comments, suggestions welcome: bowlin@sirius.com
# Updates available at http://www.goldwarp.com/bowlin/linklint/
#========================================================================

$version = "1.35";
$date    = "May 24, 1997";

$ErrUsage = qq~

Usage:   linklint [-flags] [-option expression] filelist
flags:   -A -a -c -f -h -l -n -nocache -o -p -r -s -u -w -x
options: -g -t -tr -i -ir -delay -map -root -server -timeout
Use linklint with no arguments for detailed usage. Use -h for help.~;

$Usage = qq~
linklint version $version $date by Jim Bowlin (bowlin\@sirius.com)
Usage: linklint [-flags] [-option expression] filelist
Flags and options:
 -A -All        Print everything (same as -list -anchor -xref -warn).
 -a -anchor     Print a list of named anchors.
 -c -case       Check case of all linked local files (Windows).
 -f -forward    Print forward links for each html file.
 -h -help       Print a help page.
 -l -list       Print lists of found files and links.
 -n -net        Check remote "http://" URL's via network.
 -nocache       Turn off caching of robots.txt info in linklint.bot file.
 -o -one        Only check files on command line (no recursion).
 -p -progress   Print "checking file" to STDERR as each file is checked.
 -r -redirect   Check for redirects in remote html headers.
 -s -summary    Print only a summary of files found/missing.
 -u -unused     Print all unreferenced (orphan) files.
 -w -warn       Print warning messages.
 -x -xref       Print cross references for each link.
 -g       file  Send output to file instead of STDOUT
 -t/tr    exp   Only print files that match exp/regexp
 -i/ir    exp   Ignore files with fullpath matching exp/regexp.
 -delay   ##    Set delay between http requests to ## seconds (default 2).
 -root    dir   Use dir as root directory instead of CWD.
 -server  host  Treat remote http://host/file as local file.
 -timeout ##    Set network timeout to ## seconds (default 15).
 -map file[=replace]  Used to identify server imagemap CGI.
~;

$Help = qq~
Updates are available at: http://www.goldwarp.com/bowlin/linklint/
Linklint checks all links on a web site and provides cross reference lists for
all files found.  It must be run from the web server root directory otherwise
links starting with "/" will be interpreted incorrectly. Run linklint with no
arguments to get detailed usage.  Single character flags may be combined.
By default the file linklint.bot will be created in your home directory to
cache robot exclusion information. Examples:

Print a brief summary             linklint -s
Check all links on site           linklint .
... and print cross references    linklint -x
... and check remote links        linklint -xn
Check www.goldwarp.com            linklint  http://www.goldwarp.com
Check a list of links or files    linklint  \@links.txt
(one per line)
Make a list of all files \& links  linklint -l     > linklint.log
Make a complete listing of site   linklint -A     > linklint.log
(same as above)                   linklint -laxw  > linklint.log
Re-check remote links             linklint -n     \@\@linklint.log
Only check files in subdir/       linklint -o     subdir/*.html
Print all links to foo.html       linklint -xlt   foo.html
List all unused files             linklint -u
~;

#===========================================================================

# Files to try in case of a directory reference like "subdir/"
# These are checked in order.  Please modify as appropriate for your system.

@DefaultFiles = ('home.html', 'index.html', 'index.shtml', 'index.htm',
    'index.cgi', 'wwwhome.html', 'welcome.html');

$htmlexts  = 'html|shtml|htm';          #----- html extensions

$applicexts =
    'tar|dump|readme|bin|exe|hqx|pdf|ps|eps|ai|rtf|dvi|tex|doc';

$AnyCgi = '(/cgi-bin/)|(\.(cgi|pl)$)';  #----- regexp to find CGI files

#---------------------------------------------------------------------------
# Formats for printing various lists
#---------------------------------------------------------------------------

@fileformat = (
     "html##\.($htmlexts)\$",
     "image##\.(gif|jpg|jpeg|tif|tiff|pic|pict|hdf|ras|xbm)\$",
     "audio##\.(au|snd|wav|aif|aiff|midi|mid)\$",
     "video##\.(mpg|mpeg|avi|qt|mov)\$",
     "text##\.txt\$",
     "shockwave##\.dcr\$",
     "map##\.map\$",
     "applet##\.class\$",
     "gzip##\.gz\$",
     "zip##\.zip\$",
     "cgi##$AnyCgi",
     "application##\.($applicexts)\$",
     "default index##\(index.html\)",
);

foreach (@fileformat) {
    local($name,$data) = split("##", $_);
    push(@filesplit,"found %d $name file%s<->$data");
    push(@lostsplit,"ERROR: missing %d $name file%s<->$data");
}
push(@filesplit,"unknown<->found %d unknown file%s");
push(@filesplit,"unknown<->ERROR: missing %d unknown file%s");

@schema = ('http','gopher','mailto','ftp','file','news','telnet');

foreach (@schema) {
    push(@schemesplit, "found %d remote $_ link%s<->^$_:");
}
push(@schemesplit, "unknown<->found %d remote unknown link%s");

@orphform = ("%d director%y with unused files/subdirectories",1,
    "contains %d unused file%s/subdirector%y:");

@forwardform = ("%d file%s with forward links",1, "contains %d link%s:");

#---------------------------------------------------------------------------
# HTTP status codes and messages
#---------------------------------------------------------------------------

%HttpOk = (
    200, 'OK',
    201, 'Created',
    202, 'Accepted',
);

%HttpMoved = (
    301, 'Moved permanently',
    302, 'Moved temporarily',
    350, 'Redirected',
);

%HttpFail = (
     -1, 'Could not find host. Network connection ok?',
     -2, 'Could not open socket',
     -3, 'Could not bind socket',
     -4, 'Could not connect',
     -5, 'Timed out waiting for response',
     -6, 'Not an http link',
     -7, 'No status. Will try GET method',
     -8, 'Malformed status line',
    -10, 'Not checked. Disallowed by robots.txt',
    -11, 'Infinite redirect loop',
    204, 'No content',
    301, 'Moved permanently but no new URL given',
    302, 'Moved temporarily but no new URL given',
    400, 'Bad request',
    401, 'Access not authorized',
    403, 'Access forbidden',
    404, 'Not found',
    500, 'Internal server error',
    501, 'Service not implemented on server',
    502, 'Server temporarily overloaded',
    503, 'Gateway timeout',
);

#---------------------------------------------------------------------------
# Misc. startup values
#---------------------------------------------------------------------------

$Timeout    = 15;             # how long to wait for a response
$Delay      = 2;              # how long to delay between requests
$Retry      = 4;              # max number of server ip's to try
$Expire     = 30;             # update robots.txt entries every 30 days

$BasePath   = '';             # these are set by &BaseTag()
$BaseHost   = '';             # they are used in &UniqueUrl()

$ServerUrl  = '';             # ie http://www.goldwarp.com
$ServerRoot = '';             # ie /usr/local/wwwdocs/goldwarp
$referer    = '';             # used by UniqueUrl()
$MapFile    = '';             # used by UniqueUrl() to get pathinfo
$MapReplace = '';

$arg_A = $arg_c = $arg_h = $arg_w = $arg_r = 0; # prevent warnings

$Headline   = "#" . "-" x 60 . "\n"; # used to print lists

#==========================================================================
# 'Code' starts here
#==========================================================================

    @ARGV || ((print $Usage), exit );

    $DOS = ($ENV{'windir'}) ? 1 : 0;           # Detect Windows/DOS ?
    $pwdprog = $DOS ? 'cd' : 'pwd';            # program to pwd

    $Home = &GetCwd;

    #----- Cache excluded directories in $BotLog

    $BotLog = $ENV{'LINKLINTBOT'} ||
        ($ENV{'HOME'} && "$ENV{'HOME'}/linklint.bot") || "$Home/linklint.bot";

    (@infiles) = &ReadArgs(@ARGV);

    #----- do the simple flag things here

    $arg_h && ( (print $Help), exit );
    $arg_A && ( $arg_a = $arg_l = $arg_w = $arg_x = 1 );
    $arg_t && ( $arg_tr = $arg_t, $arg_tr =~ s/(\W)/\\$1/g );
    $arg_i && ( $arg_ir = $arg_i, $arg_ir =~ s/(\W)/\\$1/g );

    $MapFile && $MapFile =~ m/^([^=]+)=([^=]*)$/ &&
        ( $MapFile = $1, $MapReplace = $2);

    if ($arg_g) {
        open($arg_g,">$arg_g") || &Error("Could not open $arg_g for output");
        select($arg_g);
    }

    &WantNumber($Timeout, "-timeout");
    &WantNumber($Delay,   "-delay");
    &WantNumber($Expire,  "-expire");

    $DOS && $Timeout != 15 &&
        &Warn("Timeout does not work under DOS/Windows");

    #----- Add http:// to ServerUrl if needed

    $ServerUrl && $ServerUrl !~ m#^http://#i &&
        ($ServerUrl = "http://$ServerUrl");

    #----- Default file is "./"

    @infiles || ( @infiles = ("./") );

    #----- read in @@linklint.log if specified

    if (@infiles == 1 && $infiles[0] =~ s/^\@\@// ) {
        &ReadBack( shift(@infiles) || 'linklint.log');
        $ReadBack = 1;
    }

    #----- read in @links.txt if specified

    elsif (@infiles == 1 && $infiles[0] =~ s/^\@// ) {
        $_ = shift @infiles;

        open(FILE, $_) || &Error("could not open file <$_> $!");

        while (<FILE>) {
            next if /^#/;                # allow '#' to start comment line
            s/\s+$//;                    # trailing whitespace & \n
            s/^\s+//;                    # leading whitespace
            next unless $_;              # skip blank lines
            $_ = $1 if m#(http://\S+)#i; # strip any junk around an http://
            push(@infiles, $_);
            $arg_db && print "[$_]\n";
        }
        close(FILE);
    }

    #----- Chdir to $ServerRoot directory AFTER @list & @@site are read.

    $ServerRoot = $ServerRoot || $Home;   # Default to CWD.
    chdir($ServerRoot) || &Error("Invalid root directory $ServerRoot");
    $CWD = $ServerRoot;

    #----- check for file existence or http:// reference (unless readback)

    unless ($ReadBack) {

        foreach (@infiles) {
            m#^http://#i && (($arg_n = 1), next);
            local($temp) = m#(.+)/$# ? $1 : $_;          # strip trailing '/'
            $temp =~ m#^/# && ( $temp = "$ServerRoot$temp" );
            -e $temp ||
                &Error("could not find file $_\nin directory $ServerRoot");
            $_ = &UniqueUrl($_);
            $arg_o && ($NeedCheck{$_} = 1);
        }

        #----- All the searching happens in this loop

        $Time = -time;
        foreach (@infiles) { &linklint(0, &UniqueUrl($_), "<"); }
        $Time += time;
    }

    $arg_u && &DoOrphs(*OrphList,*DirList);     # Check for orphans
    $arg_c && &CheckCase;

    #----- Here comes the LOCAL printout

    ($arg_l || $arg_s) && do {
        print "\n";
        &PrintList(*DirList,   "found %d director%y", 0);
        &PrintLists(*FileList, *filesplit);
        &PrintLists(*ExtLink,  *schemesplit);
    };

    ($arg_a || $arg_s) && &PrintList(*Anchor,   "found %d named anchor%s");
    ($arg_u || $arg_s) && &PrintList(*OrphList, @orphform);
    ($arg_f || $arg_s) && &PrintList(*Forward,  @forwardform);

    #----- Local error messages

    &PrintLists(*LostFile,  *lostsplit);
    &PrintList(*BadCase,    "ERROR: %d file%s with case mismatch");
    &PrintList(*LostAnchor, "ERROR: missing %d named anchor%s");

    #----- Check remote links

    $arg_n && ((print "\n"), &CheckUrls(*ExtLink));

    #----- Print summary

    $DirCnt  = keys %DirList;
    $FileCnt = keys %FileList;
    $MissCnt = keys %LostFile;
    $HtmlCnt = keys %Checked;
    $OkCnt   = keys %UrlOk;
    $FailCnt = keys %UrlFail;
    $HttpCnt = keys %UrlChecked;
    $WarnCnt = keys %Warn;
    $ErrCnt  = keys(%LostFile) + keys(%LostAnchor) + keys(%BadCase) + $FailCnt;

    print "\n";
    $WroteCache && print "Wrote robots.txt cache to $BotLog\n";
    $FileCnt && ( print "Linklint ", $ReadBack ? "read back " : "found ",
        &Plural($FileCnt, "%d file%s in "),
        &Plural($DirCnt,  "%d director%y"), $HtmlCnt ?
        &Plural($HtmlCnt, " and parsed %d file%s.\n") : ".\n" );

    $MissCnt && print &Plural($MissCnt,"There %w %d missing file%s.\n");

    $HttpCnt && ( print "Linklint checked ",
        &Plural($HttpCnt, "%d remote link%s, "),
        $OkCnt, " checked ok, ", $FailCnt, " failed.\n" );

    print &Plural($ErrCnt,"%N error%s, "), &Plural($WarnCnt,"%n warning%s.");

    $Time && $Time > 4 &&
        printf("  Parsed ~ %1.1f files/second.", $HtmlCnt /$Time);

    print "\n";
    0 && &GetText && &GetRedirect;  # prevent warning messages
    exit;

#==========================================================================
# End of Main code
#==========================================================================

#--------------------------------------------------------------------------
# linklint($level,$link,$referer)
#
#     $level    keeps track of depth of recursion.
#     $link     is the URL or file to check
#     $referer  is the file that referenced $link.
#     Recursively get all referenced files from a file.
#--------------------------------------------------------------------------

sub linklint
{
    local($level,$link,$referer) = @_;
    local(%newlist,%wantanch);

    $arg_f && $Forward{$referer} && ($Forward{$referer} .= "<$link");

    local($scheme)  = ($link =~ m#^([.+\-\w]+:)(.*)#) ? $1 : '';  # get scheme
    local($path)    = $scheme ? $2 : "$ServerRoot$link";          # get path

    if ($scheme && $scheme ne 'FILE:' ) {
        $ExtLink{$link} = ($arg_x && $ExtLink{$link}) ?
            "$ExtLink{$link}<$referer" : $referer;
        return;
    }

    ($arg_ir && $link =~ m/$arg_ir/o) && return;   # ignore these files
    $arg_db && printf("%4d %-30s %s\n", $level, $link, $referer);

    if ( $path =~ s#/$## ) {                       # directory reference?
       $path = &LookupDir($path,$referer);
       $path && &linklint($level,$path,$referer);
       return;
    }

    local($dir)  = $path;                          # split path into dir/file
    local($file) = $dir =~ s#/([^/]+)$## ? $1 : '';
    $dir = $dir || '/';

#----- Check our caches for previous visits.

PRE_CHECK:
{
    if ( $FileList{$link} ) {
        $arg_x && ($FileList{$link} .= "<$referer");
        ( $arg_o && ($NeedCheck{$link} || $NeedAnchor{$link})) || return;
        $arg_db && print "rechecking $link\n";
        last PRE_CHECK;
    }
    elsif ( $LostFile{$link} ) {
        $arg_x && ($LostFile{$link} .= "<$referer");
        return;
    }

    unless ( -f $path ) {                 # this will be a lost file
        if ( -d _ ) {                     # unless it is a directory
           $referer ne "<" &&
               &Warn($referer, qq~directory $path should end with "/"\n~);
           $path = &LookupDir($path,$referer);
           $path && &linklint($level,$path,$referer);
           return;
        }

        #----- check here for added path info ie /path/file/pathinfo

        $path =~ m#^(.*\.(cgi|exe|pl))/# && -f $1 &&
            (&linklint($level, $1, $referer), return);

        $arg_db && print "ERROR: missing $path\n";
        $LostFile{$link} = $referer;
        return;
    }

    $DirList{$dir}   = 1;        # Cache the directory ( -f $path worked above)
    $FileList{$link} = $referer; # Cache the file (ditto)

    ((stat(_))[2] & 4 == 0) && &Warn($link, "not world readable");

    #----- Exit early if we are checking -one file.

    $arg_o && $level > 0 && ! $NeedAnchor{$link} && ! $NeedCheck{$link} &&
        (($arg_db && print "skipping $link\n"), return);

} # PRE_CHECK:

    # We jump here to parse a file that was found and not parsed
    # because we were in -one mode and previous links had no #fragment.

    #----- Only parse html files and only parse once

    $link =~ m/\.($htmlexts|map)$/io || return;

    $Checked{$link}++ && return;

    $arg_p && print STDERR "checking $link\n";

    &PushDir($dir) || return;                # Move to the directory

    unless (open($path, $path)) {
        print STDERR "Error: Could not open file <$path> $!\n";
        &PopDir;
        return;
    };

    if ($link =~ m/\.map$/ ) {
        &ParseMap($path, *newlist);
    }
    else {
        &ParseFile($path,$link,*newlist,*Anchor,*Redirect,*wantanch);
    }
    close($path);
    
    #---- No recursion in -one mode unless ...

    if ($arg_o && $level > 0 && ! $NeedCheck{$link} ) {
        $arg_db && print "no recursion into $link\n";
        &PopDir;
        return;
    }

    $Forward{$link} = "<" if $arg_f;          # this primes forward

    #----- recurse into all links found in this file

    foreach (keys %newlist) { &linklint($level+1, $_, $link); }

    #----- check anchors in $link after all files it refered to are parsed.

    foreach (keys %wantanch) {
        $Anchor{$_} && ($Anchor{$_} .= "<$link", next);
        $LostAnchor{$_} = ($arg_x && $LostAnchor{$_}) ?
            "$LostAnchor{$_}<$link" : $link;
    }
    &PopDir;
}

#--------------------------------------------------------------------------
# ParseMap($handle,*list)
#     Reads a map file and tries to extract all links.
#--------------------------------------------------------------------------

sub ParseMap
{
    local($handle,*list) = @_;

    while (<$handle>) {
        next unless m#(http://\S+)#i; # strip any junk around an http://
        $list{&UniqueUrl($1)} = 1;
    }
}
#--------------------------------------------------------------------------
# ParseFile($handle, $link, *list, *anchlist, *redirect, *wantanchor)
#
#     Extracts all (?) links from the file by setting %newlist{link} = "1".
#     Links are expanded to full unique URL's or paths.
#     %anchlist   filled with list of named anchors found
#     %redirect   filled with redirected url
#     %wantanchor filled with anchors to find
#     %NeedAnchor filled with URLs in wantanchor
#--------------------------------------------------------------------------

sub ParseFile
{
    local($handle,$link,*list,*anchlist,*redirect,*wantanchor) = @_;
    local($tag,$code,$temp,$url);

    $/ = "<";                                # use "<" as newline seperator

TAG:
    while (<$handle>) {

        if (m/^\!\-\-/) {
            while ($_ !~ /\-\-\>/ ) {        # ignore tags inside comments
               ($_ = <$handle>) && next;
               &Warn($link, "unterminated comment");
               last TAG;
            }
            next TAG;
        }
        ($tag, $_) = m/^(\w+)(\s+[^>]+)>/;
        $_ || next;
        $tag =~ tr/A-Z/a-z/;                 # convert tag to lower case
        if ( $tag =~ m/^a$/ ) {
            ( /\sname\s*=\s*"([^"]*)"/i || /\sname\s*=\s*(\S+)/i ) && (
                ($anchlist{"$link#$1"} = "<") );

            ( /\shref\s*=\s*"([^"]*)"/i || /\shref\s*=\s*(\S+)/i )|| next;
            $temp = $1;
            $temp =~ m/^#/ && ($temp = "$link$temp");
            $url = &UniqueUrl($temp);
            $list{$url} = 1;
            next if ($temp !~ m/(#.*)$/ ) ||
                ($arg_ir && $url =~ m/$arg_ir/o) ||            # ignored file
                ($url =~ m#^([.+\-\w]+:)#  && $1 ne 'FILE:');  # remote file
            $wantanchor{"$url$1"} = 1,
            $NeedAnchor{$url}     = 1;
        }
        elsif ( $tag =~ m/^base$/ ) {
            /\shref\s*=\s*"?([^\s"]+)/i && &BaseTag($1);
        }
        elsif ( $tag =~ m/^(bgsound|frame|input|script|embed)$/ ) {
            $list{&UniqueUrl($1)} = 1 if /\ssrc\s*=\s*"?([^\s"]+)/i;
        }
        elsif ( $tag =~ m/^area$/ ) {
            $list{&UniqueUrl($1)} = 1 if /\shref\s*=\s*"?([^\s"]+)/i;
        }
        elsif ( $tag =~ m/^body$/ ) {
            $list{&UniqueUrl($1)} = 1 if /\sbackground\s*=\s*"?([^\s"]+)/i;
        }
        elsif ( $tag =~ m/^img$/ ) {
            $list{&UniqueUrl($1)} = 1 if /\ssrc\s*=\s*"?([^\s"]+)/i;
            $list{&UniqueUrl($1)} = 1 if /\slowsrc\s*=\s*"?([^\s"]+)/i;
            $list{&UniqueUrl($1)} = 1 if /\sdynsrc\s*=\s*"?([^\s"]+)/i;
        }
        elsif ( $tag =~ m/^form$/ ) {
            $list{&UniqueUrl($1)} = 1 if /\saction\s*=\s*"?([^\s"]+)/i;
        }
        elsif ( $tag =~ m/^applet$/ ) {
            next unless m/\scode\s*=\s*"?([^\s"]+)/i;
            $code = "$1.class";
            if (/\scodebase\s*=\s*"?([^\s"]+)/i ) {
                ($temp = $1) =~ s#/$##;
                $code = "$temp/$code";
            }
            $list{&UniqueUrl($code)} = 1;
        }
        elsif ( $tag =~ m/^meta$/ ) {
             next unless /\shttp-equiv\s*=\s*"?refresh/i;
             next unless /\scontent\s*=\s*"[^"]*\surl\s*=\s*([^\s"]+)/i;
             $url = &UniqueUrl($1);
             $list{$url} = 1;
             $redirect{$link} = $url;
             $url =~ m#^\w+://# && next;
             &Warn($link, "re-direct $url should be absolute");
        }
    }
    $BasePath = '';     # reset to null string for next file's use
    $BaseHost = '';
    $/ = "\n";          # reset line seperator to "\n"
}

#--------------------------------------------------------------------------
# ParseRedirect(FILE, $link, $url)
#
#     Reads text from FILE until end of <head> element.  Uses $link for
#     error messages.  Fills $url with redirected $url if given otherwise
#     fills $url with ''.
#--------------------------------------------------------------------------

sub ParseRedirect
{
    local(*FILE, $link) = @_;
    local($tag,$url) = ('','');
    $/ = "<";                           # use "<" as newline seperator
    while (<FILE>) {
        $arg_db && $arg_p && print;
        last if m#^(/head|body|h\d|font)#i;
        next unless m/^meta(\s+[^>]+)>/;
        $_ = $1;
        /\shttp-equiv\s*=\s*"?refresh/i &&
           m/\scontent\s*=\s*"[^"]*\surl\s*=\s*([^\s"]+)/i || next;
        $url = &UniqueUrl($1);
        $url =~ m#^\w+://# ||
            &Warn($link, "re-direct $url should be absolute");
        last;
    }
    $arg_db && $arg_p && print "\n";
    $/ = "\n";                          # use "\n" as newline seperator
    $_[2] = $url;                       # return value via 3rd argument
}

#--------------------------------------------------------------------------
# $path = LookupDir($dir, $referer)
#
#     Trys to find a default file in the directory $dir.  If not found we
#     record this and return 0.  If found we return the path to the found
#     file.  %DefDir contains $dir keys and $dir/$file values.  It's used
#     to prevent checking for default files twice in the same directory.
#     It's also used to add $referer to the cross reference list of
#     $dir/$file without having to look for $file again.
#--------------------------------------------------------------------------

sub LookupDir
{
    local($dir,$referer) = @_;
    local(%file);
    $DefDir{$dir} && return $DefDir{$dir};
    local($errdir) = &Inbounds("$dir/(index.html)", 1);
    $LostFile{$errdir} && ($LostFile{$errdir} .= "<$referer", return 0);
    $arg_db && print "looking for index.html in $dir/\n";
    opendir(DIR,$dir) || ($LostFile{$errdir} = $referer, return 0);
    grep($file{$_} = 1, readdir(DIR));
    closedir(DIR);
    foreach (@DefaultFiles) {
        next unless $file{$_} && -f "$dir/$_";
        return $DefDir{$dir} = &Inbounds("$dir/$_");
    }
    $LostFile{$errdir} = $referer;
    return 0;
}

#--------------------------------------------------------------------------
# DoOrphs(*orphlist, *dirlist)
#
#     Checks every directory in DirList and creates a list of
#     all files that have not been checked by linklint
#--------------------------------------------------------------------------

sub DoOrphs
{
    local(*orphlist,*dirlist) = @_;
    local(@files,$link,$redir);
    if ($DOS) {
        local($lcfile, $lcdir, $lclink);
        local(%lcfile);
        grep((tr/A_Z/a-z/, $lcfile{$_} = 1), keys %FileList);
        local(%lcdir);
        grep((tr/A_Z/a-z/, $lcdir{$_} = 1), keys %DirList);
    }
    foreach $dir (sort keys %dirlist) {
        $DOS && ($lcdir = $dir) =~ tr/A-Z/a-z/;
        $arg_p && print STDERR "checking for orphans: $dir\n";
        &PushDir($dir) || next;
        opendir(DIR,".") || (
            (print STDERR "Error: could not read directory $dir\n"), next);

        @files = grep(!/^\./, readdir(DIR));
        closedir(DIR);

        foreach $file (sort @files) {
            $link = &Inbounds("$dir/$file");
            next if $FileList{$link} || $DirList{"$dir/$file"};
            if ($DOS) {
                ($lcfile = $file) =~ tr/A-Z/a-z/;
                ($lclink = $link) =~ tr/A-Z/a-z/;
                ($lcfile{$lclink} || $lcdir{"$lcdir/$lcfile"}) && next;
            }
            -d $file && ($file .= "/");
            $orphlist{$dir} = $orphlist{$dir} ?
                "$orphlist{$dir}<$file" : $file;
            next unless $link =~ m/\.($htmlexts)$/io;

            #----- parse html files for possible redirects

            open(FILE,$file) || (&Warn($link,"could not open $file"), next);
            &ParseRedirect('FILE', $link, $redir);
            close(FILE);
            $redir && ($orphlist{$dir} .= " => $redir");
        }
        &PopDir;
    }
}

#--------------------------------------------------------------------------
# CheckCase()
#
#     Checks every directory in DirList and creates a list of
#     all files that have not been checked by linklint
#--------------------------------------------------------------------------

sub CheckCase
{
    local(%files, $file);
    foreach $dir (sort keys %DirList) {
        $arg_p && print STDERR "checking for case: $dir\n";
        &PushDir($dir) || next;
        opendir(DIR,".") || (
            (print STDERR "Error: could not read directory $dir\n"), next);
        grep($files{&Inbounds("$dir/$_")} = 1, readdir(DIR));
        closedir(DIR);
        &PopDir;
    }
    foreach $link (keys %FileList) {
        next if $files{$link};
        ($file) = grep(/$link/i, keys %files);
        $file = $file || $link;                   
        $BadCase{$file} = $FileList{$link};      
    }
}

#--------------------------------------------------------------------------
# $url = UniqueUrl($url)
#
#    Makes a URL Unique.  Decodes "/../" and "." in the path of full URL's
#    Does the same for relative URL's But also completes the path
#    if it does not start with a "/".  Strips off ?text and Strips off
#    #text if we are not checking anchors.
#--------------------------------------------------------------------------

sub UniqueUrl
{
    local($_) = @_;
    s#\\#/# && $referer &&                         # de-dos and warn
        &Warn($referer, qq~"\" should be "/" in "$_[0]"~);

    $MapFile && s/^$MapFile/$MapReplace/oi;        # get pathinfo

    local($scheme) = s#^([.+\-\w]+:)## ? $1 : '';  # strip off scheme
    local($host)   = s#(^//[^/]*)##    ? $1 : '';  # strip address & port
    $scheme =~ tr/A-Z/a-z/;                        # all schema lower case
    $host = "$scheme$host";
    $host = $host || $BaseHost;            # default to base
    $host = '' if $host eq $ServerUrl;     # check these locally
    s/\?.*$// unless $host;                # strip  query if local
    s/\#.*$//;                             # strip  anchor !!

    $_ = "$BasePath$_" unless m#^/#;       # add base path

    if ( m/&/ ) {
        s/&#(\d\d?\d?);/pack("c",$1)/ge;   # Thanks Scott!
        s/&amp;/&/g;
    }

    $host || ($_ = (m#^/#) ? "$ServerRoot$_" : "$CWD/$_");

    if ( m#/\.#) {
        $_ .= "/";                             # now contains path/
        while (s#/\./#/#) {;}                  #  /./        ->  /
        while (s#/[^/]*/\.\./#/#) {;}          #  /dir/../   ->  /
        s#/$##;                                # remove our trailing /
    }
    $host || ($_ = &Inbounds($_));
    return "$host$_";
}

#--------------------------------------------------------------------------
# Inbounds($link,nowarn)
#
#     Makes sure $link is under $ServerRoot.  Strips off $ServerRoot
#     if it is, otherwise warn and prepend "FILE:" to avoid confusion.
#     Give a 2nd argument to prevent warning message.
#--------------------------------------------------------------------------

sub Inbounds
{
    return $_[0] if $_[0] =~ s#^$ServerRoot##o;
    local($new) = "FILE:$_[0]";
    $arg_ir && $new =~ m/$arg_ir/o &&  return $new;
    @_ < 2 && &Warn($_[0], "is not under server root $ServerRoot");
    return $new;
}

#--------------------------------------------------------------------------
# Warn(@msg)
#     Registers this warning in %Warn.  Prints the warning if $arg_w.
#--------------------------------------------------------------------------

sub Warn
{
    local($msg) = join("\n    ", @_);
    return if $Warn{$msg}++;
    $arg_w && print "WARNING: ", $msg, "\n";
}

#--------------------------------------------------------------------------
# BaseTag($url)
#
#     Sets Global $BaseHost and $BasePath as defined in $url
#     My basepath is absolute to root of machine.  Theirs are not.
#     Remove $scheme$host if it matches mine.
#--------------------------------------------------------------------------

sub BaseTag
{
    local($_) = @_;
    local($scheme)  = s#^([.+\-\w]+:)##  ? $1 : '';
    local($host)    = s#^(//[^/]*)##     ? $1 : '';
    $BasePath  = $_;
    $scheme    =~ tr/A-Z/a-z/;
    $BaseHost  = "$scheme$host";
    $BaseHost  = '' if $BaseHost eq $ServerUrl;
}


#==========================================================================
#
#  NETWORK ROUTINES
#
#==========================================================================


#--------------------------------------------------------------------------
# CheckUrls(*list)
#
#    %HttpOk, %HttpMoved, %HttpError  {$status} = $message.
#    %UrlOk, %UrlFail                 {$url}    = $status
#--------------------------------------------------------------------------

sub CheckUrls
{
    local(*list) = @_;
    local($flag, $errmsg, @http);
    (%list && (@http = grep(/^http:/, keys %list) )) || return;

    &SetupSocket;
    $Now  = time;                                 # used for cache expiration
    $Then = $Now - (60 * 60 * 24 * $Expire);

    $arg_nocache || $arg_nobot || &ReadBotCache;

    foreach $url (sort @http) {
        ($arg_ir && $url =~ m/$arg_ir/o) && next;
        next if $UrlChecked{$url}++;
        $arg_p && print STDERR "checking $url\n";
        $arg_g && print "checking $url\n";
        $flag = &CheckUrl($url);
        $HttpMoved{$flag} && ($flag = &UrlMoved($flag, $url));
        $HttpOk{$flag}    && ($UrlOk{$url} = $flag, next);
        &UrlError($flag,$url);
    }
    $arg_nocache || $arg_nobot || &WriteBotCache;
}

#--------------------------------------------------------------------------
# $flag = UrlMoved($flag, $url)
#
#     Process 3XX status.  Recheck $url given back in Location field
#     of HttpHeader.  Continue until
#     a) non-3XX status,  b) infinite loop  c) already checked.
#--------------------------------------------------------------------------

sub UrlMoved
{
    local($flag, $url) = @_;
    local(%checked,$next);

    while ( $HttpMoved{$flag} ) {
        $checked{$url}++ && return -11;                     # infinite loop
        ($next = $HttpHeader{'Location'}) || return $flag;  # this is an error
        &UrlWarn($url, "$HttpMoved{$flag} ($flag) to", $next);
        $UrlOk{$next} && return $UrlOk{$next};
        $UrlFail{$next} && return $UrlFail{$next};
        $flag = &CheckUrl($url = $next);
    }
    return $flag;
}

#--------------------------------------------------------------------------
# UrlError($status, $url)
#
#     Gets error message associated with $status. Records $url as a failed
#     URL, prints out the error message and prints cross-refs if $arg_x.
#--------------------------------------------------------------------------

sub UrlError
{
    local($flag,$url) = @_;
    local($errmsg) = $HttpFail{$flag} || "unknown error";
    $errmsg .= " ($flag)" if $flag >= 0;
    !$HttpFail{$flag} && $HttpHeader{'status-msg'} &&
        ($errmsg .= " $HttpHeader{'status-msg'}");

    $UrlFail{$url} = $flag;
    print "ERROR: $url\n    $errmsg\n";
    $arg_x && $ExtLink{$url} && &PrintSubList($ExtLink{$url});
}

#--------------------------------------------------------------------------
# UrlWarn(@msg)
#
#     Prints out @msg with all lines following 1st indented 4 spaces.
#     Records the warning and prints cross references if $arg_x.
#--------------------------------------------------------------------------

sub UrlWarn
{
    local($msg) = join("\n    ", @_);
    return if $Warn{$msg}++;
    print "WARNING: ", $msg, "\n";
    $arg_x && $ExtLink{$url} && &PrintSubList($ExtLink{$url});
}

#--------------------------------------------------------------------------
# $flag = CheckUrl($url)
#
#     Returns status of a remote URL.  Uses the robots.txt protocol.
#     If $arg_r uses "GET" with html files otherwise tries a "HEAD" first
#     and if that fails tries "GET".
#--------------------------------------------------------------------------

sub CheckUrl
{
    local($url) = @_;
    local($scheme, $host, $port, $path, $query) = &SplitUrl($url);
    return -6 if $scheme ne 'http';
    $path = $path || '/';
    $path = "$path?$query" if $query;
    local($flag);
    $arg_nobot || (($flag = &Disallowed($url)) && return $flag);

    if ($arg_r && $url =~ m/\.($htmlexts)$/io) {
        $flag = &HttpRequest($host,$port,'GET',$path,'GetRedirect',$url);
        return $HttpHeader{'redir'} ? 350 : $flag;
    }

    $flag = &HttpRequest($host, $port, "HEAD", $path);
    return $flag unless $flag == -7;                         # -7: no status
    $arg_db && print "Method HEAD failed. Trying GET ...\n";
    return &HttpRequest($host, $port, "GET", $path);
}

#--------------------------------------------------------------------------
# GetText('S', $lines)
#
#     Passed to HttpRequest_() to read $lines of text into @data.
#     For now @data is a global (or defined in a calling subroutine).
#--------------------------------------------------------------------------

sub GetText
{
    ($HttpHeader{'Content-type'} && $HttpHeader{'Content-type'} =~ /^text/) ||
        return;
    local($file, $lines) = @_;
    local($line) = 0;
    while ( <$file> ) {                    # read $lines into @data
        push(@data, $_);
        $arg_db && $arg_p && print;
        $lines && ($line++ >= $lines) && last;
    }
    $arg_db && $arg_p && print "\n";
}

#--------------------------------------------------------------------------
# GetRedirect('S', $url)
#
#     Passed to HttpRequest_() by CheckUrl() to parse for redirects in header.
#--------------------------------------------------------------------------

sub GetRedirect
{
    return unless $HttpHeader{'Content-type'} &&
        $HttpHeader{'Content-type'} eq 'text/html';
    local($redir);
    &ParseRedirect(@_, $redir);
    return unless $redir;
    $HttpHeader{'Location'} = $HttpHeader{'redir'} = $redir;
}

#--------------------------------------------------------------------------
# $flag = Disallowed($url)
#
#     Checks robots.txt file for $url.  Results are cached for each host.
#     Returns:
#        -10  if access is excluded by robots.txt
#          0  if access is allowed
#        < 0  if < 0 (non-http) error occured
#--------------------------------------------------------------------------

sub Disallowed
{
    local($scheme, $host, $port, $path) = &SplitUrl($_[0]);
    local(@data,$flag);

    if ($Robots{$host} ) {
        $Robots{$host} eq 'ok' && return 0;
    }
    else {
        $flag = &HttpRequest($host, $port, "GET", "/robots.txt",
            'GetText', 100);

        return $flag if $flag < 0;

        $Robots{$host}    = 'ok';    # default value
        $RobotTime{$host} = $Now;    # date we checked robots.txt
        $RobotModified    = 1;       # need to write a new file

        return 0 unless $flag == 200;

        $_ = join("", @data);
        s/\r\n?/\n/g;                   # end-of-line = \r | \n | \r\n
        @data = split(/\n/, $_);
        local(@agents,@disallow);
        push(@data, " ");
        foreach (@data) {
            next if m/^\s*#/;
            s/\s+$//;
            if ( /^$/ ) {
               if (@disallow && @agents) {
                   $_ = join(" ", @disallow);   # prepare for use as regexp
                   s#([^\w\s/])#\\$1#g;         # literal search (pretty)
                   s/\s+/\|/g;
                   $Robots{$host} = $_;
                   last if grep(/linklint/i,@agents);
               }
               @agents = @disallow = ();
               next;
            }
            s/\s*#.*$//;
            if ( m/^\s*(User.?Agent|Robot)s?\s*:\s+(\S+.*\S?)\s*$/i) {
                push(@agents, $2);
            }
            elsif ( m/^\s*Disallow\s*:\s+(\S+.*\S?)\s*$/i) {
               next unless grep(/(linklint|\*)/i, @agents);
               push(@disallow, $1);
            }
        }

        $Robots{$host} eq 'ok' && return 0;

        $arg_db && print "disallow = $Robots{$host}\n";
   }
   return ($path =~ m/^($Robots{$host})/) ? -10 : 0;
}

#--------------------------------------------------------------------------
# $flag = HttpRequest($host,$port,$method,$path, *getmethod, @params)
#
#     Handles host errors. Flags bad hosts. Caches host errors.
#     Calls HttpRequest_(). Will retry if more than one ip-address is given
#     by gethostbyname() and we get 500, 502, -4 or -5 errors.
#
#     If *getmethod is supplied, we use the subroutine &getmethod('S',@params)
#     to read data after the header. I don't have a great method for
#     sending data back at the moment.  Use globals for now.
#--------------------------------------------------------------------------

sub HttpRequest
{
    local($host) = @_;
    $HttpHostError{$host} && return $HttpHostError{$host};

    local($flag) = &HttpRequest_( @_ );
    return $flag unless ($flag =~ /(500|502|-5|-4)/) && $IpAddr2{$host};

    #----- for 500, 502, -5 or -4 errors we try alternate ip addresses

    foreach (split(";", $IpAddr2{$host})) {
        &Warn("Host $host server $HttpHeader{'ip-address'} error ($flag)");
        $flag = &HttpRequest_($_, @_[1 .. $#_]);
        ($flag =~ /(500|502|-5|-4)/) && next;

        #----- save good one as the default for this host

        $IpAddr{$host} = pack("C4", m/(\d+)\.(\d+)\.(\d+)\.(\d+)/);
        return $flag;
    }
    $HttpHostError{$host} = $flag;    # don't check this host again.
    return $flag;
}

#--------------------------------------------------------------------------
# $flag = HttpRequest_($host,$port,$method,$path, 'getmethod', @params)
#
#     Fills %HttpHeader with header info,
#     Uses globals set by &SetupSocket().
#     $flag is error (or success) flag see %Httpxxx for details.
#     Will use &getmethod('S',@params) to read data.
#--------------------------------------------------------------------------

sub HttpRequest_
{
    local($host, $port, $method, $path, *getmethod) = @_;
    local($thataddr, $flag);
    $port = $port || 80;

    #----- use globals %HttpHeader to hold header info.

    %HttpHeader = ();

    if ( $host =~ m/(\d\d?\d?)\.(\d\d?\d?)\.(\d\d?\d?)\.(\d\d?\d?)/) {
        $thataddr = pack("C4", $1, $2, $3, $4);
    }
    elsif ($IpAddr{$host} ) {
        $thataddr = $IpAddr{$host};     # use cached compact ip address
    }
    else {
        ($_,$_,$_,$_, $thataddr, @addrlist) = gethostbyname($host);
        $thataddr ||  return -1;        # -1: could not find host
        $IpAddr{$host} = $thataddr;     # cache it for next time

        #----- convert up to $Retry extra addresses into ip addresses
        #----- cache them into $IpAddr2{$host} for use in HttpRequest().

        $#addrlist >= $Retry && ( @addrlist = @addrlist[0 .. $Retry-1]);
        grep($_ = join(".", unpack("C4",$_)), @addrlist);
        $IpAddr2{$host} = join(";", @addrlist);
    }

    if ($Delay) {
        local($time) = time;
        if ( $TimeOut{$host} && $time < $TimeOut{$host} ) {
            sleep($TimeOut{$host} - $time);
            $TimeOut{$host} += $Delay;
        }
        else  { $TimeOut{$host} = $time + $Delay; }
    }

    $HttpHeader{'ip-address'} = join(".", unpack("C4",$thataddr));

    $arg_db && print
        "\n$method http://$host$path\n",
        "host ip: $HttpHeader{'ip-address'}\n";

    local($that) = pack($SOCKADDR, $AF_INET, $port, $thataddr);
    socket(S, $AF_INET, 1, $PROTO) || socket(S, $AF_INET, 2, $PROTO) ||
        return -2;                     # -2: could not open

    bind(S, $THIS)   || return -3;     # -3: could not bind
    connect(S,$that) || return -4;     # -4: could not connect

    $arg_db && $arg_p && print "Connected\n";

    local($lastsel) = select(S); $| = 1; select($lastsel);
    print S "$method $path HTTP/1.0\nUser-Agent: LinkLint\n\n";

    if ($Timeout && ! $DOS ) {         # select(,,,) no good on Windows 95
        local($rin) = '';
        vec($rin, fileno(S), 1 ) = 1;
        select($rin, undef, undef, $Timeout) || return -5; # -5: timed out
    }

    $arg_db && $arg_p && print "Got data\n";

    $_ = <S>;                          # read status line
    $_ || (close(S), return -7);       # -7: no status  (will try GET)

    if ( /\S+\s+(\d\d\d)\s+(\S+.*\S+)\s*$/) {
        $HttpHeader{'status'} = $flag = $1;
        $HttpHeader{'status-msg'}     = $2;
        $arg_db && print "Status: $1 ($2)\n";
    }
    else {
        close(S);
        return -8;                        # -8: Malformed status line
    }
    while  ( <S> ) {                      # put header info into %HttpHeader
        last unless m/\w/;
        next unless m/(\S+):\s+(\S+.*\S+)\s*$/;
        $HttpHeader{$1} = $2;
        $arg_db && $arg_p && print "$1: $2\n";
    }

    &getmethod('S', @_[5 .. $#_]) if $HttpOk{$flag} && $#_ >= 4 &&
        defined &getmethod;

    close(S);
    return $flag;
}

#--------------------------------------------------------------------------
# SetupSocket()
#
#     Does all the things we need only do once for opening a socket.
#     Creates a bunch of globals used by &HttpRequest() for opening sockets.
#--------------------------------------------------------------------------

sub SetupSocket
{
    local($thishost,$thisaddr);
    $AF_INET     = 2;
    $SOCKADDR    = 'S n a4 x8';
    $PROTO = (getprotobyname('tcp'))[2];
    if ($DOS) {
        $thisaddr = "";
    }
    else {
        $thishost =  `hostname`;
        chop($thishost);
        $thisaddr = (gethostbyname($thishost))[4];
    }
    $THIS = pack($SOCKADDR, $AF_INET, 0, $thisaddr);
}

#--------------------------------------------------------------------------
# ReadBotCache()
#
#     Reads the $BotLog file to get cached excluded directories. Format:
#     host data/time regexp
#     Skips entries that have expired.
#--------------------------------------------------------------------------

sub ReadBotCache
{
    open(CACHE, $BotLog) || return;
    (stat(CACHE))[9] < $Then && (close(CACHE), return);
    foreach (<CACHE>) {
        (m/^#/ || ! m/^(\S+)\s+(\d+)\s+(\S+)/ || $2 < $Then) && next;
        $Robots{$1}    = $3;
        $RobotTime{$1} = $2;
    }
    close(CACHE);
}

#--------------------------------------------------------------------------
# WriteBotCache
#
#     Writes the $botLog file.
#--------------------------------------------------------------------------

sub WriteBotCache
{
    return unless $RobotModified;
    open(CACHE, ">$BotLog") ||
        ( &Warn("Could not write $BotLog"), return);

    print CACHE
"# Robots.txt cache file created by linklint $version\n",
"# Use the -nocache flag to prevent this file from being generated.\n",
"# Use the environment variable LINKLINTBOT to set /path/file of this file.\n",
"# host date excluded-directories\n\n";

    foreach ( sort keys %Robots) {
        print CACHE "$_ $RobotTime{$_} $Robots{$_}\n";
    }
    close CACHE;
    $WroteCache = 1;
}

#--------------------------------------------------------------------------
# ($scheme, $host, $port, $path, $query, $frag) = SplitUrl($url)
#
#     Split the given URL into its component parts according to HTTP rules.
#--------------------------------------------------------------------------

sub SplitUrl
{
    local($_)     = $_[0];
    local($scheme)  = s#^([.+\-\w]+):##     ? $1 : '';
    local($host)    = s#^//([^/]*)##        ? $1 : '';
    local($port)    = $host =~ s/:(\d*)$//  ? $1 : '';
    local($frag)    = s/#([^#]*)$//         ? $1 : '';
    local($query)   = s/\?(.*)$//           ? $1 : '';
    $scheme =~ tr/A-Z/a-z/;
    return ($scheme, $host, $port, $_, $query, $frag);
}

#==========================================================================
#
#  PRINTING ROUTINES
#
#==========================================================================


#--------------------------------------------------------------------------
# Plural($cnt,$msg)
#
#     Returns a pluralized version of $msg.
#
#     %w -> was : were           %d ->       $cnt
#     %s ->     :    s           %n ->  no : $cnt
#     %y ->   y :  ies           %N ->  No : $cnt
#--------------------------------------------------------------------------

sub Plural
{
    local($cnt,$_) = @_;
    $cnt == 1 ? s/\%w/was/g : s/\%w/were/g;   # %w -> 'was' or 'were'
    $cnt == 1 ? s/\%s//g    : s/\%s/s/g;      # %s -> ''    or 's'
    $cnt == 1 ? s/\%y/y/g   : s/\%y/ies/g;    # %y -> 'y'   or 'ies'
    s/\%n/\%d/ && ($cnt = $cnt || 'no' );     # %n -> "no"  or $cnt
    s/\%N/\%d/ && ($cnt = $cnt || 'No' );     # %N -> "No"  or $cnt
    s/\%d/$cnt/;                              # %d -> $cnt
    return $_;
}

#--------------------------------------------------------------------------
# PrintList(*list, $header, $xref,$subhead)
#
#     Prints out files (and references if $arg_x) in %list.
#     Use $xref to overide global value.
#     $subhead can contain %d for printing count of files.
#--------------------------------------------------------------------------

sub PrintList
{
    local(*list, $header, $xref, $subhead) = @_;
    return unless %list;
    $xref = $arg_x unless defined $xref;     # can overide $arg_x w/ 3rd arg
    local(@major) = sort keys %list;
    @major = grep(/$arg_tr/,@major) if $arg_tr;
    return unless @major;
    local($headtext) = &Plural($#major + 1,$header);
    $arg_s && ( (print "$headtext\n"), return);
    print "$Headline# $headtext\n$Headline";
    foreach (@major) {
        print $_, $Redirect{$_} ? " => $Redirect{$_}\n" : "\n";
        $xref && &PrintSubList($list{$_}, $subhead);
    }
    print "\n" unless $xref;
}

#--------------------------------------------------------------------------
# PrintSubList
#
#     Prints out all elements of $sublist split by "<".  Prints out the
#     number of elements in pluralized $subhead.
#--------------------------------------------------------------------------

sub PrintSubList
{
    local($sublist, $subhead) = @_;
    $sublist || return;
    $sublist =~ s/^\<+//;                         # strip off preceding <'s
    $subhead = $subhead || "used in %d file%s:";
    local(@items) = sort split(/\<+/,$sublist);
    return unless @items;
    print "    ";
    $subhead ne 'none' &&  print &Plural($#items + 1, $subhead), "\n    ";
    print join("\n    ", @items) , "\n\n";
}

#--------------------------------------------------------------------------
# PrintLists(*list, *heads)
#
#     Splits %list into sublists and then prints each sublist.  The
#     splitting is controlled by @heads. Each line of $heads must be in the
#     form "$heading<->$regexp" except the last which is in the form
#     "unknown<->$heading".  All keys of %list that match %regexp are
#     printed out under the heading $heading.  If the 'unknown' element
#     exists then all remaining items in %list are printed out under it's
#     $heading.
#--------------------------------------------------------------------------

sub PrintLists
{
    local(*listname, *heads) = @_;
    local(%temp, @files, $heading, $regexp);
    local(%list) = %listname;

    foreach (@heads) {
        ($heading,$regexp) = split(/<->/,$_);
        last if $heading eq 'unknown';
        @files = grep(/$regexp/i, keys %list);
        next unless @files;
        foreach ( @files) {
            $temp{$_} = $list{$_};
            delete $list{$_};
        }
        &PrintList(*temp,$heading);
        %temp = ();
    }
    %list && $heading eq 'unknown' && &PrintList(*list,$regexp);
}

#--------------------------------------------------------------------------
# ReadArgs(@args)
#
#     Reads arguments from @args (all start with "-") returns the
#     remainder of @args.   We first check for full flags and options.
#     If these don't match exactly we go through the argument looking
#     for short flags and options 'globbed' together.  Flags set
#     $arg_X to 1.  For short flags X is the flag, for full flags X
#     is the first character.  Short options set $arg_X to the next
#     argument.  Full options set $X to the next argument where X
#     is the value from %fullopts.  Done w/o eval's.
#--------------------------------------------------------------------------

sub ReadArgs
{
    local($flags, $opts) = ('A|a|c|db|f|h|l|n|o|p|r|s|u|w|x', 'g|ir|i|tr|t');
    local($miscflags) = 'nocache|nobot';
    local($fullflags) = 'All|anchor|case|forward|help|list|net|one|progress|'
        . 'redirect|summary|unused|warn|xref';

    local(%fullopts) = (
        'delay',      'Delay',
        'expire',     'Expire',
        'ignore',     'arg_i',
        'ignorereg',  'arg_ir',
        'map',        'MapFile',
        'root',       'ServerRoot',
        'server',     'ServerUrl',
        'target',     'arg_t',
        'targetreg',  'arg_tr',
        'timeout',    'Timeout',
    );

    local($fullopts) = join("|", keys %fullopts);

    while ( @_ && ($_ = shift)) {
        s/^-// || ( unshift (@_, $_), return (@_));
        if ( /^($fullflags)$/o) {
            &SetArg("arg_" . substr($_,0,1), 1);
        }
        elsif (/^($fullopts)$/o)  {
            (@_ < 1 || $_[0] =~/^-/) &&
                &Error("expected parameter after -$_", $ErrUsage);
            &SetArg( "$fullopts{$_}", shift);
        }
        elsif ( /^($miscflags)$/ ) { &SetArg("arg_$1",1);}
        else {
            while ($_) {
                s/^($flags)//o && ( &SetArg("arg_$1", 1), next);
                s/^($opts)//o || &Error("unknown flag -$_", $ErrUsage);
                ($_ || @_ < 1 || $_[0] =~ /^-/) &&
                    &Error("expected parameter after -$1", $ErrUsage);
                &SetArg ("arg_$1", shift);
            }
        }
    }
    return (@_);
}

sub Error { print STDERR "\nError: ", @_, "\n"; exit; }

sub WantNumber
{
    $_[0] =~ /^\d+$/ ||
        &Error($_[1], " must be followed by an integer", $ErrUsage);
}

sub SetArg { local(*name,$val) = @_; $name = $val; }

#--------------------------------------------------------------------------
# PushDir($newdir)
#
#     Pushes current directory onto a stack @DIRS and then
#     chdir's to $newdir.  We Assume that $newdir is a full path!
#     Returns O if there is an error.
#--------------------------------------------------------------------------

sub PushDir
{
    local($new) = @_;
    push(@DIRS,$CWD);
    return $CWD if $new eq $CWD;
    chdir($new) || (
        (print STDERR "Error: PushDir could not chdir $new\n$!\n"),
        return 0);
    $arg_db && print "--Pushed $CWD $new\n";
    return($CWD = $new);
}

#--------------------------------------------------------------------------
# PopDir
#
#     Pops most recent directory off of stack @DIRS and
#     changes $CWD and current directory accordingly.
#--------------------------------------------------------------------------

sub PopDir
{
    local($new) = pop(@DIRS);
    return $CWD if $new eq $CWD;
    chdir($new) || (
        (print STDERR "Error: PopDir could not chdir $new\n"),
         return 0 );
    $arg_db && print "--Popped $new\n";
    $CWD = $new;
}

#--------------------------------------------------------------------------
# GetCwd
#
#     Returns a string containing the current working directory
#     "\" is changed to "/" for consistency if $DOS.
#     Sets $CWD to the current working directory.
#--------------------------------------------------------------------------

sub GetCwd
{
    local($_) = `$pwdprog`;          # different prog's for Dos/Unix
    s|\\|\/|g      if $DOS;          # replace \ with /
    s/^[a-zA-Z]:// if $DOS;          # remove drive:
    chop;                            # remove trailing \n
    $CWD = $_;
}

#--------------------------------------------------------------------------
# ReadBack($file)
#
#     Reads back the output of this program.
#--------------------------------------------------------------------------

sub ReadBack
{
    local($file)   = @_;
    open(FILE,$file) || &Error("could not open <$file>");

    while (<FILE>) {
        next unless s/^# //;                  # get a header line
        chop;
        return if m/Done/;
        return unless <FILE>;                 # ignore "#--- ..."
        $arg_db && print "header = $_\n";

        if    (m/^found \d+ remote/)         {&ReadList(FILE,'ExtLink');   }
        elsif (m/^found \d+ director/)       {&ReadList(FILE,'DirList');   }
        elsif (m/^found \d+ named anchor/)   {&ReadList(FILE,'Anchor');    }
        elsif (m/^found \d+ .* file/)        {&ReadList(FILE,'FileList');  }
        elsif (m/^ERROR: missing \d+ .* file/) {
            &ReadList(FILE,'LostFile');  }
        elsif (m/^ERROR: missing \d+ named anchor/) {
            &ReadList(FILE,'LostAnchor');}
        else  { $arg_db && print "Missed\n"; }
    }
    close FILE;
}

#--------------------------------------------------------------------------
# ReadList(FILE,*list)
#
#     Used by ReadBack to read back output from one list.
#--------------------------------------------------------------------------

sub ReadList
{
    local(*FILE,*list) = @_;
    local($major);
    local($mode) = 1;

    while (<FILE>) {
        return if m/^(#|Linklint )/;
        s/\n$//;
        return if ($mode == 1) && ! m/\S/;
        m/^(\s*)(\S+)(.*)$/ || ( $mode = 1, next );
        if ( $mode == 1 ) {
            $1 && ( $mode = 2, next );
            $list{$2} = "<";
            $major = $2;                      # holds name of file
        }
        elsif ( $mode == 2 ) {
            $1 || print STDERR "Read Error 2:$_ \n";
            $list{$major} .= "<$2$3";
        }
    }
}

#==========================================================================
# End of linklint
#==========================================================================
