#! /usr/bin/perl
###############################################################################
#
# Ferret user interface program
#
# Written by Brian White <bcwhite@verisim.com>
# Copyright (c) 1996 by Verisim, Inc.
#
###############################################################################



# Load the big guns...
use Ferret;



###############################################################################
#
# Program-wide variable declarations (and defaults)
#
$search;
$action= '';
$imode = 1;
@parms = ();
%opts  = (
	addprefix	=> '',
	doctype		=> '',
	index		=> 'ferret.index',
	stripprefix	=> '',
	summary		=> '',
);
%flags = (
	filter		=> 0,
	help		=> 0,
);

$opts{index} = $ENV{FERRET_INDEX} if $ENV{FERRET_INDEX};



###############################################################################
#
# Usage message
#
my $usage = qq"
Use: $0 [--index=<index-file>] <action> [action parameters] [...]

Actions:
     addfile        adds named file(s) to index
     addstoppers    adds to list of stopper (non-indexed) words
     commonwords    reports words that exist in many documents
     query          searches for specified words (put in 'quotes')
     removefile     removes named file(s) from index
     removestoppers removes from list of stopper (non-indexed) words
     setoption      set specified options
     shrink         reduce index size after adds or removes
     unsetoption    unset specified options

Type \"$0 --help <action>\" for more information.

";


###############################################################################
#
# Work ("action") subroutines follow
#
###############################################################################


###############################################################################
#
# Action:  ADDFILE <files>
#

sub AddFile {
	if ($flags{help} || @parms < 1) {
		print STDERR "\nUse: $0 [...] addfile [--addprefix=<path>] [--stripprefix=<path>]\n";
		print STDERR "     [--summary=<size>] [--lines=<number>] [--filter='<program & args>']\n";
		print STDERR "     [--doctype=<type>] <file-to-add> [...]\n";
		print STDERR "\nOptions:\n";
		print STDERR "     addprefix    add to each filename after loading but before indexing\n";
		print STDERR "     doctype      process as documents of this type:  HTML, Text, Code\n";
		print STDERR "     filter       run row data through this program (+args) before indexing\n";
		print STDERR "     lines        limit summary to this number of lines in length\n";
		print STDERR "     stripprefix  strip from each filename after loading but before indexing\n";
		print STDERR "     summary      store maximum of this number of bytes as a document summary\n";
		die "\n";
	}

	foreach (@parms) {
		my($filter,$fixed,$data,$title,$summary,$dtype);
		$summary= "";
		if ($opts{filter}) {
			$filter = $opts{filter};
		} else {
			$filter = Ferret::StdExtFilters($_,\$fixed);
		}
		$data   = Ferret::LoadFile($_,$filter);
		$fixed  =~s!^$opts{stripprefix}!! if $opts{stripprefix};
		$fixed  = $opts{addprefix} . $fixed;
		$dtype  = $opts{doctype};

		unless ($dtype) {
			$dtype = "HTML"	if ($fixed =~ m/\.html?$/i || $data =~ m/^\s*<html>/si);
			$dtype = "Code" if ($fixed =~ m/\.(h|hpp|h\+\+|c|cpp|c\+\+)$/i || $data =~ m/^\#\!/);
			$dtype = "MIF"	if ($fixed =~ m/\.mif$/i || $data =~ m/^<MIFFile[\s\d\.]*>/);
			$dtype = "Text" unless $dtype;
		}

		print "Adding '$_' ";
		print "as '$fixed' " if $_ ne $fixed;
		print "...\n";
		eval 'Ferret::Strip' . $dtype . '(\$data,\$title,\$summary,$opts{summary})';
		die "$0: Unknown document type '$dtype'\n" if $@;
		Ferret::MakeHTMLSummary(\$summary, $dtype eq "HTML");
		Ferret::LimitLineCount(\$summary,$opts{lines}) if $opts{lines} && $dtype ne "HTML";
		$search->AddDocument($fixed,$data);
		$search->DBPutSummary("$fixed",$summary) if $summary && $opts{summary};
	}
}



###############################################################################
#
# Action:  ADDSTOPPERS <words>
#

sub AddStoppers {
	if ($flags{help} || @parms < 1) {
		print STDERR "\nUse: $0 [...] addstoppers <word> [...]\n";
		die "\n";
	}

	$search->AddStoppers(@parms);
}



###############################################################################
#
# Action:  COMMONWORDS <min> [max]
#

sub CommonWords {
	if ($flags{help} || @parms < 1 || @parms > 2) {
		print STDERR "\nUse: $0 [...] commonwords <min> [max]\n";
		print STDERR "\nBoth <min> and <max> can be either a number between 0 and 1 to indicate a\n";
		print STDERR "a frequency (eg. 0.90 = 90% of all documents) or a whole number greater than\n";
		print STDERR "1 to indicate an exact number of documents.  Running \"commonwords 0.90\"\n";
		print STDERR "will return a list of words in more than 90% of the documents.  This is useful\n";
		print STDERR "for determining words to be added to the \"stopper\" list.  (see: \"addstoppers\"\n";
		print STDERR "and \"removestoppers\")  Running \"commonwords 0.00 0.10\" will list all the words\n";
		print STDERR "that are in less than 10% of all documents.\n";
		die "\n";
	}

	my $words;

	if (@parms == 1) {
		$words = $search->CommonWords($parms[0]);
	} else {
		$words = $search->CommonWords($parms[0],$parms[1]);
	}

	my @words = sort(split(/\n/,$words));

	print "common words: @words\n";
}



###############################################################################
#
# Action:  QUERY 'query-string'
#

sub Query {
	if ($flags{help} || @parms != 1) {
		print STDERR "\nUse: $0 [...] query 'query-string'\n";
		print STDERR "\nThe query string should be enclosed in single quotes so that double quotes\n";
		print STDERR "can be passed as part of the query.  Any single quotes in the query (for\n";
		print STDERR "apostrophes in contractions) will have to be escaped with a backslash.\n";
		die "\n";
	}

	my @results = $search->Query($parms[0]);
	die "$@\n" if $@;

	print "Score Match\n~~~~~ ~~~~~\n";
	foreach (@results) {
		my($score,$match) = (m/^(\d+) (.*)$/);
		printf " %3d  %s\n",$score,$match;
	}
}



###############################################################################
#
# Action:  REMOVEFILE <files>
#

sub RemoveFile {
	if ($flags{help} || @parms < 1) {
		print STDERR "\nUse: $0 [...] removefile [--addprefix=<path>] [--stripprefix=<path>]\n";
		print STDERR "     <file-to-remove> [...]\n";
		print STDERR "\nOptions:\n";
		print STDERR "     addprefix    add to each filename after loading but before indexing\n";
		print STDERR "     stripprefix  strip from each filename after loading but before indexing\n";
		die "\n";
	}

	foreach (@parms) {
		my($filter,$fixed);
		$filter = Ferret::StdExtFilters($_,\$fixed);
		$fixed  =~s!^$opts{stripprefix}!! if $opts{stripprefix};
		$fixed  = $opts{addprefix} . $fixed;

		print "Removing '$_' ";
		print "as '$fixed' " if $_ ne $fixed;
		print "...\n";
		$search->RemoveDocument($fixed);
		$search->DBDelSummary("$fixed");
	}
}



###############################################################################
#
# Action:  REMOVESTOPPERS <words>
#

sub RemoveStoppers {
	if ($flags{help} || @parms < 1) {
		print STDERR "\nUse: $0 [...] removestoppers <words> [...]\n";
		die "\n";
	}

	$search->RemoveStoppers(@parms);
}



###############################################################################
#
# Action:  SETOPTION <options>
#

sub SetOption {
	if ($flags{help} || @parms < 1) {
		print STDERR "\nUse: $0 [...] setoption <option> [...]\n";
		print STDERR "\nOptions:\n";
		print STDERR "     tiny         make index as small as possible (loses proximity searching)\n";
		print STDERR "     nostoppers   don't remove any stopper (non-content) words\n";
		die "\n";
	}

	foreach (@parms) {
		my $option=0;

		/^tiny$/i		and $option=&Ferret::OPT_TINY;
		/^nostoppers$/i	and $option=&Ferret::OPT_NOSTOPPERS;

		print "Setting option '$_' ...\n";
		$search->SetOption($option);
	}
}



###############################################################################
#
# Action:  SHRINK
#

sub Shrink {
	if ($flags{help} || @parms != 0) {
		print STDERR "\nUse: $0 [...] shrink\n";
		die "\n";
	}

	print "Shrinking index...\n";
	$search->Shrink();
}



###############################################################################
#
# Action:  UNSETOPTION <options>
#
sub UnsetOption {
	if ($flags{help} || @parms < 1) {
		print STDERR "\nUse: $0 [...] unsetoption <option> [...]\n";
		print STDERR "\nOptions:\n";
		print STDERR "     tiny         make index as small as possible (cannot be unset)\n";
		print STDERR "     nostoppers   don't remove any stopper (non-content) words\n";
		die "\n";
	}

	foreach (@parms) {
		my $option=0;

		/^tiny$/i		and $option=&Ferret::OPT_TINY;
		/^nostoppers$/i	and $option=&Ferret::OPT_NOSTOPPERS;

		print "Unsetting option '$_' ...\n";
		$search->UnsetOption($option);
	}
}



###############################################################################
#
# Main program follows
#
###############################################################################


###############################################################################
#
# Parse command line arguments
#
die $usage unless @ARGV > 0;

foreach (@ARGV) {
	if (/^--(.*?)=(.*)$/) {
		if (defined $opts{$1}) {
			$opts{$1} = $2;
		} else {
			die "$0: Unknown option '--$1'\n" . $usage;
		}
	} elsif (/^--(.*?)$/) {
		if (defined $flags{$1}) {
			$flags{$1} = 1;
		} else {
			die "$0: Unknown flag '--$1'\n" . $usage;
		}
	} elsif ($action) {
		push @parms,$_;
	} else {
		$action = $_;
		if (/^addfile$/)		{ 			next; }
		if (/^addstoppers$/)	{ 			next; }
		if (/^commonwords$/)	{ $imode=0;	next; }
		if (/^query$/)			{ $imode=0;	next; }
		if (/^removefile$/)		{ 			next; }
		if (/^removestoppers$/)	{ 			next; }
		if (/^setoption$/)		{ 			next; }
		if (/^shrink$/)			{ 			next; }
		if (/^unsetoption$/)	{ 			next; }

		die "$0: Unknown action '$_'\n" . $usage;
	}
}

die $usage unless $action;



###############################################################################
#
# Create a Ferret and open the specified index (only if not in "help" mode)
#
unless ($flags{help}) {
	$search = new Ferret;
	if ($imode) {
		$search->Update($opts{index});
	} else {
		$search->Open($opts{index});
	}
}



###############################################################################
#
# Call the appropriate routine for the requested action
#
AddFile()		if ($action eq "addfile");
AddStoppers()	if ($action eq "addstoppers");
CommonWords()	if ($action eq "commonwords");
Query()			if ($action eq "query");
RemoveFile()	if ($action eq "removefile");
RemoveStoppers()if ($action eq "removestoppers");
SetOption()		if ($action eq "setoption");
Shrink()		if ($action eq "shrink");
UnsetOption()	if ($action eq "unsetoption");



###############################################################################
#
# Put things away when done...
#
print "Writing database...\n" if $imode;
$search->Close();



