#!/bin/sh
#
#  Interface to a Swish++ (search++) search of the man pages
#  (originally written for glimpse).
#  Michael Hamilton <michael@actrix.gen.nz>
#  Small changes - aeb, 980109
#
# $Id: mansearch,v 1.16 2004/08/08 16:54:55 robert Exp $


# Without the Swish++ search binary (search++) on PATH there is nothing to
# query: answer with a short explanatory HTML page and stop successfully.
if ! command -v search++ >/dev/null 2>&1; then
	cat <<'EOF'
Content-type: text/html

<title>Swish++ not installed</title>
<body>
<h1>Swish++ not installed</h1>
Search is only enabled if the <b>Swish++</b> package is installed.
You may <a href="http://packages.debian.org/swish++">download</a> it from
the <a href="http://www.debian.org">Debian</a> site.
</body>
EOF
	exit 0
fi


# Decide which URL prefix (CG) and which sed filter (SED) the search form
# needs.  lynxcgi: URLs are used only when both hold:
#   1) HTTP_USER_AGENT starts with "Lynx" or "lynx", and
#   2) HTTP_HOST is unset.
# Any other client gets ordinary /cgi-bin URLs, and lines tagged %lynx are
# deleted from the form instead of having the tag stripped.
AGENT=${HTTP_USER_AGENT-unknown}
SERVER=${SERVER_NAME-localhost}	# not read anywhere else in this script

# Settings for every non-Lynx client.
HH=nolynx
SED='/%lynx/d'
CG=/cgi-bin/man

case "$AGENT" in
    [Ll]ynx*)
        HH=${HTTP_HOST-nohh}
        SED='s/%lynx //'
        if [ "$HH" = nohh ]; then
            CG=lynxcgi:/usr/lib/cgi-bin/man
        fi
        ;;
esac

# The awk program cannot contain a literal single quote (it is itself
# single-quoted below), so the quote character is handed over by environment.
QUOTE="'"
export CG QUOTE SED

# From here on the script is one gawk program.  The shell hands over CG,
# QUOTE and SED through the environment and the command-line words as ARGV.
# The program is single-quoted, so it must not contain a single quote.
exec gawk '
# removeopts(string)
#   Tidy a query string and peel leading dash options off its front.
#   Options -F and -L (which take an argument) and -B, -w, -k, -1 .. -8
#   (which do not) are appended, space separated, to the global variable
#   "options" when they are followed by whitespace; any other leading
#   "-x" pair is simply dropped.
#   Returns what is left of the string.
#   NOTE: the only call in BEGIN is currently commented out.
function removeopts(string) {
  sub(/^[ \t]+/, "", string);		# Remove all leading blanks
  sub(/[ \t]+$/, "", string);		# Remove all trailing blanks
  gsub(/[ \t\\]+;/, ";", string);	# Remove blanks and backslashes before ;
  gsub(/[ \t]+,/, ",", string);		# Remove blanks before ,
  while (match(string, /^-[FLBwk1-8]/)) {
    if (match(string, /^-[FL]( |.)[^ \t]+[ \t]+/)) { # Option with arg
      options = options " " substr(string, RSTART, RLENGTH);
      string = substr(string, RSTART + RLENGTH);
    }
    else if (match(string, /^-[Bwk1-8][ \t]+/)) { # Option without arg
      options = options " " substr(string, RSTART, RLENGTH);
      string = substr(string, RSTART + RLENGTH);
    }
    else if (match(string, /^-[^ \t]/)) { # Unusable here: drop the "-x" pair
      string = substr(string, RSTART + RLENGTH);
    }
  }
  return string;
}

# urldecode(string)
#   Decode a URL-encoded form value: every "+" becomes a space and every
#   %XX escape (hex digits in either case) becomes the character with that
#   numeric code.  Decoded output is never rescanned, so an escaped percent
#   sign cannot start a second escape.
#   Returns the decoded string.
#   The names after "string" are awk local variables; callers pass only the
#   first argument.  Keeping them local means the result no longer depends
#   on whatever an earlier call left behind in a global accumulator.
function urldecode(string,    decoded, hex) {
    gsub(/\+/, " ", string);
    decoded = "";
    while (match(string, /%[0-9a-fA-F][0-9a-fA-F]/)) {
        hex = substr(string, RSTART + 1, 2);
        # Copy the text in front of the escape, then the decoded character.
        decoded = decoded substr(string, 1, RSTART - 1) sprintf("%c", strtonum("0x" hex));
        string = substr(string, RSTART + RLENGTH);
    }
    return decoded string;
}

# urlencode(string)
#   Percent-encode string for use at the end of a URL.  Characters outside
#   a-z A-Z 0-9 _ : / . - are replaced: a space becomes "+", anything else
#   becomes %XX using the global ord table that BEGIN fills in.  A character
#   missing from ord comes out as %00.
#   Returns the string unchanged when nothing needed encoding; otherwise
#   returns the encoded text prefixed with "?query=".
#   The names after "string" are awk local variables; callers pass only the
#   first argument, and no global other than ord is touched.
function urlencode(string,    ch, esc, out, encoded) {
    encoded = 0;
    out = "";
    while (match(string, /[^a-zA-Z0-9_:\/\.\-]/)) {
        ch = substr(string, RSTART, RLENGTH);
        esc = (ch == " ") ? "+" : "%" sprintf("%02X", ord[ch]);
        out = out substr(string, 1, RSTART - 1) esc;
        string = substr(string, RSTART + RLENGTH);
        encoded = 1;
    }

    return (encoded ? "?query=" : "") out string;
}

# htmlescape(s)
#   Return s with the characters &, <, > and " replaced by HTML character
#   entities, so that the text can be placed in page content safely.
function htmlescape(s) {
  gsub(/&/, "\\&amp;", s);
  gsub(/</, "\\&lt;", s);
  gsub(/>/, "\\&gt;", s);
  gsub(/"/, "\\&quot;", s);
  return s;
}

# Main program.  Everything happens in BEGIN and every path ends in exit,
# so no input files are read.
#   1. Take the query from the "query=" field of QUERY_STRING or, failing
#      that, from the command-line arguments.
#   2. With no query: send the search form (searchdocument filtered through
#      sed), or an error page when that file is unreadable.
#   3. Otherwise run search++ and print its results as an HTML page.
# Environment read: QUERY_STRING, CG (URL prefix), SED (sed filter for the
# form) and QUOTE (a single quote character).
BEGIN {
  # Fill the ord table (character -> code 0..255) used by urlencode.
  for (i = 0; i < 256; i++)
    ord[sprintf("%c", i)] = i;

  searchdocument = "/usr/share/man2html/mansearch.aux";
  quote = ENVIRON["QUOTE"];
  cgipath = ENVIRON["CG"];
  sedcmd = ENVIRON["SED"];
  truncate_at = 11;		# Single page display match limit.

  glimpse_cmd = "search++ --config-file=/usr/share/man2html/swish++.conf ";

  qry_str = ENVIRON["QUERY_STRING"];
  if (match(qry_str, /query=[^&]+/)) {
    qry_str = substr(qry_str, RSTART + 6, RLENGTH - 6);	# drop "query="
    string = urldecode(qry_str);
  }

  if (!string) {
    for (i = 1; i < ARGC; i++) {
      string = string " " ARGV[i];
    }
  }
				# Have to be careful to single quote this
				# string later.  Everything outside this
				# whitelist (quotes and HTML markup included)
				# becomes a space.
  gsub(/[^a-zA-Z0-9\-_+ \t\/@%:;,$*|=]/, " ", string);
				# A query must never start with "-", or
				# search++ would read it as a command-line
				# option.  Leading blanks go at the same time,
				# so a blank-only query counts as no query.
  sub(/^[ \t-]+/, "", string);

#  string = removeopts(string);

#  gsub(/[^a-zA-Z0-9-_+ \t\/@%:,]/, " ", options);


  if (!string) {
    if (system("test -r " searchdocument) != 0) {
      # This page is generated here, so it needs its own CGI header.
      print "Content-type: text/html\n";
      print "<head>";
      print "<title>mansearch - file not found</title>";
      print "</head>\n<body>";
      print "Sorry - cannot read " searchdocument ".";
      print "</body>";
      exit;
    }
    # The form is copied out as is, with %cg replaced by the URL prefix and
    # the %lynx markers handled by sedcmd; no header is added here, so the
    # file presumably carries its own.
    system("sed " quote "s#%cg#" cgipath "#g;" sedcmd quote " " searchdocument);
    exit;
  }

  print "Content-type: text/html\n";
  print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">";
  print "<HTML>";
  print "<HEAD>";
  print "<TITLE>Manual Pages - Search Results: " string "</TITLE>";
  print "</HEAD>";
  print "<BODY>";

  print "<H1>Manual Pages - Search Results</H1>";
  print "<H2>Target text: " options " " string "</H2>";

  print "<A HREF=\"" cgipath "/mansearch\">";
  print "Perform another search";
  print "</A><BR>";
  print "<A HREF=\"" cgipath "/man2html\">";
  print "Return to Main Contents";
  print "</A>";

  print "<HR>";

  print "<DL>";
				# Unless you like being hacked, the single
				# forward quotes are most important.
  cmd = glimpse_cmd " " options " " quote string quote " 2>/dev/null";

  # Each usable result line has more than three fields separated by
  # "__--__"; field 2 is used as the file name and field 4 as the text.
  while ((cmd | getline matchline) > 0) {
    if (split(matchline, part, "__--__") <= 3) {
      continue;
    }
    fullname = part[2];

    if (fullname != last_fullname) {
      # First line for this file: count it and print its heading and link.
      mcount++;
      tcount = 0;
      last_fullname = fullname;

      if (match(fullname, ".*/")) {
        dirname = substr(fullname, 1, RLENGTH);
        filename = substr(fullname, RLENGTH + 1);
        if (dirname != last_dirname) {
          last_dirname = dirname;
          print "</DL>";
          print "<H3>Location: " htmlescape(dirname) "</H3>";
          print "<DL>";
        }
      }
      else {
        filename = fullname;
      }

      print "<DT> <A HREF=\"" cgipath "/man2html" urlencode(fullname) "\">";
      textname = filename;
      sub(/\.(gz|Z|z)$/, "", textname);

      # Show "name.sect" as "name(sect)": the last "." becomes "(" and ")"
      # is appended.  A name without any "." is shown unchanged.
      if (match(textname, /\.[^.]*$/)) {
        textname = substr(textname, 1, RSTART - 1) "(" substr(textname, RSTART + 1) ")";
      }
      print htmlescape(textname);
      print "</A>";
    }

    text = part[4];
    tcount++;
    if (tcount < truncate_at) {
      sub(/^ *.[^ ]+[- ]+/, "", text);	# drop the leading "name - " part
      sub(/ +$/, "", text);
      gsub(/\\f./,    "", text);	# drop \fX escapes
      gsub(/\\&/,     "", text);	# drop \& escapes
      gsub(/\\/,      "", text);	# drop any remaining backslashes
      print "<DD>" htmlescape(text);
    }
    else if (tcount == truncate_at) {
      print "<DD> <I>...additional matches not shown.</I>";
    }
  }
  close(cmd);

  print "</DL>";
  if (mcount == 0) {
    print "No matches found.";
  }
  else if (mcount == 1) {
    print "<HR>\n<P>1 match found.";
  }
  else {
    print "<HR>\n<P>" mcount " matches found.";
  }
  print "</BODY>";
  print "</HTML>";
  exit;
}' "$@"

