#!/bin/sh
# tla-prune-revlib -- Prune arch revision library, somewhat conservatively
#
#  Copyright (C) 2004, 2005, 2006  Miles Bader <miles@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Written by Miles Bader <miles@gnu.org>
#
#-
#   -n, --dry-run            Show how much would be pruned, but do not
#                            actually delete anything.
#
#   -v, --verbose            Print a message even for unpruned versions
#   -q, --quiet              Print no messages at all
#
#   --min-age=NDAYS          Do not prune revisions less than NDAYS days old
#                            (default 10)
#   --keep-new-revs=NREVS    Do not prune the newest NREVS revisions (default 2)
#   --keep-old-revs=NREVS    Do not prune the oldest NREVS revisions (default 2)
#
#   -c, --checkpoint=FREQ    Do not prune revisions whose patch-number is a
#                            multiple of FREQ; these are intended to serve as
#                            "checkpoint" revisions to establish an upper
#                            limit on the number of replays that are needed to
#                            retrieve an arbitrary older revision.  If FREQ is
#                            0, no checkpoint revisions will be retained.
#                            (default 50)
#   -C, --no-checkpoints     Do not retain checkpoint revisions.
#
#   -h, --help               Display a help message and exit
#   -V, --version            Display a release identifier string and exit

# (---- beginning of hdr.shpp ----)
# hdr.shpp

# Base name of this script, used to prefix diagnostics and in the
# usage/version output.  "$0" is quoted so that an invocation path
# containing whitespace reaches basename as a single argument.
me=$(basename "$0")

# Directory prefix and the external programs run by the tla-tools
# scripts; the program names are exported to the environment.
bindir='/usr/bin'
AWK='/usr/bin/nawk'; export AWK
TLA='tla'; export TLA
SED='/bin/sed'; export SED
UUIDGEN='uuidgen'; export UUIDGEN

# (---- TLA_TOOLS_VERSION defined from ,tla-tools-version ----)
TLA_TOOLS_VERSION='unknown-version
'
# (---- end of TLA_TOOLS_VERSION defined from ,tla-tools-version ----)

# "$bindir/" when bindir is set, otherwise empty.
TLA_TOOL_PFX="${bindir+$bindir/}"
export TLA_TOOL_PFX

TLA_ESCAPE='yes'

# TLA_UNESCAPED_OPT is `--unescaped' when TLA_ESCAPE is "yes" and empty
# otherwise.
if test "$TLA_ESCAPE" = yes; then
  TLA_UNESCAPED_OPT='--unescaped'
else
  TLA_UNESCAPED_OPT=''
fi

# Some tools get badly confused by non-default settings of LANG (gawk,
# for instance, mishandles regexp character ranges), so force it to C.
# NOTE(review): LC_ALL or LC_COLLATE set in the environment still take
# precedence over LANG -- confirm whether they should be overridden too.
LANG=C; export LANG

# (---- end of hdr.shpp ----)
# (---- beginning of cmd-line.shpp ----)
# cmd-line.shpp -- Command-line helper functions for shell scripts

# Path of this script's own source, which the help functions read back.
# An invocation name without a slash is taken relative to TLA_TOOL_PFX.
case "$0" in
  */*) script="$0" ;;
  *)   script="${TLA_TOOL_PFX}$0" ;;
esac

# Print a one-line usage message taken from this script's header comment.
# If the header has a `# Usage:' line, that line is printed (minus the
# leading `# '); if a non-comment line or a bare `#'/`#-' line is reached
# first, a generic `[--help|--version]' usage line is printed instead.
usage ()
{
  local fallback_rule explicit_rule

  fallback_rule='/^\([^#]\|#-* *$\)/{s@.*@Usage: '"$me"' [--help|--version]@p;q;}'
  explicit_rule='/^# *Usage:/,/^# *$/{s/^# //p;q;}'

  $SED -n -e "$fallback_rule" -e "$explicit_rule" < "$script"
}

# Print the one-paragraph summary from this script's header comment,
# refilled by fmt.  The `#!' line is dropped, everything up to and
# including `-- ' on the title line is removed, and the comment leader is
# stripped from each line.  Reading stops at the first non-comment line,
# at a bare `#' line, at a `#-' marker line, or at a `# Usage:' line.
short_help ()
{
  local tab

  # A generated tab keeps the bracket expression below intact even if
  # this file's whitespace gets converted.
  tab=$(printf '\t')

  # The stop pattern uses `#-* *$', the same form as in usage(), so that
  # a `#-' marker directly after the summary also ends it.
  $SED -n -e '/^\([^#]\|#-* *$\|# *Usage:\)/q' \
          -e '/^#!/d;s/^.*-- */# /;s/^#[ '"$tab"']*//p' \
     < "$script" | fmt
}

# Print the option descriptions from this script's header comment: the
# lines following the `#-' marker, with the leading `# ' (or a lone `#')
# removed.  Reading stops at the first blank line of the script.
help_body ()
{
  local stop_rule strip_rule

  stop_rule='/^ *$/q'
  strip_rule='/^#-/,/^[^#]/s/^#\( \|$\)//p'

  $SED -n -e "$stop_rule" -e "$strip_rule" < "$script"
}

# Print the complete --help text: the usage line, the summary
# paragraph, an empty separator line, and the option descriptions.
help ()
{
  usage
  short_help
  printf '\n'
  help_body
}

# Print the --version text: a `NAME (tla-tools) VERSION' line followed
# by the `Written by' and then the `Copyright' lines of this script's
# header comment, each preceded by an empty line.  Only the header up to
# the first non-comment line or `#-' marker is searched.
version ()
{
  local no_nl_vers hdr_word

  # A command substitution drops the trailing newline of the version
  # string.  The assignment is kept separate from `local' because some
  # shells word-split an unquoted substitution passed to `local', which
  # would truncate a version string containing blanks.
  no_nl_vers=$(printf '%s\n' "$TLA_TOOLS_VERSION")
  echo "$me (tla-tools) $no_nl_vers"

  for hdr_word in 'Written by' 'Copyright'; do
    # The backslash-newline in the replacement emits the empty line.
    $SED -n '/^[^#]/q;/^#-/q;s/^# *\('"$hdr_word"'\)/\
\1/p' < "$script"
  done
}

# Report on stderr that $1 is not a recognized option, followed by a
# pointer to --help.
unrec_opt ()
{
  {
    echo "$me: unrecognized option \`$1'"
    echo "Try \`$me --help' for more information."
  } 1>&2
}

# Report a malformed command line: the usage line and a pointer to
# --help, both on stderr.
cmd_line_err ()
{
  {
    usage
    echo "Try \`$me --help' for more information."
  } 1>&2
}

# Print the value part of a `--option=VALUE' argument, i.e. $1 with
# everything up to and including the first `=' removed.  An argument
# without `=' is printed unchanged.
long_opt_val ()
{
  # printf rather than echo: echo may treat its argument as an option
  # or interpret backslashes in it, depending on the shell.
  printf '%s\n' "$1" | $SED 's/^[^=]*=//'
}

# Print the value attached to a single-letter option such as `-c50',
# i.e. $1 with its leading dash and option letter removed.
short_opt_val ()
{
  # printf rather than echo: an argument such as `-ne' is a valid set
  # of echo options in some shells and would produce no output at all.
  printf '%s\n' "$1" | $SED 's/^-.//'
}

# (---- end of cmd-line.shpp ----)

# Defaults for the pruning policy; each can be overridden from the
# command line.

# Only revisions older than this many days are candidates for pruning
MIN_AGE=10

# ... and within any given version, this many of the newest
KEEP_NEW_REVS=2
# ... and this many of the oldest revisions are always kept
KEEP_OLD_REVS=2

# Revisions whose patch number is a multiple of this are kept as well
CHECKPOINT_FREQ=50

# Output/behaviour flags, each either `y' or `n'
DRY_RUN=n; VERBOSE=n; QUIET=n

# Parse command-line options

# Exit with status 10 and a diagnostic unless the option in $1 is
# followed by a value.  Call as `require_opt_arg "$@"' from an option
# arm.  Without this check a value-taking option given as the last
# argument makes `shift 2' fail; a shell that does not abort on that
# error then loops forever because nothing gets consumed.
require_opt_arg ()
{
  test "$#" -ge 2 && return 0
  echo 1>&2 "$me: option \`$1' requires an argument"
  echo 1>&2 "Try \`$me --help' for more information."
  exit 10
}

# Process the options given as arguments, setting DRY_RUN, VERBOSE,
# QUIET, KEEP_NEW_REVS, KEEP_OLD_REVS, MIN_AGE and CHECKPOINT_FREQ.
# --help and --version print their text and exit 0.  An unrecognized
# option, a missing option value, or any non-option argument exits with
# status 10 after a diagnostic.
parse_cmd_line ()
{
  while :; do
    case "$1" in
      -n|--dry-run)
        DRY_RUN=y; shift;;
      -v|--verbose)
        VERBOSE=y; QUIET=n; shift;;
      -q|--quiet)
        QUIET=y; VERBOSE=n; shift;;
      --keep-new-revs|--keep-new|--keep-new-revisions)
        require_opt_arg "$@"; KEEP_NEW_REVS="$2"; shift 2;;
      --keep-new-revs=*|--keep-new=*|--keep-new-revisions=*)
        KEEP_NEW_REVS=$(long_opt_val "$1"); shift;;
      --keep-old-revs|--keep-old|--keep-old-revisions)
        require_opt_arg "$@"; KEEP_OLD_REVS="$2"; shift 2;;
      --keep-old-revs=*|--keep-old=*|--keep-old-revisions=*)
        KEEP_OLD_REVS=$(long_opt_val "$1"); shift;;
      --min-age)
        require_opt_arg "$@"; MIN_AGE="$2"; shift 2;;
      --min-age=*)
        MIN_AGE=$(long_opt_val "$1"); shift;;
      -c|--checkpoint|--checkpoints)
        require_opt_arg "$@"; CHECKPOINT_FREQ="$2"; shift 2;;
      -c=*|--checkpoint=*|--checkpoints=*)
        CHECKPOINT_FREQ=$(long_opt_val "$1"); shift;;
      -c*)
        # Value attached directly to the short option, as in `-c50'.
        # (long_opt_val would leave such an argument unchanged because
        # it contains no `='.)
        CHECKPOINT_FREQ=$(short_opt_val "$1"); shift;;
      -C|--no-checkpoint|--no-checkpoints)
        CHECKPOINT_FREQ=0; shift;;
      --help|-h|-H)
        help; exit 0;;
      --version|-V)
        version; exit 0;;
      -[!-]?*)
        # Split concatenated single-letter options apart: `-nv' becomes
        # `-n' `-v'.  printf is used because an option cluster such as
        # `-nn' is itself a valid set of echo options in some shells and
        # would silently vanish.
        FIRST="$1"; shift
        set -- "$(printf '%s\n' "$FIRST" | $SED 's/^\(-.\).*/\1/')" \
               "-$(printf '%s\n' "$FIRST" | $SED 's/^-.//')" \
               "$@"
        ;;
      -*)
        unrec_opt "$1"; exit 10;;
      *)
        break;;
    esac
  done

  # This command takes no non-option arguments.
  test "$#" = 0 || { cmd_line_err; exit 10; }
}

parse_cmd_line "$@"

# The revision library locations, as printed by tla.
REVLIBS=$($TLA my-revision-library)

# Wording of the progress messages: a real run reports what it does, a
# dry run reports what it would do.
case "$DRY_RUN" in
  n) PRUNE_MSG="Pruning";     KEEP_MSG="Keeping" ;;
  *) PRUNE_MSG="Would prune"; KEEP_MSG="Would keep" ;;
esac

# Awk program that reads candidate revision directory names, one per
# line, and prints the ones to prune.  The variables keep_new_revs,
# keep_old_revs and checkpoint_freq must be supplied with -v.
#
# Revisions are ordered by level type (base, patch, version, versionfix)
# and then by numeric level, so that patch-10 sorts after patch-9 and the
# checkpoint test sees the real patch number.  (Comparing the level names
# as strings sorted patch-10 before patch-2 and made every level number
# evaluate to 0, which disabled checkpoints.)
CALC_PRUNE_REVS='
  # Numeric level of REV: "...--patch-12" gives 12.
  function rev_num(rev)
  {
    sub(/^.*-/, "", rev)
    return rev + 0
  }

  # Rank of the level type of REV: base, patch, version, versionfix.
  function rev_rank(rev)
  {
    sub(/^.*--/, "", rev)
    sub(/-[^-]*$/, "", rev)
    if (rev == "base") return 0
    if (rev == "patch") return 1
    if (rev == "version") return 2
    return 3
  }

  # Sort KEYS into ascending numeric order, permuting VALUES alongside.
  function sort2(keys, values, len,    i, j, tmp)
  {
    for (i = 0; i < len; i++)
      for (j = i + 1; j < len; j++)
        if (keys[i] > keys[j]) {
          tmp = keys[i]; keys[i] = keys[j]; keys[j] = tmp
          tmp = values[i]; values[i] = values[j]; values[j] = tmp
        }
  }

  BEGIN { num_revs = 0 }

  {
    keys[num_revs] = rev_rank($0) * 1000000000 + rev_num($0)
    revs[num_revs] = $0
    num_revs++
  }

  END {
    sort2(keys, revs, num_revs)

    # Skip the oldest keep_old_revs and the newest keep_new_revs entries;
    # of the rest, keep only checkpoint revisions.  Level 0 is never
    # treated as a checkpoint.
    last = num_revs - keep_new_revs
    for (i = keep_old_revs + 0; i < last; i++) {
      num = rev_num(revs[i])
      if (checkpoint_freq + 0 == 0 || num == 0 || num % checkpoint_freq != 0)
        print revs[i]
    }
  }
'

# List the revision directories exactly $1 levels below the current
# directory that were last modified more than $MIN_AGE days ago.  Any
# further arguments are appended to the find expression.
find_old_revs ()
{
  local depth="$1"
  shift
  find . -maxdepth "$depth" -mindepth "$depth" \
       \( -name '*--patch-*' -o -name '*--base-*' \
          -o -name '*--version-*' -o -name '*--versionfix-*' \) \
       -type d -mtime "+$MIN_AGE" "$@"
}

# Print the number of lines in the string $1 (0 if it is empty).
count_lines ()
{
  test -n "$1" || { echo 0; return 0; }
  # The unquoted substitution drops the leading blanks that many old
  # versions of wc include in their output.
  set -- $(printf '%s\n' "$1" | wc -l)
  echo "$1"
}

# NUM_REVS doesn't include anything initially rejected by the $MIN_AGE
# test.  This function includes those, for printing accurate numbers in
# messages.  It must be called from within a version directory.
total_num_revs ()
{
  local num_too_new
  num_too_new=$(find_old_revs 1 -o -print | wc -l)

  expr $num_too_new + $NUM_REVS
}

# If there are multiple revision libraries, print the revlib name (once
# per revlib, and never in quiet mode).
print_revlib_hdr ()
{
  if test "$REVLIB" != "$REVLIBS" && test "$PRINTED_REVLIB_HDR" = n \
     && test "$QUIET" != y
  then
    echo "$REVLIB:"
    PRINTED_REVLIB_HDR=y
  fi
}

# Prune the revision library in directory $1 according to MIN_AGE,
# KEEP_NEW_REVS, KEEP_OLD_REVS and CHECKPOINT_FREQ, honouring DRY_RUN,
# VERBOSE and QUIET.  The body runs in a subshell, so its directory
# changes and variables do not leak into the caller.
prune_revlib ()
(
  REVLIB="$1"

  # Everything below runs find and rm -rf relative to the current
  # directory, so give up on this revlib if it cannot be entered.
  cd "$REVLIB" || exit 1

  PRINTED_REVLIB_HDR=n

  # Version directories (relative to the revlib) that contain at least
  # one sufficiently old revision.
  VERSION_DIRS=$(find_old_revs 5 | $SED 's@^\./\(.*\)/[^/]*@\1@' | sort -u)

  for VD in $VERSION_DIRS; do
    cd "$REVLIB/$VD" || continue

    # ARCHIVE/VERSION, for messages
    VERSION=$(echo "$VD" | $SED 's@\([^/]*\)/.*/\([^/]*\)@\1/\2@')

    REVS=$(find_old_revs 1 | $SED 's@^\./@@')
    NUM_REVS=$(count_lines "$REVS")

    if test $NUM_REVS -gt $(expr $KEEP_NEW_REVS + $KEEP_OLD_REVS); then
      # Gotta prune

      PRUNE_REVS=$(printf '%s\n' "$REVS" \
                   | $AWK -v keep_new_revs="$KEEP_NEW_REVS" \
                          -v keep_old_revs="$KEEP_OLD_REVS" \
                          -v checkpoint_freq="$CHECKPOINT_FREQ" \
                          "$CALC_PRUNE_REVS")

      # Counted from the list itself, so an empty list gives 0.
      NUM_PRUNE_REVS=$(count_lines "$PRUNE_REVS")

      if test "$QUIET" != y; then
        print_revlib_hdr
        echo "$VERSION: $PRUNE_MSG" $NUM_PRUNE_REVS out of $(total_num_revs) revisions
      fi

      # Finally, remove the pruned revisions!  $PRUNE_REVS is left
      # unquoted on purpose: it holds one directory name per line.
      if test "$DRY_RUN" = n && test -n "$PRUNE_REVS"; then
        rm -rf -- $PRUNE_REVS
      fi

    elif test "$VERBOSE" = y; then
      # Not going to prune anything, but output a message in verbose mode

      print_revlib_hdr
      case $NUM_REVS in
        0)
          echo "$VERSION": No revisions;;
        1)
          echo "$VERSION: $KEEP_MSG only 1 revision";;
        *)
          echo "$VERSION: $KEEP_MSG" all $NUM_REVS revisions;;
      esac
    fi
  done
)

for REVLIB in $REVLIBS; do
  prune_revlib "$REVLIB"
done

