#!/bin/sh
# tla-fix-changelog-conflicts -- Automatically resolve merge conflicts
#	in ChangeLog files
# Usage: tla-fix-changelog-conflicts [CHANGELOG...]
#
#  Copyright (C) 2003, 2004  Miles Bader <miles@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Written by Miles Bader <miles@gnu.org>
#
#-
# tla-fix-changelog-conflicts searches for ChangeLog*.rej files (which are
# produced when a tla merge produces conflicts) in the current tree, and
# tries to automatically resolve the conflict.  It uses simple heuristics for
# this, but they seem to work fairly well in practice.  For every file
# successfully fixed, a message is output, and the conflict files removed.
#
# If every such conflict found is fixed successfully, or no conflicts are
# found, then tla-fix-changelog-conflicts exits with a status of 0; if there
# are unresolvable conflicts, then a non-zero exit status is returned.

# (---- beginning of hdr.shpp ----)
# hdr.shpp

# Base name of this script; used as the prefix of diagnostics and in the
# generated usage text.  $0 is quoted so that an invocation path that
# contains whitespace or glob characters still yields a single name.
me=`basename "$0"`

# Installation directory and the external tools this script runs.  AWK,
# TLA, SED and UUIDGEN are exported so that child processes see the same
# settings.
bindir='/usr/bin'
AWK='/usr/bin/nawk'; export AWK
TLA='tla'; export TLA
SED='/bin/sed'; export SED
UUIDGEN='uuidgen'; export UUIDGEN

# (---- TLA_TOOLS_VERSION defined from ,tla-tools-version ----)
TLA_TOOLS_VERSION='unknown-version
'
# (---- end of TLA_TOOLS_VERSION defined from ,tla-tools-version ----)

# Directory prefix ("$bindir/") that is prepended to a bare script name
# when the script file itself has to be located; empty only if bindir is
# unset.
TLA_TOOL_PFX="${bindir+$bindir/}"
export TLA_TOOL_PFX

TLA_ESCAPE='yes'

# Option string selected by TLA_ESCAPE.  NOTE(review): presumably passed
# to tla commands by the tla-tools scripts; it is not referenced in the
# rest of this file.
if test "$TLA_ESCAPE" = yes; then
  TLA_UNESCAPED_OPT='--unescaped'
else
  TLA_UNESCAPED_OPT=''
fi

# Some tools get completely confused by non-default locale settings
# (gawk, for instance, changes the meaning of regexp character ranges),
# so force the C locale.  LC_ALL takes precedence over LANG and over the
# individual LC_* variables, so it has to be set as well; setting LANG
# alone has no effect for users who have LC_ALL in their environment.
LANG=C; export LANG
LC_ALL=C; export LC_ALL

# (---- end of hdr.shpp ----)
# (---- beginning of simple-cmd-line.shpp ----)
# simple-cmd-line.shpp -- Simple command-line processing for no-option commands

# (---- beginning of cmd-line.shpp ----)
# cmd-line.shpp -- Command-line helper functions for shell scripts

# Path of the file holding this script's own source; the help and
# version functions read their text from its leading comment block.
# An invocation name that contains a slash is used as given, a bare
# name is looked for under TLA_TOOL_PFX.
case "$0" in
  */*) script="$0" ;;
  *)   script="${TLA_TOOL_PFX}$0" ;;
esac

# usage -- Print a one-line usage summary on stdout.
#
# The leading comment block of $script is scanned for a "# Usage: ..."
# line, which is printed without its "# " prefix.  If the scan first
# reaches a non-comment line or a bare "#" / "#-" line, or finds no
# usable Usage line at all, a generic summary built from $me is printed.
#
# The script name is handed to printf as data rather than being spliced
# into the sed program, so characters such as "&", "@" or "\" in $me are
# printed literally.  Only POSIX basic regular expressions are used (the
# GNU-only "\|" alternation is written as separate commands).
usage ()
{
  _usage_line=`$SED -n -e '/^[^#]/q' -e '/^#-* *$/q' \
                       -e '/^# *Usage:/{' -e 's/^# //p' -e q -e '}' \
                  < "$script"`

  if test -n "$_usage_line"; then
    printf '%s\n' "$_usage_line"
  else
    printf 'Usage: %s [--help|--version]\n' "$me"
  fi
}

# short_help -- Print the short description of this script on stdout.
#
# The description is the text of the leading comment block of $script up
# to (not including) the first non-comment line, bare "#" or "#-" line,
# or "# Usage:" line.  The "#!" line is dropped, anything up to and
# including a "-- " separator is removed, the comment prefix is stripped,
# and the result is re-filled with fmt.
#
# The stop pattern for bare comment lines is "#-* *$", the same one that
# usage uses, so a "#-" marker ends the description.  Only POSIX basic
# regular expressions are used (no GNU "\|").
short_help ()
{
  $SED -n -e '/^[^#]/q' \
          -e '/^#-* *$/q' \
          -e '/^# *Usage:/q' \
          -e '/^#!/d' \
          -e 's/^.*-- */# /' \
          -e 's/^#[ 	]*//p' \
     < "$script" | fmt
}

# help_body -- Print the long help text on stdout.
#
# The text is the part of the leading comment block of $script that
# starts after the "#-" marker line; scanning stops at the first blank
# line.  A leading "# " is removed from each line, and a line consisting
# of just "#" becomes an empty line.  Comment lines of any other shape
# (including the "#-" marker itself) are not printed.
#
# The two prefix forms are handled by two substitutions instead of the
# GNU-only "\|" alternation.  The "#" case comes first: once it has
# emptied the line the "# " case can no longer match, so each line is
# printed at most once.
help_body ()
{
  $SED -n -e '/^ *$/q' \
          -e '/^#-/,/^[^#]/{' \
          -e 's/^#$//p' \
          -e 's/^# //p' \
          -e '}' \
     < "$script"
}

# help -- Print the complete help text on stdout: the usage line, the
# short description, an empty line, and then the long help body.
help ()
{
  usage; short_help
  printf '\n'
  help_body
}

# version -- Print version and authorship information on stdout.
#
# Output is "$me (tla-tools) VERSION", followed by every "Written by"
# line and then every "Copyright" line found in the leading comment block
# of $script (scanning stops at the first non-comment line or "#-"
# marker).  Each of those lines is preceded by an empty line.
version ()
{
  # TLA_TOOLS_VERSION ends in a newline; command substitution drops it.
  # The declaration is kept separate from the assignment so that the
  # substitution is never subject to word splitting, which some /bin/sh
  # implementations apply to the arguments of "local".
  local no_nl_vers
  no_nl_vers=`printf '%s\n' "$TLA_TOOLS_VERSION"`

  echo "$me (tla-tools) $no_nl_vers"
  $SED -n '/^[^#]/q;/^#-/q;s/^# *\(Written by\)/\
\1/p' < "$script"
  $SED -n '/^[^#]/q;/^#-/q;s/^# *\(Copyright\)/\
\1/p' < "$script"
}

# unrec_opt OPTION -- Complain on stderr that OPTION is not a recognized
# option, and point the user at --help.
unrec_opt ()
{
  {
    printf '%s: unrecognized option `%s'\''\n' "$me" "$1"
    printf 'Try `%s --help'\'' for more information.\n' "$me"
  } 1>&2
}

# cmd_line_err -- Report a malformed command line: print the usage
# summary and a pointer to --help, both on stderr.
cmd_line_err ()
{
  {
    usage
    printf 'Try `%s --help'\'' for more information.\n' "$me"
  } 1>&2
}

# long_opt_val --OPTION=VALUE -- Print VALUE, i.e. the argument with
# everything up to and including the first "=" removed.  An argument
# without "=" is printed unchanged.
#
# printf is used instead of echo so that backslashes in VALUE are passed
# through untouched even when the shell's echo expands escape sequences.
long_opt_val ()
{
  printf '%s\n' "$1" | $SED 's/^[^=]*=//'
}

# short_opt_val -oVALUE -- Print VALUE, i.e. the argument with its
# leading "-" and option letter removed.
#
# printf is used instead of echo because the argument always starts with
# "-": a value such as "-nE" would otherwise be taken as options by echo
# itself and nothing would be printed.
short_opt_val ()
{
  printf '%s\n' "$1" | $SED 's/^-.//'
}

# (---- end of cmd-line.shpp ----)

# Handle the only options this command accepts.  --help and --version
# print their text and exit successfully, a leading "--" is discarded,
# and any other argument that starts with "-" is rejected with exit
# status 1.  Everything else is left in place for the code below.
case "$1" in
  --help)
    usage
    short_help
    printf '%s\n' \
      '' \
      '      --help           Display this help message and exit' \
      '      --version        Display a release identifier string and exit' \
      ''
    help_body
    exit 0 ;;
  -V | --version)
    version
    exit 0 ;;
  --)
    shift ;;
  -*)
    unrec_opt "$1"
    exit 1 ;;
esac

# (---- end of simple-cmd-line.shpp ----)

# (---- TLA_AWK_FUNS defined from tla-tools-funs.awk ----)
# TLA_AWK_FUNS holds AWK source text -- a library of helper functions --
# as one single-quoted shell string, so that it can be spliced into an
# AWK program elsewhere in this script.  Because the text sits inside
# shell single quotes, every apostrophe of the AWK source is spelled
# with the four characters '\'' (close quote, escaped quote, reopen
# quote).  The comments inside the string are AWK comments and are part
# of the string value.
TLA_AWK_FUNS='# tla-tools-funs.awk -- AWK functions used by my tla-* shell scripts

function _append_cmd_arg(cmd, arg)
{
  if (arg) {
    gsub (/'\''/, "'\''\\'\'''\''", arg)
    cmd = cmd " '\''" arg "'\''"
  }
  return cmd
}

# Return a shell command string corresponding to CMD with args
# ARG1...ARG4.  CMD is included as-is, so can contain shell
# meta-characters; ARG1...ARG4 are quoted to prevent evaluation by the
# shell, and correctly handle any embedded spaces.
function make_cmd(cmd, arg1, arg2, arg3, arg4)
{
  cmd = _append_cmd_arg(cmd, arg1)
  cmd = _append_cmd_arg(cmd, arg2)
  cmd = _append_cmd_arg(cmd, arg3)
  cmd = _append_cmd_arg(cmd, arg4)
  return cmd
}

# Run CMD with args ARG1...ARG4, return non-zero if successful.
# CMD is passed raw to the shell, so can contain shell meta-characters;
# ARG1...ARG4 are quoted to prevent evaluation by the shell, and 
# correctly handle any embedded spaces.  Returns 1 if the command
# succeeded, and 0 otherwise.
function run_cmd(cmd, arg1, arg2, arg3, arg4)
{
  # print "run_cmd: " make_cmd(cmd, arg1, arg2, arg3, arg4)
  return (system(make_cmd(cmd, arg1, arg2, arg3, arg4)) == 0) ? 1 : 0
}

# Run CMD with args ARG1...ARG4, return the first line of output, or 0
# if the command returned a failure status (or the command could not be
# executed).  CMD is passed raw to the shell, so can contain shell
# meta-characters; ARG1...ARG4 are quoted to prevent evaluation by the
# shell, and correctly handle any embedded spaces.
function run_cmd_first_line(cmd, arg1, arg2, arg3, arg4  ,result)
{
  cmd = make_cmd(cmd, arg1, arg2, arg3, arg4)
  if ((cmd| getline result) <= 0)
    result = 0
  close (cmd)
  # print "run_cmd_first_line: " cmd " => " result
  return result
}

# Return the first line of FILE
function file_first_line(file)
{
  return run_cmd_first_line("sed 1q", file)
}

# Return the last line of FILE
function file_last_line(file)
{
  return run_cmd_first_line("sed -n", "$p", file)
}

# Return the number of lines in FILE
function file_num_lines(file)
{
  return run_cmd_first_line("wc -l <", file) + 0
}

function file_is_dir(file)
{
  return run_cmd("ls -d >/dev/null 2>/dev/null", file "/.")
}

function file_exists(file  ,line,result)
{
  result = (getline line < file)
  close (file)
  return result >= 0
}

# Append TEXT to FILE, with an intervening blank line if LAST_LINE
# isn'\''t blank.  Returns 1 if succesful, and 0 otherwise.
function append_text(file, text, last_line  ,append_cmd)
{
  append_cmd = make_cmd("cat >>", file)
  if (last_line && last_line !~ /^[ \t]*$/)
    print "" |append_cmd
  printf ("%s\n", text) |append_cmd
  return close (append_cmd) == 0
}

function file_explicit_id_dir(file  ,dir)
{
  dir = file
  sub (/\/[^\/]*$/, "", dir)
  sub (/.*\//, "", file)
  return ((dir && dir != file) ? dir "/.arch-ids" : ".arch-ids")
}
function file_explicit_id_file(file  ,dir)
{
  dir = file
  sub (/\/[^\/]*$/, "", dir)
  sub (/.*\//, "", file)
  return ((dir && dir != file) ? dir "/.arch-ids/" : ".arch-ids/") file ".id"
}

function file_from_explicit_id_file(file  ,dir)
{
  sub (/\.id$/, "", file)
  
  dir = file
  sub (/\/[^\/]*$/, "", dir)
  sub (/.*\//, "", file)

  sub (/\.arch-ids$/, "", dir)

  return dir file
}

function file_has_explicit_id(file)
{
  return file_exists(file_explicit_id_file(file))
}

# Returns the id-tag and tagging-method of FILE, in tla "METH_ID" format
# (i.e., explicit ids have "x_" prepended to them, and taglines have "i_").
# FILE may be in a different project tree than the current directory.
# If no id can be found for FILE, 0 is returned instead.
function file_meth_id(file  ,output,parts)
{
  if (! (file in _file_meth_ids)) {
    output = run_cmd_first_line("$TLA id 2>/dev/null", file)
    if (! output)
      return 0

    split (output, parts)
    _file_meth_ids[file] = parts[2]
  }

  return _file_meth_ids[file]
}

# Returns the id-tag of FILE.
# FILE may be in a different project tree than the current directory.
# If no id can be found for FILE, 0 is returned instead.
function file_id(file  ,id)
{
  id = file_meth_id(file)
  if (id)
    sub (/^._/, "", id)
  return id
}

# Return the (absolute) filename corresponding to ID in TREE_ROOT,
# or zero if there is none.  If DIRS_ONLY is true, only directories are
# searched for (which can be slightly faster).
function id_file(id, tree_root, dirs_only  ,level,type_opt,inven_cmd,cmd_status,inven_line,parts)
{
  level = dirs_only ? 1 : 2;

  if (_id_files_tree_level[tree_root] + 0 < level) {
    # We have not searched TREE_ROOT before, or only searched for dirs
    type_opt = (dirs_only ? " --directories" : " --both")

    inven_cmd = make_cmd("$TLA inventory --ids --source 2>/dev/null" type_opt, tree_root)

    while ((cmd_status = (inven_cmd |getline inven_line)) > 0) {
      split (inven_line, parts)

      # Add to _file_meth_ids array since we have the info handy
      _file_meth_ids[parts[1]] = parts[2]

      # Add all entries to _id_files
      sub (/^._/, "", parts[2])
      _id_files[parts[2], tree_root] = parts[1]
    }

    if (cmd_status >= 0)
      close (inven_cmd)

    _id_files_tree_level[tree_root] = level
  }

  return _id_files[id, tree_root]
}

# Return a prefix suitable for prepending to filenames in the current
# directory to make them properly project-tree-root relative, to the
# tree-root TREE_ROOT; if TREE_ROOT is zero (or not given), then the tla
# `tree-root'\'' command is invoked to compute the current tree-root.  If
# the current directory is a tree-root, then the result is the empty
# string.
function tree_root_prefix(tree_root  ,cwd)
{
  if (! tree_root)
    tree_root = run_cmd_first_line("$TLA tree-root 2>/dev/null")
  cwd = run_cmd_first_line("pwd")
  if (cwd != tree_root && substr (cwd, 1, length (tree_root)) == tree_root)
    return substr (cwd, length (tree_root) + 2) "/"
  else
    return ""
}

# Return the path to FILE in a pristine version (either a revision
# library entry or a pristine tree) of the latest revision, or 0 if one
# cannot be found.
function pristine_file(file  ,latest_rev,revlib,revlibs_cmd,revlibs_cmd_status,greedy)
{
  if (! pristine_root) {
    # Find the latest revision and make sure we have a pristine tree for
    # it; by `pristine tree'\'' we really mean revlib entry or pristine tree

    latest_rev = run_cmd_first_line("$TLA logs -f | sed -n '\''$p'\''")

    # See if we'\''ve got a revlib entry handy
    pristine_root = run_cmd_first_line("$TLA library-find --silent", latest_rev)

    if (! pristine_root) {
      # No revlib entry; can we add one to a greedy library?

      # Search for a greedy revision library
      revlibs_cmd = make_cmd("$TLA my-revision-library 2>/dev/null")
      while ((revlibs_cmd_status = (revlibs_cmd |getline revlib)) > 0) {
	greedy = run_cmd_first_line(make_cmd("$TLA library-config", revlib) \
				    "| grep '\''^greedy[?]'\''")
	if (greedy ~ /yes$/)
	  break
      }
      if (revlibs_cmd_status >= 0)
	close (revlibs_cmd)

      if (revlibs_cmd_status > 0) {
	# Found a greedy library, add an entry for this revision to it

	if (run_cmd("$TLA library-add", latest_rev))
	  pristine_root = run_cmd_first_line("$TLA library-find", latest_rev)
      }

      if (! pristine_root) {
	# Give up with revlibs and try to add a pristine tree

	if (run_cmd("$TLA add-pristine", latest_rev))
	  pristine_root = run_cmd_first_line("$TLA find-pristine", latest_rev)
      }
    }
  }

  if (pristine_root)
    return pristine_root "/" file
  else
    return 0
}

# Return a unique ID string
function unique_id() { return run_cmd_first_line("$UUIDGEN") }

# Return the filename FILE with any leading `./'\'' removed
function no_dot(file) { sub (/^\.\//, "", file); return file }

# Returns the (fully-specified) revision REV with the patch-level
# component removed
function revision_version(rev  ,archive,parts,ver)
{
  if (split (rev, parts, "/") == 2) {
    archive = parts[1]
    rev = parts[2]
  } else
    archive = 0
    
  split (rev, parts, "--")

  ver = parts[1] "--" parts[2] "--" parts[3]
  if (archive)
    ver = archive "/" ver

  return ver
}

# Returns the patch-level component of the (fully-specified) revision REV
function revision_patch_level(rev  ,parts)
{
  # Note that the archive component can have embedded -- markers too,
  # but that does not effect the result
  return parts[split (rev, parts, "--")]
}

function patch_log_file_name(rev   ,archive,parts)
{
  split (rev, parts, "/")
  archive = parts[1]
  rev = parts[2]
    
  split (rev, parts, "--")

  return								\
    "{arch}/"								\
    parts[1]								\
    "/" parts[1] "--" parts[2]						\
    "/" parts[1] "--" parts[2] "--" parts[3]				\
    "/" archive								\
    "/patch-log/" parts[4]
}

'
# (---- end of TLA_AWK_FUNS defined from tla-tools-funs.awk ----)

# First line of a .rej file when the merge left diff3-style conflict
# markers inside the ChangeLog itself rather than a patch reject.  The
# text is compared verbatim with the first line of each .rej file, so
# its spelling must not be "corrected".
diff3_rej_msg="Conflicts occured, diff3 conflict markers left in file."

# Suffix appended to a ChangeLog name to form the scratch file used while
# applying a traditional patch reject.
tmp_sfx=".$$.new"

# Becomes non-zero as soon as one conflict could not be fixed.
exit_status=0

# fix_diff3_conflict CL REJ -- Resolve diff3-style conflict markers in CL.
#
# CL contains "<<<<<<< ", "=======" and ">>>>>>> " marker lines.  The
# markers are removed, both halves of every conflict are kept (first
# half first), and lines at the end of the second half that merely
# repeat the end of the first half are dropped.  When lines are dropped,
# a "* deleted N duplicate lines in CL" message is written to stdout.
#
# On success CL is replaced, REJ and CL.orig are removed, a "* fixed"
# message is printed, and 0 is returned.  If the markers are malformed,
# or a file operation fails, CL is left alone, the scratch file is
# removed, a diagnostic goes to stderr, and 1 is returned.
fix_diff3_conflict ()
{
  cl_new="`dirname "$1"`/,,`basename "$1"`.new"

  # The initial "cp -p" is used only to ensure that the existing file
  # permissions are preserved (unfortunately, chmod --reference is not
  # portable); the output redirection then replaces the contents of the
  # copy.  Descriptor 3 carries messages to the real stdout while stdout
  # itself receives the merged text.  The file name reaches AWK through
  # the environment, so it is never parsed as AWK source or as a printf
  # format.
  if cp -p "$1" "$cl_new" \
     && TLA_FIX_CL="$1" $AWK '
        BEGIN {
          # state: 0 = outside a conflict, 1 = in the first half,
          # 2 = in the second half
          state = 0
          failure = 0
          suppressed_duplicate_lines = 0
        }

        # Switch from state OLD to state NEW.  A marker that shows up in
        # any other state means the markers are malformed: give up.
        function new_state(old, new)
        {
          if (state != old) {
            failure = 1
            exit (11)
          }

          state = new;
          num_lines[state] = 0
        }

        # Print the 2nd-half of a from-to conflict pair, removing any tail
        # lines that are duplicates of those in the first half.
        function flush_2nd_half(  tail_common,nl1,nl2,pl)
        {
          nl1 = num_lines[1]
          nl2 = num_lines[2]

          # See if one of the two halves is a tail of the other
          for (tail_common = 0; nl1 > tail_common && nl2 > tail_common; tail_common++)
            if (lines[1, nl1 - tail_common] != lines[2, nl2 - tail_common])
              break;

          if (tail_common != nl1 && tail_common != nl2)
            # If the common tail part is not the entire 2nd hunk, remove
            # blank lines from it (blank lines often cause "false tails")
            while (tail_common > 0 && lines[1, nl1 - tail_common + 1] ~ /^[ \t]*$/)
              tail_common--

          # Remove the common tail from the 2nd half
          nl2 -= tail_common
          suppressed_duplicate_lines += tail_common

          # Print the remaining unique head of the 2nd half, if any
          for (pl = 1; pl <= nl2; pl++)
            print lines[2, pl]
        }

        /^<<<<<<< / { new_state(0, 1); next }
        /^=======$/ { new_state(1, 2); next }
        /^>>>>>>> / { new_state(2, 0); flush_2nd_half(); next }

        {
          # Lines inside a conflict are remembered; everything except
          # the second half is copied to the output right away.
          if (state != 0)
            lines[state, ++num_lines[state]] = $0
          if (state != 2)
            print
        }

        END {
          if (! failure) {
            # Input ended in the middle of a conflict
            if (state != 0)
              exit (12)

            if (suppressed_duplicate_lines > 0)
              printf ("* deleted %d duplicate lines in %s\n",
                      suppressed_duplicate_lines, ENVIRON["TLA_FIX_CL"]) |"cat 1>&3"
          }
        }
      ' 3>&1 < "$1" > "$cl_new" \
     && mv "$cl_new" "$1"
  then
    rm -f "$2" "$1.orig"
    echo "* fixed ChangeLog conflict: $1"
  else
    rm -f "$cl_new"
    printf '%s: Unfixable ChangeLog conflict:  %s\n' "$me" "$2" 1>&2
    return 1
  fi
}

# fix_patch_rej CL REJ -- Apply a traditional patch reject REJ to CL.
#
# REJ is accepted only if it consists of exactly one hunk (context or
# unified format) whose changes are pure additions at the very start of
# the hunk, i.e. a new entry for the top of the ChangeLog.  Deletions,
# changed lines, additions that follow context lines, several hunks, or
# a file without any hunk or added line make the conflict unfixable.
#
# On success the added lines are prepended to CL, REJ and CL.orig are
# removed, a "* fixed" message is printed, and 0 is returned.  Otherwise
# CL is left alone, the scratch file is removed, a diagnostic goes to
# stderr, and 1 is returned.
fix_patch_rej ()
{
  rej_new="$1$tmp_sfx"

  # AWK only extracts the new entry; the files are handled here so that
  # every step can be checked and nothing depends on when AWK flushes
  # its output.  As above, "cp -p" only serves to carry the permissions
  # of CL over to the scratch file.
  if cp -p "$1" "$rej_new" \
     && $AWK '
        BEGIN { failed = 0; hunk_seen = 0; space_count = 0; added = 0 }

        function fail() { failed = 1; exit (10) }

        /^diff / { next }

        # Hunk header
        /^(\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*$|@@ )/ {
          if (hunk_seen)
            fail()	# Multiple hunks
          hunk_seen = 1
          space_count = 0
          unidiff = ($1 == "@@")
          next
        }

        # Hunk part header
        /^(---|\*\*\*|\+\+\+) / { space_count = 0; next }
        # unchanged line
        /^ / { space_count++; next }
        # line addition
        /^[+]/ {
          if (space_count > 0)
            fail()	# Not at beginning of hunk, or not contiguous
          if (unidiff)
            sub (/^[+]/, "")
          else
            sub (/^[+] /, "")
          if (added > 0)
            new_entry = new_entry "\n" $0
          else
            new_entry = $0
          added++
          next
        }

        # Either a line deletion (-) or unrecognized junk
        { fail() }

        END {
          if (failed || ! hunk_seen || added == 0)
            exit (10)
          print new_entry
        }
      ' < "$2" > "$rej_new" \
     && cat "$1" >> "$rej_new" \
     && mv "$rej_new" "$1"
  then
    rm -f "$2" "$1.orig"
    echo "* fixed ChangeLog conflict: $1"
  else
    rm -f "$rej_new"
    printf '%s: Unfixable ChangeLog conflict: %s\n' "$me" "$2" 1>&2
    return 1
  fi
}

# fix_changelog_conflict ARG -- Fix the conflict named by ARG.
#
# ARG is either a ChangeLog file or its .rej file; a leading "./" is
# dropped so that messages show the short name.  Sets the variables cl
# and rej to the two file names.  Returns 0 if the conflict was fixed
# and non-zero if it was not (including the case where either file is
# missing or unreadable).
fix_changelog_conflict ()
{
  case "${1#./}" in
    *.rej) rej=${1#./}; cl=${rej%.rej} ;;
    *)     cl=${1#./};  rej="$cl.rej" ;;
  esac

  if [ -r "$cl" ] && [ -r "$rej" ]; then
    if test x"`$SED q < "$rej"`" = x"$diff3_rej_msg"; then
      # diff3-style inline conflict markers, left by --three-way option
      fix_diff3_conflict "$cl" "$rej"
    else
      # Traditional patch .rej files
      fix_patch_rej "$cl" "$rej"
    fi
  else
    printf '%s: Unfixable ChangeLog conflict:  %s\n' "$me" "$rej" 1>&2
    return 1
  fi
}

# Without arguments, work on every ChangeLog*.rej file below the current
# directory.  The output of find is split on newlines only, with
# globbing switched off, so that names containing blanks or wildcard
# characters arrive intact.
if test $# -eq 0; then
  newline='
'
  saved_ifs=$IFS
  IFS=$newline
  set -f
  set -- `find . -name '[Cc]hange[Ll]*.rej' -print`
  set +f
  IFS=$saved_ifs
fi

for arg
do
  fix_changelog_conflict "$arg" || exit_status=10
done

# Report the accumulated result: exit_status is 0 unless the loop above
# recorded a failure.
exit $exit_status

