#!/bin/sh
set -o errexit
set -o nounset

# The script accepts no arguments.  Given any, print the usage text on stderr
# and stop with a failing status.
if [ "$#" -gt 0 ]
then
	{
		printf 'Usage: %s\n' "${0}"
		printf '\n'
		printf '%s\n' \
			'Parses bids-schema.spec in the same directory as this script; downloads test' \
			'data and produces two archives, filtered to include only selected datasets with' \
			'audited license terms, in the current working directory.'
	} 1>&2
	exit 1
fi

# The finished archives are moved back to the directory the script was
# started from; all intermediate work happens in a private scratch directory.
OUTDIR="${PWD}"
TMPDIR="$(mktemp -d)"
# Remove the scratch directory on every exit path.  The handler is
# single-quoted so that the path is expanded (and properly quoted) when the
# trap runs rather than being spliced into the command text.
trap 'rm -rf -- "${TMPDIR}"' EXIT
# A signal handler that merely cleans up would let the script carry on in a
# directory that no longer exists.  Turn signals into a real exit instead
# (status 128 + signal number); the EXIT trap above then does the cleanup.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Absolute path of the spec file that sits next to this script.  "&&" keeps a
# failed cd from yielding an unrelated directory, and an empty CDPATH keeps cd
# from searching elsewhere or printing the directory it picked.
SPEC="$(CDPATH= cd -- "$(dirname -- "${0}")" && pwd)/bids-schema.spec"
# Every %global line of the spec file; get_macro feeds these to rpm.
MACROS="$(grep -E '^%global' "${SPEC}")"
# Print the expansion of one macro defined by the spec file's %global lines.
#
# Arguments: $1 - macro name, without the surrounding %{...}
# Globals:   MACROS (read) - the %global lines collected from the spec file
# Outputs:   the expanded value on stdout; a diagnostic on stderr on failure
# Returns:   0 on success; non-zero if rpm fails or the macro is not defined
get_macro()
{
  # The %global definitions and the macro reference are handed to rpm as one
  # expression, definitions first, so the reference on the last line can see
  # them.  POSIX sh has no "local", hence the prefixed variable name.
  _get_macro_value="$(rpm -E "${MACROS}
%{${1}}")" || return
  # rpm leaves a reference to an undefined macro unexpanded.  Report that
  # here instead of handing the literal "%{name}" to the caller as a value.
  if [ "${_get_macro_value}" = "%{${1}}" ]
  then
    echo "Error: macro %{${1}} is not defined" 1>&2
    return 1
  fi
  printf '%s\n' "${_get_macro_value}"
}

# Settings taken from the spec file's %global macros.  For each of the two
# upstream repositories (examples and error examples): the repository URL,
# the commit to fetch, and the whitespace-separated list of datasets to keep.
EX_URL="$(get_macro examples_url)"
EX_COMMIT="$(get_macro examples_commit)"
EX_LIST="$(get_macro examples)"
ERREX_URL="$(get_macro error_examples_url)"
ERREX_COMMIT="$(get_macro error_examples_commit)"
ERREX_LIST="$(get_macro error_examples)"

# Everything from here on happens inside the scratch directory.
cd "${TMPDIR}"
# Download URL layout: <repository>/archive/<commit>/<name>-<commit>.tar.gz
# --fail makes curl exit non-zero on an HTTP error (e.g. 404) instead of
# saving the server's error page under the archive's name and reporting
# success, which would only surface later as a confusing tar failure.
EX_ARCHIVE="bids-examples-${EX_COMMIT}.tar.gz"
EX_DL="${EX_URL}/archive/${EX_COMMIT}/${EX_ARCHIVE}"
echo "--> Downloading: ${EX_DL}" 1>&2
curl --fail -L -O "${EX_DL}"
ERREX_ARCHIVE="bids-error-examples-${ERREX_COMMIT}.tar.gz"
ERREX_DL="${ERREX_URL}/archive/${ERREX_COMMIT}/${ERREX_ARCHIVE}"
echo "--> Downloading: ${ERREX_DL}" 1>&2
curl --fail -L -O "${ERREX_DL}"

# Unpack both downloaded archives into the current (scratch) directory.
echo "--> Extracting: ${EX_ARCHIVE}" 1>&2
tar -xzf "${EX_ARCHIVE}"
echo "--> Extracting: ${ERREX_ARCHIVE}" 1>&2
tar -xzf "${ERREX_ARCHIVE}"
echo '--> Removing all but “whitelisted” datasets' 1>&2
# Each archive is expected to unpack into a single directory named after the
# archive file minus its ".tar.gz" suffix.
EX_ARCHDIR="$(basename "${EX_ARCHIVE}" '.tar.gz')"
ERREX_ARCHDIR="$(basename "${ERREX_ARCHIVE}" '.tar.gz')"
# Filter by construction: set the complete trees aside, create empty
# directories under the original names, and move only the listed datasets
# back into them.
mv "${EX_ARCHDIR}" "${EX_ARCHDIR}-original"
mv "${ERREX_ARCHDIR}" "${ERREX_ARCHDIR}-original"
mkdir "${EX_ARCHDIR}" "${ERREX_ARCHDIR}"
# The lists are deliberately left unquoted so that they split on whitespace
# into one dataset name per iteration.
for ds in ${EX_LIST}
do
  mv "${EX_ARCHDIR}-original/${ds}" "${EX_ARCHDIR}"
done
for ds in ${ERREX_LIST}
do
  mv "${ERREX_ARCHDIR}-original/${ds}" "${ERREX_ARCHDIR}"
done
# Restore the original mtimes
touch -r "${EX_ARCHDIR}-original" "${EX_ARCHDIR}"
touch -r "${ERREX_ARCHDIR}-original" "${ERREX_ARCHDIR}"

# Pack a directory into a zstd-compressed tar archive, using the tar options
# recommended by the GNU tar manual for reproducible archives.
#
# Arguments: $1 - directory to archive (relative to the current directory)
#            $2 - path of the .tar.zst file to write
# Globals:   ARCHDIR, FILTERED (written; POSIX sh has no "local")
# Outputs:   a progress message on stderr
# Returns:   tar's exit status, which is non-zero if either tar itself or the
#            compressor fails
rearchive()
{
  ARCHDIR="${1}"
  FILTERED="${2}"
  echo "--> Re-archiving: ${FILTERED}" 1>&2
  # The compressor is run by tar itself instead of through a shell pipeline:
  # POSIX sh has no "pipefail", so in "tar | zstdmt" a failing tar would be
  # hidden behind zstdmt's successful exit status and a truncated archive
  # would pass for a good one.
  #
  # https://www.gnu.org/software/tar/manual/html_section/Reproducibility.html
  TZ=UTC LC_ALL=C tar \
      --create \
      --file="${FILTERED}" \
      --use-compress-program='zstdmt --ultra -22' \
      --sort=name \
      --format=posix \
      --numeric-owner --owner=0 --group=0 \
      --mode=go+u,go-w \
      --pax-option='delete=atime,delete=ctime' \
      "${ARCHDIR}/"
}

# Output names: the filtered directory name plus "-filtered.tar.zst".
EX_FILTERED="${EX_ARCHDIR}-filtered.tar.zst"
ERREX_FILTERED="${ERREX_ARCHDIR}-filtered.tar.zst"
# Re-archive the examples first, then the error examples.
for archdir in "${EX_ARCHDIR}" "${ERREX_ARCHDIR}"
do
  rearchive "${archdir}" "${archdir}-filtered.tar.zst"
done

# Deliver both archives to the directory the script was started from.
mv "${EX_FILTERED}" "${ERREX_FILTERED}" "${OUTDIR}"
printf '%s\n' 'Done.' 1>&2
