#!/bin/sh

# The first positional argument is the root directory of the archive; without
# it there is nothing to do.
if [ "$#" -lt 1 ]; then
  echo "Must supply path to archive files"
  exit 1
fi

# Two spellings of the same directory are kept:
#   archive - handed to rchive via -archive (rewritten by cygpath below when
#             running under Cygwin/MinGW)
#   native  - the path exactly as supplied, used by this script's own
#             cd / append operations
archive="$1"
shift
native="$archive"

# Normalize the kernel name: everything from "_NT-" onward is cut back to
# "_NT", and a leading "MINGW<digits>" is rewritten to "CYGWIN", so both
# environments report as CYGWIN_NT.
osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')
if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]; then
  archive=$(cygpath -w "$archive")
fi

# Drop a single trailing slash from each spelling, if present.
archive=${archive%/}
native=${native%/}

# deleteCitations FILE
#
# Handles the DeleteCitation entries of one uncompressed XML file.
#
#   1. FILE is piped through xtract (-pattern DeleteCitation, -element PMID);
#      the numerically sorted, de-duplicated output is saved in the scratch
#      file .TO-REPORT in the current directory.
#   2. That list is piped through "rchive -trie -gzip" and the sorted,
#      de-duplicated result is saved in the scratch file .TO-DELETE
#      (presumably the archive-relative paths of the stored records).
#   3. If .TO-DELETE is non-empty, each of its lines is passed to "rm -f"
#      from inside $native.
#   4. If .TO-REPORT is non-empty, it is appended to $native/deleted.uid.
#   5. Both scratch files are removed.
#
# Arguments: $1 - path of the XML file to scan
# Globals:   native (read); inp, pmidlist and delenda are assigned without
#            being local to the function.
deleteCitations() {
  inp="$1"
  pmidlist=.TO-REPORT
  delenda=.TO-DELETE

  # Step 1: identifiers named in DeleteCitation blocks.
  cat "$inp" |
    xtract -pattern DeleteCitation -block PMID \
      -tab "\n" -sep "." -element "PMID" |
    sort -n |
    uniq > "$pmidlist"

  # Step 2: translate the identifiers with rchive.
  cat "$pmidlist" | rchive -trie -gzip | sort -n | uniq > "$delenda"

  # Step 3: the redirection is opened in the current directory; only the
  # subshell changes into $native, so the names given to rm resolve there.
  if test -s "$delenda"; then
    (cd "$native" && xargs rm -f) < "$delenda"
  fi

  # Step 4: keep a cumulative record of the identifiers that were reported.
  if test -s "$pmidlist"; then
    cat "$pmidlist" >> "$native/deleted.uid"
  fi

  # Step 5: discard the scratch files.
  rm "$pmidlist"
  rm "$delenda"
}

# reportVersioned FILE
#
# Runs xtract over FILE, selecting the MedlineCitation/PMID of PubmedArticle
# records under the condition "@Version" -gt 1.  The numerically sorted,
# de-duplicated output is staged in the scratch file .TO-REPORT in the
# current directory and, when non-empty, appended to $native/versioned.uid.
# The scratch file is removed before returning.
#
# Arguments: $1 - path of the XML file to scan
# Globals:   native (read); inp and pmidlist are assigned without being
#            local to the function.
reportVersioned() {
  inp="$1"
  pmidlist=.TO-REPORT

  xtract -input "$inp" -pattern PubmedArticle -block MedlineCitation/PMID \
    -if "@Version" -gt 1 -element "PMID" | sort -n | uniq > "$pmidlist"

  # Only touch versioned.uid when there is something to add.
  if test -s "$pmidlist"; then
    cat "$pmidlist" >> "$native/versioned.uid"
  fi

  rm "$pmidlist"
}

# Remove versioned.xml.gz and its sentinel from the current directory before
# the loop runs, so the glob below cannot match that file.
rm -f "versioned.xml.gz"
rm -f "versioned.snt"

# The slow-archiving warning is printed at most once per run.
needToReport=true

# Process every compressed XML file in the current directory that does not
# yet have a "<base>.snt" sentinel next to it.
for fl in *.xml.gz
do
  # When nothing matches, the shell leaves the pattern "*.xml.gz" unexpanded.
  # Skip anything that is not an existing regular file so no tools are run
  # on a non-existent input and no bogus "*.snt" sentinel is created.
  if [ ! -f "$fl" ]
  then
    continue
  fi
  base=${fl%.xml.gz}
  # A sentinel means this file was already handled by an earlier run.
  if [ -f "$base.snt" ]
  then
    continue
  fi
  secnds_start=$(date "+%s")
  echo "$base.xml"
  # Decompress and reformat into a temporary "<base>.xml" in the current
  # directory.
  gunzip -c "$fl" |
  transmute -compress -strict -wrp PubmedArticleSet \
    -pattern "PubmedArticleSet/*" -format > "$base.xml"
  # Store the records in the archive, then handle deletions and report
  # versioned identifiers from the same temporary file.
  rchive -gzip -input "$base.xml" -archive "$archive" \
    -index MedlineCitation/PMID^Version -pattern PubmedArticle
  deleteCitations "$base.xml"
  reportVersioned "$base.xml"
  # Mark the file as done and drop the temporary XML.
  touch "$base.snt"
  rm "$base.xml"
  secnds_end=$(date "+%s")
  secnds=$((secnds_end - secnds_start))
  echo "$secnds seconds"
  # Warn once if a single file took more than 100 seconds.
  if [ "$needToReport" = true ]
  then
    if [ "$secnds" -gt 100 ]
    then
      echo ""
      echo "ARCHIVING IS SLOWER THAN EXPECTED."
      echo ""
      echo "PLEASE ENSURE THAT ANTIVIRUS SCANNING AND CONTENT INDEXING ARE DISABLED,"
      echo "AND THAT TRIM SUPPORT IS ENABLED FOR THE SOLID STATE DRIVE."
      echo ""
      needToReport=false
    fi
  fi
done
