#!/bin/sh

# ===========================================================================
#
#                            PUBLIC DOMAIN NOTICE
#            National Center for Biotechnology Information (NCBI)
#
#  This software/database is a "United States Government Work" under the
#  terms of the United States Copyright Act.  It was written as part of
#  the author's official duties as a United States Government employee and
#  thus cannot be copyrighted.  This software/database is freely available
#  to the public for use. The National Library of Medicine and the U.S.
#  Government do not place any restriction on its use or reproduction.
#  We would, however, appreciate having the NCBI and the author cited in
#  any work or product based on this material.
#
#  Although all reasonable efforts have been taken to ensure the accuracy
#  and reliability of the software and data, the NLM and the U.S.
#  Government do not and cannot warrant the performance or results that
#  may be obtained by using this software or data. The NLM and the U.S.
#  Government disclaim all warranties, express or implied, including
#  warranties of performance, merchantability or fitness for any particular
#  purpose.
#
# ===========================================================================
#
# File Name:  epost
#
# Author:  Jonathan Kans, Aaron Ucko
#
# Version Creation Date:   06/25/20
#
# ==========================================================================

# directory containing this script
pth=$( dirname "$0" )

# conditionally execute original Perl implementation

PERL=""

# consume leading mode-selection flags; -internal is only remembered here
# and is put back in front of the remaining arguments afterwards
internal=no
while [ "$#" -ne 0 ]
do
  case "$1" in
    -internal ) internal=yes ;;
    -newmode )  USE_NEW_EDIRECT=1 ;;
    -oldmode )  USE_NEW_EDIRECT=0 ;;
    * )         break ;;
  esac
  shift
done

# the dummy "_" word keeps "set" from reading -internal as one of its own
# options; the following shift discards the dummy again
if [ "$internal" = yes ]; then
  set _ -internal "$@"
  shift
fi

# without ecommon.sh next to this script, force the setting that selects
# the Perl implementation below
if [ ! -f "$pth/ecommon.sh" ]; then
  USE_NEW_EDIRECT=false
fi

# an empty value, 0, "off" (any case), or anything starting with F/f/N/n
# selects the original Perl implementation
case "${USE_NEW_EDIRECT}" in
  "" | 0 | [Oo][Ff][Ff] | [FfNn]* )
    # set PERL path if using old EDirect
    PERL=perl
    case "$( uname -s )" in
      Darwin )
        PERL="/usr/bin/perl"
        ;;
      CYGWIN_NT* )
        # perl exits with 0 (shell success) only when $^O starts with MSWin,
        # hence the negated match; in that case pth is converted by cygpath -w
        if perl -e 'exit $^O !~ /^MSWin/'; then
          pth=$( cygpath -w "$pth" )
        fi
        ;;
    esac
    ;;
esac

# hand the command over to edirect.pl when a Perl interpreter was chosen
if [ -n "${PERL}" ]; then
  exec "${PERL}" "$pth/edirect.pl" -post "$@"
  exit 0
fi

# handle common flags - dot command is equivalent of "source"

. "$pth/ecommon.sh"

# help text

# PrintHelp writes "epost" followed by the value of $version, then a summary
# of the epost-specific options, to stdout.

PrintHelp() {

  # unquoted delimiter: $version is expanded inside the here-document
  cat << EOF
epost $version

  -db        Database name
  -id        Unique identifier(s) or accession number(s)
  -format    uid or acc
  -input     Read identifier(s) from file instead of stdin

EOF
}

# print usage and stop when the isHelp flag holds the value "true"

case "$isHelp" in
  true )
    PrintHelp
    exit 0
    ;;
esac

# initialize specific flags

format=""

# read command-line arguments

while [ $# -gt 0 ]
do
  case "$1" in
    -db )
      # the flag must be followed by a value
      if [ $# -lt 2 ]; then
        echo "ERROR: Missing -db argument" >&2
        exit 1
      fi
      db="$2"
      shift 2
      ;;
    -id )
      # the flag must be followed by at least one identifier
      if [ $# -lt 2 ]; then
        echo "ERROR: Missing -id argument" >&2
        exit 1
      fi
      ids="$2"
      shift 2
      # concatenate run of UIDs with commas, stopping at the next flag
      while [ $# -gt 0 ]
      do
        case "$1" in
          -* ) break ;;
        esac
        ids="$ids,$1"
        shift
      done
      ;;
    -format )
      # no longer needed to identify non-numeric accessions, provided for backward compatibility
      if [ $# -lt 2 ]; then
        echo "ERROR: Missing -format argument" >&2
        exit 1
      fi
      format="$2"
      shift 2
      ;;
    -h | -help | --help )
      PrintHelp
      exit 0
      ;;
    -* )
      # any other flag is offered to ParseCommonArgs; argsConsumed is then
      # read as the number of arguments to drop
      ParseCommonArgs "$@"
      if ! [ "$argsConsumed" -gt 0 ]; then
        echo "ERROR: Unrecognized option $1" >&2
        exit 1
      fi
      shift "$argsConsumed"
      ;;
    * )
      # first argument that is not a flag ends option parsing
      break
      ;;
  esac
done

FinishSetup

# check for missing database argument

if [ -z "$db" ]; then
  echo "ERROR: Missing -db argument" >&2
  exit 1
fi

# call ParseStdin only when neither ids nor input has a value
# (presumably this is where piped UIDs are picked up - see ecommon.sh)

case "$ids$input" in
  "" ) ParseStdin ;;
esac

# needHistory allows reuse of GenerateUidList; it is switched on when
# ids, rest and input are all empty

case "$ids$rest$input" in
  "" ) needHistory=true ;;
esac

# take database from WebEnv value or -db argument

dbase="${dbase:-$db}"

# check for missing required arguments

if [ -z "$dbase" ]; then
  echo "ERROR: Missing -db argument" >&2
  exit 1
fi

# RunWithPostArgs writes a progress dot to stderr when log is "true", then
# passes "-WebEnv", the current web_env value, RunWithCommonArgs and the
# caller's arguments to AddIfNotEmpty (post-specific argument, if set)

RunWithPostArgs() {

  case "$log" in
    true ) printf "." >&2 ;;
  esac

  AddIfNotEmpty -WebEnv "$web_env" RunWithCommonArgs "$@"
}

# convert spaces between UIDs to commas, then collapse runs of commas

ids=$( echo "$ids" | sed -e 's/ /,/g' -e 's/,,*/,/g' )

# special case for accessions in assembly

# AccessionQuery reads identifiers, one per line, from GenerateUidList and
# writes a query to stdout in which every identifier is followed by [UID]
# when it consists solely of digits and dots, or by [ACCN] otherwise.
# The output of join-into-groups-of has its commas replaced by " OR "
# (it is expected to emit comma-joined groups - verify against its definition).
# Reads: dbase.
#
# A "+" is placed between identifier and tag and is turned into a space only
# by the final tr, after grouping has been done.

AccessionQuery() {

  GenerateUidList "$dbase" |
  while read -r uid
  do
    # classify with a shell pattern rather than piping each identifier
    # through echo and sed: no extra processes per line, and identifiers
    # that echo would treat as an option (e.g. "-n") are still inspected
    case "$uid" in
      *[!0-9.]* )
        printf '%s\n' "$uid+[ACCN]"
        ;;
      * )
        printf '%s\n' "$uid+[UID]"
        ;;
    esac
  done |
  join-into-groups-of 10000 |
  sed -e 's/,/ OR /g' |
  tr '+' ' '
}

# for the assembly database with needHistory still false, run a single
# esearch on the query built by AccessionQuery and stop
case "$dbase:$needHistory" in
  assembly:false )
    query=$( AccessionQuery )
    esearch -db "$dbase" -query "$query"

    exit 0
    ;;
esac

# post to history in groups, join if necessary with esearch

# keep the WebEnv value that was in effect before any posting
wb="$web_env"

# PostInGroups feeds the identifiers from GenerateUidList through
# join-into-groups-of 10000 and sends each resulting line to epost.fcgi via
# RunWithPostArgs.  For every non-empty reply it calls ParseMessage and, when
# the checks pass, WriteEDirectStep, whose stdout is the function's output.
#
# Reads:   log, dbase, base, wb (WebEnv value saved before posting)
# Outputs: WriteEDirectStep output on stdout; all diagnostics on stderr.
#          Callers capture stdout, so a diagnostic printed there would be
#          mixed into the captured step data instead of reaching the user.

PostInGroups() {

  if [ "$log" = true ]
  then
    printf "EPost\n" >&2
  fi

  GenerateUidList "$dbase" |
  join-into-groups-of 10000 |
  while read -r uids
  do
    err=""
    res=$( RunWithPostArgs nquire -url "$base" epost.fcgi -db "$dbase" -id "$uids" )

    if [ -n "$res" ]
    then
      qry_key=""
      ParseMessage "$res" ePostResult web_env WebEnv qry_key QueryKey

      # the loop is a pipeline stage, so "exit 1" below only leaves the
      # enclosing subshell and stops further posting
      if [ -n "$err" ]
      then
        echo "ERROR: epost failed - $err" >&2
        exit 1
      fi
      if [ -z "$web_env" ]
      then
        echo "WebEnv value not found in epost output - WebEnv1 $wb" >&2
        exit 1
      fi
      # a reply must stay in the WebEnv that was in effect before posting
      if [ -n "$wb" ] && [ "$web_env" != "$wb" ]
      then
        echo "WebEnv mismatch in epost output - WebEnv1 $wb, WebEnv2 $web_env" >&2
        exit 1
      fi

      WriteEDirectStep "$dbase" "$web_env" "$qry_key" "$err"
    fi
  done

  # terminate the line of progress dots written by RunWithPostArgs
  if [ "$log" = true ]
  then
    printf "\n" >&2
  fi
}

# run the posts and capture what PostInGroups writes to stdout
psts=$( PostInGroups )

if [ -n "$psts" ]
then
  # extract the webenv values and all key numbers from the ENTREZ_DIRECT records
  comps=$( echo "$psts" | xtract -wrp Set,Rec -pattern ENTREZ_DIRECT \
           -wrp Web -element WebEnv -wrp Key -element QueryKey )

  # first webenv value, and every key wrapped as "(#key)" joined with " OR "
  wbnv=$( echo "$comps" | xtract -pattern Set -first Web )
  qrry=$( echo "$comps" | xtract -pattern Set -block Rec -pfx "(#" -sfx ")" -tab " OR " -element Key )

  err=""
  num=""
  # send search command, e.g., "(#1) OR (#2)", along with database and web environment
  srch=$( RunWithCommonArgs nquire -get "$base" esearch.fcgi -db "$dbase" \
          -WebEnv "$wbnv" -term "$qrry" -retmax 0 -usehistory y )

  if [ -n "$srch" ]
  then
    # parse the reply with its TranslationStack element removed; the stripped
    # copy used to be computed and then ignored in favor of the raw reply
    # NOTE(review): presumably the element is dropped so that fields nested
    # inside it are not picked up by ParseMessage - confirm in ecommon.sh
    res=$( echo "$srch" | sed -e 's|<TranslationStack>.*</TranslationStack>||' )
    ParseMessage "$res" eSearchResult web_env WebEnv qry_key QueryKey num Count
  fi

  WriteEDirect "$dbase" "$web_env" "$qry_key" "$num" "$stp" "$err"

  exit 0
fi

# warn on error

echo "ERROR: EPost failure" >&2
exit 1
