#!/bin/sh

# ===========================================================================
#
#                            PUBLIC DOMAIN NOTICE
#            National Center for Biotechnology Information (NCBI)
#
#  This software/database is a "United States Government Work" under the
#  terms of the United States Copyright Act.  It was written as part of
#  the author's official duties as a United States Government employee and
#  thus cannot be copyrighted.  This software/database is freely available
#  to the public for use. The National Library of Medicine and the U.S.
#  Government do not place any restriction on its use or reproduction.
#  We would, however, appreciate having the NCBI and the author cited in
#  any work or product based on this material.
#
#  Although all reasonable efforts have been taken to ensure the accuracy
#  and reliability of the software and data, the NLM and the U.S.
#  Government do not and cannot warrant the performance or results that
#  may be obtained by using this software or data. The NLM and the U.S.
#  Government disclaim all warranties, express or implied, including
#  warranties of performance, merchantability or fitness for any particular
#  purpose.
#
# ===========================================================================
#
# File Name:  xplore
#
# Author:  Jonathan Kans
#
# Version Creation Date:   2/6/19
#
# ==========================================================================

#  Experimental front-end for expanding Entrez Direct style of command-line
#  navigation to external resources.

#  Initial implementation uses services created and maintained by BioThings.io.

#  Thanks to Kevin Xin for helpful conversations, and to the Su and Wu labs at
#  Scripps Research for devising and building the BioThings infrastructure.

#  BioThings references:
#
#  Xin J, Afrasiabi C, Lelong S, Adesara J, Tsueng G, Su AI, Wu C.
#  Cross-linking BioThings APIs through JSON-LD to facilitate knowledge exploration.
#  BMC Bioinformatics. 2018 Feb 1;19(1):30.
#  DOI: 10.1186/s12859-018-2041-5.
#  PMID: 29390967
#
#  Xin J, Mark A, Afrasiabi C, Tsueng G, Juchler M, Gopal N, Stupp GS, Putman TE,
#  Ainscough BJ, Griffith OL, Torkamani A, Whetzel PL, Mungall CJ, Mooney SD, Su AI,
#  and Wu C.
#  High-performance web services for querying gene and variant annotation.
#  Genome Biol. 2016 May 6;17(1):91.
#  DOI: 10.1186/s13059-016-0953-9.
#  PMID: 27154141

#  BioThings software repository:
#
#  https://github.com/biothings

#  This is still a work in progress. Obvious future improvements include:
#
#  - automatic retry in case of transient network failure
#  - expand shortcut arguments beyond -organism and -action
#  - closer integration with EDirect, e.g., epost accepting the ENTREZ_EXTEND message
#  - implementing xplore -save xml for direct access to underlying source databases
#  - expansion to other consolidated data resources (e.g., Monarch Initiative)
#  - automate discovery of resources and connections using SmartAPI

# Directory that holds this script.  bt-context.txt and the bt-* helper
# scripts are addressed relative to it.
dir=$(dirname "$0")

# Cygwin only: perl exits with status 0 when its $^O starts with "MSWin"
# (the negated match is then false), and in that case the directory is
# rewritten with cygpath -w.
case "$(uname -s)" in
  CYGWIN_NT* )
    if perl -e 'exit $^O !~ /^MSWin/'
    then
      dir=$(cygpath -w "$dir")
    fi
    ;;
esac

# The first argument selects the command.  Without one there is nothing to
# do, so report the problem on stderr (stdout may be feeding another
# pipeline stage) and fail.
if [ "$#" -lt 1 ]
then
  echo "Must supply command argument" >&2
  exit 1
fi

# Remember the command and leave only its own arguments in "$@".
cmd="$1"
shift

# DoPrepare - (re)build the bt-context.txt table next to this script.
#
# For each of the gene, variant and chem services this fetches
# https://my<db>.info/context/context.json with nquire, converts it with
# transmute -j2x, and queries the result with xtract.  Every record is
# written as three tab-separated columns: db, type and path.
#
# Globals:  dir (read) - directory that receives bt-context.txt
# Outputs:  overwrites "$dir"/bt-context.txt, sorted on the three columns
#           with case folded
# Returns:  exit status of the final sort
DoPrepare() {

  for db in gene variant chem
  do
    tbl=$(nquire -get "https://my$db.info/context/context.json" | transmute -j2x)

    # First pass: one line per entry under _context (presumably xtract's
    # "?" element prints the names of the child elements - confirm against
    # the xtract documentation).
    printf '%s\n' "$tbl" |
    xtract -pattern "_context" -block "_context/*" -tab "\n" -element "?" |
    while read -r path
    do
      # Second pass: the value stored under that entry, followed by the
      # entry name itself as a label.
      printf '%s\n' "$tbl" |
      xtract -pattern "_context" -sep "\n" \
        -element "$path" -lbl "$path"
    done |
    while read -r type path
    do
      # Keep only the last component of the value: drop one trailing
      # slash, then everything up to and including the final slash.
      type=${type%/}
      type=${type##*/}
      # printf emits real tab characters under every /bin/sh, whereas
      # echo "\t" is only expanded by some shells.
      printf '%s\t%s\t%s\n' "$db" "$type" "$path"
    done
  done |
  sort -k 1,1f -k 2,2f -k 3,3f > "$dir"/bt-context.txt
}

# Handle the commands that must work before the context file exists:
# -prepare creates it, while -version and -help only print information.
# Every other command falls through to the code after this statement.

case "$cmd" in
  -prepare )
    # Exit with DoPrepare's own status so that a failure to write the
    # context file is visible to the caller.
    DoPrepare
    exit $?
    ;;
  -version )
    version=$( einfo -version )
    echo "$version"
    exit 0
    ;;
  -h | -help | --help )
    version=$( einfo -version )
    cat <<EOF
xplore $version

Commands

  -link
  -load
  -save
  -search

  -version
  -help

Examples

  xplore -load ncbigene 2652

  echo WP455 | xplore -load wikipathway

  xplore -search ncbigene -query "symbol:OPN1MW AND taxid:9606"
  xplore -search ncbigene -query "symbol:OPN1MW" -organism "homo sapiens"
  xplore -search ncbigene -query "symbol:OPN1MW" -organism human
  xplore -search ncbigene -query "symbol:OPN1MW" -organism 9606

  xplore -search hgvs -query rs58991260

  xplore -search inchikey -query "drugbank.targets.uniprot:P05231 AND drugbank.targets.actions:inhibitor"
  xplore -search inchikey -query "drugbank.targets.uniprot:P05231" -action inhibitor

  xplore -save uid

  xplore -save xml (not yet implemented)

  xplore -search ncbigene -query "symbol:OPN1MW" -organism human |
  xplore -link wikipathways |
  xplore -link ncbigene |
  xplore -save uid

  esearch -db gene -query "OPN1MW [PREF]" -organism human |
  elink -target biosystems |
  efilter -pathway wikipathways |
  elink -target gene |
  efetch -format uid |
  sort -n

  xplore -load hgvs "chr6:g.26093141G>A,chr12:g.111351981C>T" |
  xplore -link ncbigene |
  xplore -link wikipathways |
  xplore -link ncbigene |
  xplore -link uniprot |
  xplore -link inchikey

EOF
    exit 0
    ;;
  * )
    # Nothing to do here.  A plain no-op is used because "break" is only
    # defined inside a loop; outside one, some shells print a diagnostic.
    :
    ;;
esac

# Every remaining command requires the context table written by
# xplore -prepare.  The complaint goes to stderr so that it is not mistaken
# for data by a later stage of a pipeline.
if [ ! -f "$dir/bt-context.txt" ]
then
  echo "You must first run xplore -prepare to create the bt-context.txt file" >&2
  exit 1
fi

# Map the command onto the helper script that implements it.  The helpers
# are expected in the same directory as this script.
case "$cmd" in
  -link )
    helper=bt-link
    ;;
  -load )
    helper=bt-load
    ;;
  -save )
    helper=bt-save
    ;;
  -search )
    helper=bt-srch
    ;;
  * )
    printf '%s: Unrecognized command %s\n' "$0" "$cmd" >&2
    exit 1
    ;;
esac

# Run the helper with the remaining arguments and hand its exit status back
# to the caller, so that a failing helper is not reported as success.
"$dir/$helper" "$@"
exit $?
