#!/bin/sh
# Query UDD for the packages listed in the "Packages" column of
#  https://docs.google.com/spreadsheets/d/1tApLhVqxRZ2VOuMH_aPUgFENQJfbLlB_PFH_Ah_q7hM/edit?ts=5eb957ba#gid=543782716
#
# TODO: diamond-aligner shows up more than once in the result but should be listed only once

# Print the command-line help text to standard error.
#
# Arguments: none (the program name shown is taken from $0)
# Outputs:   help text on stderr
#
# The text is written through the already-open file descriptor 2 (>&2)
# instead of re-opening /dev/stderr by path: when stderr has been redirected
# to a regular file, opening /dev/stderr with ">" truncates that file and
# discards whatever diagnostics were written to it before.
usage() {
cat >&2 <<EOT
Usage: $0
      -h this help screen
      -m forcing public mirror over local one

Description:
  Query UDD for packages belonging to a task of a specified blend.
  The output is sorted according to usage statistics and contains
  information about existence of an autopkgtest as well whether the
  interface might require X11 or not.
EOT
}

# Abort early when no psql client can be found: every later step of this
# script shells out to psql.
# `command -v` is the POSIX builtin for locating a command; it avoids the
# dependency on the external, non-standard `which` and the unquoted command
# substitution inside `[ ]`.
# The hint is a diagnostic, so it is written to stderr.
if ! command -v psql >/dev/null 2>&1 ; then
    cat >&2 <<EOT
No PostgreSQL client providing /usr/bin/psql installed.
On a Debian system please
    sudo apt-get install postgresql-client-common
EOT
    exit 1
fi

# Decide which UDD instance psql talks to and publish the result in SERVICE
# (psql connection arguments, expanded unquoted later on purpose) and, for the
# public mirror, in the exported PGPASSWORD.
#
# Options are parsed BEFORE any database probing so that
#   -h prints the help without waiting for connection attempts, and
#   -m really bypasses the local instance instead of probing it first.

# Point SERVICE/PGPASSWORD at the public UDD mirror.
use_public_mirror() {
    export PGPASSWORD="udd-mirror"
    SERVICE="--host=udd-mirror.debian.net --port=5432 --username=udd-mirror udd"
    #SERVICE="postgresql://udd-mirror:udd-mirror@udd-mirror.debian.net/udd"
}

force_mirror=false
# Any option other than -h/-m makes getopts report it and set o to '?',
# which ends up in the catch-all branch (help text, exit status 1).
while getopts "hm" o; do
    case "${o}" in
        h)
            usage
            exit 0
            ;;
        m)
            force_mirror=true
            ;;
        *)
            usage
            exit 1
            ;;
    esac
done

if [ "$force_mirror" = true ] ; then
    use_public_mirror
else
    # Default: connection service named "udd".
    SERVICE="service=udd"
    # PORT is not assigned anywhere in this part of the script; presumably it
    # can be supplied through the environment (TODO confirm).  It is left
    # unquoted so that an empty value disappears and a value such as
    # "-p 5433" is split into separate psql arguments.
    #if there is a local UDD clone just use this
    if psql $PORT -l 2>/dev/null | grep -qw udd ; then
        SERVICE=udd
    fi

    # Check UDD connection; fall back to the public mirror if it fails
    if ! psql $PORT $SERVICE -c "" 2>/dev/null ; then
        echo "No local UDD found, use public mirror."
        use_public_mirror
    fi
fi

# Report file, created in the current working directory.
output="bio_covid-19_dependencies_result"

# Start the report from scratch: an RFC 5322 style timestamp line (date -R)
# followed by one empty line.  Later output is appended to this file.
printf 'Last-Update: %s\n\n' "$(date -R)" > "$output"

# Run the report query against the selected UDD instance and append the
# result table to "$output".
#
# - $SERVICE is expanded unquoted on purpose: it may hold several psql
#   arguments (host, port, user, database) that must be split into words.
# - The here-document delimiter is quoted ('EOT') so the SQL reaches psql
#   verbatim; the shell performs no parameter, command or backslash expansion
#   on it.
#
# What the SQL does:
#   1. Creates a temporary table with the package names of interest; the
#      SERIAL id records the order of the list below.
#   2. Emits one line per listed package (ordered by that id) with
#        architectures  binary architectures in sid, restricted to
#                       all/amd64/arm64/ppc64el
#        section        'non-free' if the sid section starts with 'non-free/',
#                       otherwise 'main'
#        releases       releases (sid/bullseye/buster) containing the package;
#                       if none: 'new' when it is found in new_packages,
#                       else the VCS browser URL of a prospective package,
#                       else an empty string
#        conda          registry entry named 'conda:bioconda' of its source
#        bio.tools      registry entry named 'bio.tools' of its source
#        testsuite      testsuite field of its source package in sid
psql --quiet $SERVICE >>"$output" <<'EOT'
BEGIN;

CREATE TEMPORARY TABLE covid19_packages ( id SERIAL PRIMARY KEY, package text ) ;
INSERT INTO covid19_packages VALUES
(DEFAULT,'nextflow'),
(DEFAULT,'capsule'),
(DEFAULT,'snakemake'),
(DEFAULT,'artic'),
(DEFAULT,'bcbio'),
(DEFAULT,'pigx-rnaseq'),
(DEFAULT,'pigx-scrnaseq'),
(DEFAULT,'rosa'),
(DEFAULT,'scrnaseq'),
(DEFAULT,'shovill'),
(DEFAULT,'smartseq2'),
(DEFAULT,'viralcon'),
(DEFAULT,'nf-core'),
(DEFAULT,'abacas'),
(DEFAULT,'adamjava'),
(DEFAULT,'allelecount'),
(DEFAULT,'python3-anndata'),
(DEFAULT,'arvados'),
(DEFAULT,'r-other-ascat'),
(DEFAULT,'assembly-stats'),
(DEFAULT,'atropos'),
(DEFAULT,'augustus'),
(DEFAULT,'bamkit'),
(DEFAULT,'bandage'),
(DEFAULT,'libbbhash-dev'),
(DEFAULT,'bcftools'),
(DEFAULT,'bedtools'),
(DEFAULT,'biobambam2'),
(DEFAULT,'python3-biopython'),
(DEFAULT,'ncbi-blast+'), -- == blast
(DEFAULT,'bowtie2'),
(DEFAULT,'busco'),
(DEFAULT,'bustools'),
(DEFAULT,'busybee'),
(DEFAULT,'bwa'),
(DEFAULT,'canu'),
(DEFAULT,'cat-bat'),
(DEFAULT,'centrifuge'),
(DEFAULT,'changeo'),
(DEFAULT,'r-bioc-complexheatmap'),
(DEFAULT,'python3-cooler'),
(DEFAULT,'cutadapt'),
(DEFAULT,'python3-cyvcf2'),
(DEFAULT,'dextractor'),
(DEFAULT,'diamond-aligner'),
(DEFAULT,'python3-deeptools'),
(DEFAULT,'r-bioc-degreport'),
(DEFAULT,'drop-seq-tools'),
(DEFAULT,'r-bioc-edger'),
(DEFAULT,'fastp'),
(DEFAULT,'fastqc'),
(DEFAULT,'fieldbioinformatics'),
(DEFAULT,'filtlong'),
(DEFAULT,'flappie'),
(DEFAULT,'flash'),
(DEFAULT,'flye'),
(DEFAULT,'freebayes'),
(DEFAULT,'gatk'),
(DEFAULT,'python3-geneimpacts'),
(DEFAULT,'gffread'),
(DEFAULT,'python3-gffutils'),
(DEFAULT,'gindex'),
(DEFAULT,'gjh-asl-json'),
(DEFAULT,'tigr-glimmer'),
(DEFAULT,'grabix'),
(DEFAULT,'graphmap2'),
(DEFAULT,'bio-guppy'),
(DEFAULT,'h5sparse'),
(DEFAULT,'hisat2'),
(DEFAULT,'golang-github-biogo-hts-dev'),
(DEFAULT,'r-bioc-htsfilter'),
(DEFAULT,'libhts-dev'),
(DEFAULT,'python3-htseq'),
(DEFAULT,'iitii'),
(DEFAULT,'python3-intake'),
(DEFAULT,'ivar'),
(DEFAULT,'kallisto'),
(DEFAULT,'kmc'),
(DEFAULT,'kraken2'),
(DEFAULT,'radiant'),  -- == krona - Debian had to change the name
(DEFAULT,'lighter'),
(DEFAULT,'longshot'),
(DEFAULT,'r-cran-lsd'),
(DEFAULT,'lumpy-sv'),
(DEFAULT,'ncbi-magicblast'),
(DEFAULT,'manta'),
(DEFAULT,'python3-mappy'),
(DEFAULT,'marginalign'),
(DEFAULT,'mash'),
(DEFAULT,'masurca'),
(DEFAULT,'mcaller'),
(DEFAULT,'mecat2'),
(DEFAULT,'medaka'),
(DEFAULT,'megahit'),
(DEFAULT,'megalodon'),
(DEFAULT,'metabat'),
(DEFAULT,'metaspades'),
(DEFAULT,'minia'),
(DEFAULT,'minimap2'),
(DEFAULT,'minitour'),
(DEFAULT,'libmmap-allocator-dev'),
(DEFAULT,'mmmulti'),
(DEFAULT,'mosdepth'),
(DEFAULT,'multiqc'),
(DEFAULT,'muscle'),
(DEFAULT,'nanofilt'),
(DEFAULT,'nanolyse'),
(DEFAULT,'python3-nanomath'),
(DEFAULT,'nanoplot'),
(DEFAULT,'nanopolish'),
(DEFAULT,'nanook'),
(DEFAULT,'nanoraw'),
(DEFAULT,'nanosv'),
(DEFAULT,'python3-ncls'),
(DEFAULT,'ngmlr'),
(DEFAULT,'oncofuse'),
(DEFAULT,'openstructure'),
(DEFAULT,'optitype'),
(DEFAULT,'orca'),
(DEFAULT,'python3-pairtools'),
(DEFAULT,'parallel-fastq-dump'),
(DEFAULT,'python3-pauvre'),
(DEFAULT,'picard-tools'),
(DEFAULT,'picopore'),
(DEFAULT,'pinfish'),
(DEFAULT,'plasmidid'),
(DEFAULT,'pilon'),
(DEFAULT,'pomoxis'),
(DEFAULT,'pore'),
(DEFAULT,'pore-c'),
(DEFAULT,'porechop'),
(DEFAULT,'porejuicer'),
(DEFAULT,'poretools'),
(DEFAULT,'pplacer'),
(DEFAULT,'presto'),
(DEFAULT,'prinseq-lite'),
(DEFAULT,'prodigal'),
(DEFAULT,'prokka'),
(DEFAULT,'pybedtools-bin'),
(DEFAULT,'python3-pychopper'),
(DEFAULT,'pycoqc'),
(DEFAULT,'pyomo'),
(DEFAULT,'pyranges'),
(DEFAULT,'python3-pysam'),
(DEFAULT,'python3-vcf'),
(DEFAULT,'qcat'),
(DEFAULT,'qmean'),
(DEFAULT,'qsignature'),
(DEFAULT,'qualimap'),
(DEFAULT,'quast'),
(DEFAULT,'r-cran-alakazam'),
(DEFAULT,'r-cran-shazam'),
(DEFAULT,'r-cran-tigger'),
(DEFAULT,'readucks'),
(DEFAULT,'r-bioc-rsamtools'),
(DEFAULT,'sailfish'),
(DEFAULT,'salmon'),
(DEFAULT,'sambamba'),
(DEFAULT,'samblaster'),
(DEFAULT,'samclip'),
(DEFAULT,'samtools'),
(DEFAULT,'scrappie'),
(DEFAULT,'scribl'),
(DEFAULT,'sepp'),
(DEFAULT,'python3-seqcluster'),
(DEFAULT,'seqlib'),
(DEFAULT,'seqkit'),
(DEFAULT,'seqtk'),
(DEFAULT,'seqwish'),
(DEFAULT,'signalalign'),
(DEFAULT,'shasta'),
(DEFAULT,'skesa'),
(DEFAULT,'sniffles'),
(DEFAULT,'snpeff'),
(DEFAULT,'snpsift'),
(DEFAULT,'spades'),
(DEFAULT,'spoa'),
(DEFAULT,'sra-toolkit'),
(DEFAULT,'rna-star'),
(DEFAULT,'streamformatics'),
(DEFAULT,'strelka'),
(DEFAULT,'python3-streamz'),
(DEFAULT,'tabix'),
(DEFAULT,'tiddit'),
(DEFAULT,'tombo'),
(DEFAULT,'tophat-recondition'),
-- (DEFAULT,'tornado'),
(DEFAULT,'trim-galore'),
(DEFAULT,'trimmomatic'),
(DEFAULT,'tulip'),
(DEFAULT,'umap-learn'),
(DEFAULT,'unicycler'),
(DEFAULT,'varscan'),
(DEFAULT,'vcfanno'),
(DEFAULT,'golang-github-brentp-vcfgo-dev'),
(DEFAULT,'vcftools'),
(DEFAULT,'velvet'),
(DEFAULT,'ensembl-vep'),
(DEFAULT,'vg'),
(DEFAULT,'vienna-rna'),
(DEFAULT,'vsearch'),
(DEFAULT,'vt'),
(DEFAULT,'wham-align'),
(DEFAULT,'yanagiba'),
(DEFAULT,'yanosim'),
(DEFAULT,'libargs-dev'),
(DEFAULT,'libatomicqueue-dev'),
(DEFAULT,'libatomicbitvector-dev'),
(DEFAULT,'libconcurrentqueue-dev'),
(DEFAULT,'tao-json'),
(DEFAULT,'python3-h5py'),
(DEFAULT,'python3-numpy'),
(DEFAULT,'python3-pandas'),
(DEFAULT,'python3-pyarrow'),
(DEFAULT,'python3-sklearn'),
(DEFAULT,'python3-toolz'),
(DEFAULT,'python3-tornado'),
(DEFAULT,'python3-tqdm'),
(DEFAULT,'libxenium-dev'),
(DEFAULT,'xonsh'),
(DEFAULT,'golang-github-shenwei356-xopen-dev'),
(DEFAULT,'python3-yaml')
;

SELECT co.package, architectures, s.section,
   CASE WHEN r.releases IS NULL THEN
        CASE WHEN n.component IS NULL THEN
             CASE WHEN vcs.vcs_browser IS NULL THEN '' ELSE vcs.vcs_browser END
           ELSE 'new' END
     ELSE r.releases
     END AS releases,
     conda.conda,
     bt.biotools AS "bio.tools",
     t.testsuite
   FROM covid19_packages co
  -- architectures of the binary package in sid, one aggregated row per package
  LEFT JOIN (
    SELECT package, string_agg(architecture, ',') AS architectures FROM (
      SELECT DISTINCT package, architecture FROM packages WHERE release in ('sid') AND
        architecture IN ('all', 'amd64', 'arm64', 'ppc64el' ) AND
        package IN (SELECT package FROM covid19_packages)
      ORDER BY package, architecture
      ) tmp GROUP BY package
    ) p ON p.package = co.package
  -- archive area derived from the section prefix
  LEFT JOIN (
   SELECT DISTINCT package, CASE WHEN substring(section from 0 for 10) = 'non-free/' THEN 'non-free' ELSE 'main' END AS section FROM packages WHERE release in ('sid') AND
        package IN (SELECT package FROM covid19_packages)
    ) s ON s.package = co.package
  -- releases containing the package, one aggregated row per package
  LEFT JOIN (
    SELECT package, string_agg(release, ',') AS releases FROM (
      SELECT DISTINCT p.package, p.release, s.sort FROM packages p
         JOIN releases s ON p.release = s.release
         WHERE p.release in ('sid', 'bullseye', 'buster') AND
          package IN (SELECT package FROM covid19_packages)
      ORDER BY p.package, s.sort, p.release
      ) tmp GROUP BY package
    ) r ON r.package = co.package
  -- DISTINCT: identical (package, value) rows in these two tables must not
  -- multiply the result lines for a package.
  -- NOTE(review): not verified against a live UDD whether this is the cause of
  -- the duplicated diamond-aligner line mentioned in the file header TODO.
  LEFT JOIN (SELECT DISTINCT package, component FROM new_packages) n ON n.package = co.package
  LEFT JOIN (SELECT DISTINCT package, vcs_browser FROM blends_prospectivepackages) vcs ON vcs.package = co.package
  LEFT JOIN (SELECT DISTINCT package, entry AS conda FROM (
               SELECT p.source, p.package, entry FROM registry r
                 JOIN (SELECT DISTINCT * FROM (
                         SELECT source, package FROM packages WHERE release in ('sid')
                         UNION
                         SELECT source, package FROM new_packages
                         UNION
                         SELECT source, package FROM blends_prospectivepackages
                        ) pkgs
                      ) p ON r.source = p.source
                WHERE name = 'conda:bioconda') tmp) conda ON conda.package = co.package
  LEFT JOIN (SELECT DISTINCT package, entry AS biotools FROM (
               SELECT p.source, p.package, entry FROM registry r
                 JOIN (SELECT DISTINCT source, package FROM packages WHERE release in ('sid')) p ON r.source = p.source
                WHERE name = 'bio.tools') tmp) bt ON bt.package = co.package
  LEFT OUTER JOIN (SELECT DISTINCT p.package, p.source, s.testsuite FROM
                    (SELECT DISTINCT source, package, release FROM packages WHERE release ='sid') p
                    LEFT JOIN (SELECT source, testsuite FROM -- avoid duplicates when having instances with and without testsuite for different architectures
                        (SELECT source, testsuite, row_number() OVER (PARTITION BY source ORDER BY testsuite) FROM sources WHERE release = 'sid' ) tmp
                        WHERE row_number= 1) s ON p.source = s.source ) t ON co.package  = t.package
  ORDER BY id
  ;
END;
EOT
