From 94719df050f86c4792871391b894f6b596fd39f4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman <r.h.p.vorderman@lumc.nl> Date: Mon, 23 Jul 2018 13:35:25 +0200 Subject: [PATCH] ncbi to 1.0 --- ncbi.wdl | 105 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/ncbi.wdl b/ncbi.wdl index f32959e..4978a59 100644 --- a/ncbi.wdl +++ b/ncbi.wdl @@ -1,41 +1,44 @@ -task genomeDownload { - String outputPath - String? section = "refseq" - String? format = "all" - String? assemblyLevel = "all" - String? taxId - String? refseqCategory - Boolean? humanReadable - String? ncbiBaseUri - Int? parallel - Int? retries - Boolean? verbose=true - Boolean? debug - String? domain = "all" +version 1.0 - String? executable = "ncbi-genome-download" - String? preCommand +task genomeDownload { + input { + String outputPath + String? section = "refseq" + String? format = "all" + String? assemblyLevel = "all" + String? taxId + String? refseqCategory + Boolean? humanReadable + String? ncbiBaseUri + Int? parallel + Int? retries + Boolean? verbose=true + Boolean? debug + String? domain = "all" + String? executable = "ncbi-genome-download" + String? preCommand + } command { set -e -o pipefail - ${preCommand} - ${executable} \ - ${"--section " + section} \ - ${"--format " + format} \ - ${"--assembly-level " + assemblyLevel } \ - ${"--taxid " + taxId } \ - ${"--refseq-category " + refseqCategory} \ - ${"--output-folder " + outputPath } \ - ${true="--human-readable" false="" humanReadable} \ - ${"--uri " + ncbiBaseUri } \ - ${"--parallel " + parallel } \ - ${"--retries " + retries } \ - ${true="--verbose" false="" verbose } \ - ${true="--debug" false ="" debug } \ - ${domain} + ~{preCommand} + ~{executable} \ + ~{"--section " + section} \ + ~{"--format " + format} \ + ~{"--assembly-level " + assemblyLevel } \ + ~{"--taxid " + taxId } \ + ~{"--refseq-category " + refseqCategory} \ + ~{"--output-folder " + outputPath } \ + ~{true="--human-readable" false="" humanReadable} \ + ~{"--uri " + ncbiBaseUri } \ + ~{"--parallel " + parallel } \ + ~{"--retries " + retries } \ + ~{true="--verbose" false="" verbose } \ + ~{true="--debug" false ="" debug } \ + ~{domain} # Check md5sums for all downloaded files - for folder in $(realpath ${outputPath})/*/*/* + for folder in $(realpath ~{outputPath})/*/*/* do ( md5sums="$( @@ -69,20 +72,22 @@ task genomeDownload { task downloadNtFasta{ - String libraryPath - String seqTaxMapPath - Boolean? unzip = true - String ntDir = libraryPath + "/nt" - String ntFilePath = ntDir + "/nt.fna" + input { + String libraryPath + String seqTaxMapPath + Boolean? unzip = true + String ntDir = libraryPath + "/nt" + String ntFilePath = ntDir + "/nt.fna" + } command { set -e -o pipefail - mkdir -p ${ntDir} - rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ${ntDir} - (cd ${ntDir} && md5sum -c nt.gz.md5) + mkdir -p ~{ntDir} + rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir} + (cd ~{ntDir} && md5sum -c nt.gz.md5) # Only unzip when necessary - if ${true='true' false='false' unzip} + if ~{true='true' false='false' unzip} then - zcat ${ntDir}/nt.gz > ${ntFilePath} + zcat ~{ntDir}/nt.gz > ~{ntFilePath} fi } output { @@ -96,16 +101,18 @@ task downloadNtFasta{ } task downloadAccessionToTaxId { - String downloadDir - Boolean gzip = false + input { + String downloadDir + Boolean gzip = false + } command { set -e -o pipefail - mkdir -p ${downloadDir} - rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ${downloadDir} - (cd ${downloadDir} && md5sum -c *.md5) - for file in ${downloadDir}/nucl_*.accession2taxid.gz + mkdir -p ~{downloadDir} + rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ~{downloadDir} + (cd ~{downloadDir} && md5sum -c *.md5) + for file in ~{downloadDir}/nucl_*.accession2taxid.gz do - zcat $file | tail -n +2 | cut -f 2,3 ${true="| gzip " false='' gzip}> $file.seqtaxmap${true='.gz' false='' gzip} + zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip " false='' gzip}> $file.seqtaxmap~{true='.gz' false='' gzip} done } output { -- GitLab