From 94719df050f86c4792871391b894f6b596fd39f4 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 23 Jul 2018 13:35:25 +0200
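Subject: [PATCH] Convert ncbi.wdl to WDL version 1.0

Add the `version 1.0` header, wrap the task declarations of
genomeDownload, downloadNtFasta and downloadAccessionToTaxId in
`input {}` blocks, and switch command placeholders from `${...}` to
`~{...}`.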

---
 ncbi.wdl | 105 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 56 insertions(+), 49 deletions(-)

diff --git a/ncbi.wdl b/ncbi.wdl
index f32959e..4978a59 100644
--- a/ncbi.wdl
+++ b/ncbi.wdl
@@ -1,41 +1,44 @@
-task genomeDownload {
-    String outputPath
-    String? section = "refseq"
-    String? format = "all"
-    String? assemblyLevel = "all"
-    String? taxId
-    String? refseqCategory
-    Boolean? humanReadable
-    String? ncbiBaseUri
-    Int? parallel
-    Int? retries
-    Boolean? verbose=true
-    Boolean? debug
-    String? domain = "all"
+version 1.0
 
-    String? executable = "ncbi-genome-download"
-    String? preCommand
+task genomeDownload {
+    input {
+        String outputPath
+        String? section = "refseq"
+        String? format = "all"
+        String? assemblyLevel = "all"
+        String? taxId
+        String? refseqCategory
+        Boolean? humanReadable
+        String? ncbiBaseUri
+        Int? parallel
+        Int? retries
+        Boolean? verbose=true
+        Boolean? debug
+        String? domain = "all"
 
+        String? executable = "ncbi-genome-download"
+        String? preCommand
+    }
     command {
         set -e -o pipefail
-        ${preCommand}
-        ${executable} \
-        ${"--section " + section} \
-        ${"--format " + format} \
-        ${"--assembly-level " + assemblyLevel } \
-        ${"--taxid " + taxId } \
-        ${"--refseq-category " + refseqCategory} \
-        ${"--output-folder " + outputPath } \
-        ${true="--human-readable" false="" humanReadable} \
-        ${"--uri " + ncbiBaseUri } \
-        ${"--parallel " + parallel } \
-        ${"--retries " + retries } \
-        ${true="--verbose" false="" verbose } \
-        ${true="--debug" false ="" debug } \
-        ${domain}
+        ~{preCommand}
+        ~{executable} \
+        ~{"--section " + section} \
+        ~{"--format " + format} \
+        ~{"--assembly-level " + assemblyLevel } \
+        ~{"--taxid " + taxId } \
+        ~{"--refseq-category " + refseqCategory} \
+        ~{"--output-folder " + outputPath } \
+        ~{true="--human-readable" false="" humanReadable} \
+        ~{"--uri " + ncbiBaseUri } \
+        ~{"--parallel " + parallel } \
+        ~{"--retries " + retries } \
+        ~{true="--verbose" false="" verbose } \
+        ~{true="--debug" false ="" debug } \
+        ~{domain}
 
         # Check md5sums for all downloaded files
-        for folder in $(realpath ${outputPath})/*/*/*
+        for folder in $(realpath ~{outputPath})/*/*/*
             do
                 (
                 md5sums="$(
@@ -69,20 +72,22 @@ task genomeDownload {
 
 
 task downloadNtFasta{
-    String libraryPath
-    String seqTaxMapPath
-    Boolean? unzip = true
-    String ntDir = libraryPath + "/nt"
-    String ntFilePath = ntDir + "/nt.fna"
+    input {
+        String libraryPath
+        String seqTaxMapPath
+        Boolean? unzip = true
+        String ntDir = libraryPath + "/nt"
+        String ntFilePath = ntDir + "/nt.fna"
+    }
     command {
         set -e -o pipefail
-        mkdir -p ${ntDir}
-        rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ${ntDir}
-        (cd ${ntDir} && md5sum -c nt.gz.md5)
+        mkdir -p ~{ntDir}
+        rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir}
+        (cd ~{ntDir} && md5sum -c nt.gz.md5)
         # Only unzip when necessary
-        if ${true='true' false='false' unzip}
+        if ~{true='true' false='false' unzip}
         then
-            zcat ${ntDir}/nt.gz > ${ntFilePath}
+            zcat ~{ntDir}/nt.gz > ~{ntFilePath}
         fi
         }
     output {
@@ -96,16 +101,18 @@ task downloadNtFasta{
 }
 
 task downloadAccessionToTaxId {
-    String downloadDir
-    Boolean gzip = false
+    input {
+        String downloadDir
+        Boolean gzip = false
+    }
     command {
         set -e -o pipefail
-        mkdir -p ${downloadDir}
-        rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ${downloadDir}
-        (cd ${downloadDir} && md5sum -c *.md5)
-        for file in ${downloadDir}/nucl_*.accession2taxid.gz
+        mkdir -p ~{downloadDir}
+        rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ~{downloadDir}
+        (cd ~{downloadDir} && md5sum -c *.md5)
+        for file in ~{downloadDir}/nucl_*.accession2taxid.gz
         do
-            zcat $file | tail -n +2 | cut -f 2,3 ${true="| gzip " false='' gzip}> $file.seqtaxmap${true='.gz' false='' gzip}
+            zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip " false='' gzip}> $file.seqtaxmap~{true='.gz' false='' gzip}
         done
         }
     output {
-- 
GitLab