Skip to content
Snippets Groups Projects
Commit 94719df0 authored by Ruben Vorderman
Browse files

ncbi to 1.0

parent ab88bc1a
No related branches found
No related tags found
1 merge request: !34 "Move all files to version 1.0"
task genomeDownload { version 1.0
String outputPath
String? section = "refseq"
String? format = "all"
String? assemblyLevel = "all"
String? taxId
String? refseqCategory
Boolean? humanReadable
String? ncbiBaseUri
Int? parallel
Int? retries
Boolean? verbose=true
Boolean? debug
String? domain = "all"
String? executable = "ncbi-genome-download" task genomeDownload {
String? preCommand input {
String outputPath
String? section = "refseq"
String? format = "all"
String? assemblyLevel = "all"
String? taxId
String? refseqCategory
Boolean? humanReadable
String? ncbiBaseUri
Int? parallel
Int? retries
Boolean? verbose=true
Boolean? debug
String? domain = "all"
String? executable = "ncbi-genome-download"
String? preCommand
}
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ~{preCommand}
${executable} \ ~{executable} \
${"--section " + section} \ ~{"--section " + section} \
${"--format " + format} \ ~{"--format " + format} \
${"--assembly-level " + assemblyLevel } \ ~{"--assembly-level " + assemblyLevel } \
${"--taxid " + taxId } \ ~{"--taxid " + taxId } \
${"--refseq-category " + refseqCategory} \ ~{"--refseq-category " + refseqCategory} \
${"--output-folder " + outputPath } \ ~{"--output-folder " + outputPath } \
${true="--human-readable" false="" humanReadable} \ ~{true="--human-readable" false="" humanReadable} \
${"--uri " + ncbiBaseUri } \ ~{"--uri " + ncbiBaseUri } \
${"--parallel " + parallel } \ ~{"--parallel " + parallel } \
${"--retries " + retries } \ ~{"--retries " + retries } \
${true="--verbose" false="" verbose } \ ~{true="--verbose" false="" verbose } \
${true="--debug" false ="" debug } \ ~{true="--debug" false ="" debug } \
${domain} ~{domain}
# Check md5sums for all downloaded files # Check md5sums for all downloaded files
for folder in $(realpath ${outputPath})/*/*/* for folder in $(realpath ~{outputPath})/*/*/*
do do
( (
md5sums="$( md5sums="$(
...@@ -69,20 +72,22 @@ task genomeDownload { ...@@ -69,20 +72,22 @@ task genomeDownload {
task downloadNtFasta{ task downloadNtFasta{
String libraryPath input {
String seqTaxMapPath String libraryPath
Boolean? unzip = true String seqTaxMapPath
String ntDir = libraryPath + "/nt" Boolean? unzip = true
String ntFilePath = ntDir + "/nt.fna" String ntDir = libraryPath + "/nt"
String ntFilePath = ntDir + "/nt.fna"
}
command { command {
set -e -o pipefail set -e -o pipefail
mkdir -p ${ntDir} mkdir -p ~{ntDir}
rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ${ntDir} rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir}
(cd ${ntDir} && md5sum -c nt.gz.md5) (cd ~{ntDir} && md5sum -c nt.gz.md5)
# Only unzip when necessary # Only unzip when necessary
if ${true='true' false='false' unzip} if ~{true='true' false='false' unzip}
then then
zcat ${ntDir}/nt.gz > ${ntFilePath} zcat ~{ntDir}/nt.gz > ~{ntFilePath}
fi fi
} }
output { output {
...@@ -96,16 +101,18 @@ task downloadNtFasta{ ...@@ -96,16 +101,18 @@ task downloadNtFasta{
} }
task downloadAccessionToTaxId { task downloadAccessionToTaxId {
String downloadDir input {
Boolean gzip = false String downloadDir
Boolean gzip = false
}
command { command {
set -e -o pipefail set -e -o pipefail
mkdir -p ${downloadDir} mkdir -p ~{downloadDir}
rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ${downloadDir} rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ~{downloadDir}
(cd ${downloadDir} && md5sum -c *.md5) (cd ~{downloadDir} && md5sum -c *.md5)
for file in ${downloadDir}/nucl_*.accession2taxid.gz for file in ~{downloadDir}/nucl_*.accession2taxid.gz
do do
zcat $file | tail -n +2 | cut -f 2,3 ${true="| gzip " false='' gzip}> $file.seqtaxmap${true='.gz' false='' gzip} zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip " false='' gzip}> $file.seqtaxmap~{true='.gz' false='' gzip}
done done
} }
output { output {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment