Skip to content
Snippets Groups Projects
Unverified commit ca35e74a, authored by Cats and committed by GitHub
Browse files

Merge pull request #92 from biowdl/BIOWDL-199

Dockerize GATK. Scattering now works with containers. Tabix another docker image.
parents e55fc372 68ff64af
No related branches found
No related tags found
No related merge requests found
...@@ -205,54 +205,60 @@ task FastqSync { ...@@ -205,54 +205,60 @@ task FastqSync {
task ReorderGlobbedScatters { task ReorderGlobbedScatters {
input { input {
Array[File]+ scatters Array[File]+ scatters
String scatterDir # Should not be changed from the main pipeline. As it should not influence results.
String dockerTag = "3.6"
} }
command <<< command <<<
set -e
# Copy all the scatter files to the CWD so the output matches paths in
# the cwd.
for file in ~{sep=" " scatters}
do cp $file .
done
python << CODE python << CODE
from os.path import basename from os.path import basename
scatters = ['~{sep="','" scatters}'] scatters = ['~{sep="','" scatters}']
splitext = [basename(x).split(".") for x in scatters] splitext = [basename(x).split(".") for x in scatters]
splitnum = [x.split("-") + [y] for x,y in splitext] splitnum = [x.split("-") + [y] for x,y in splitext]
ordered = sorted(splitnum, key=lambda x: int(x[1])) ordered = sorted(splitnum, key=lambda x: int(x[1]))
merged = ["~{scatterDir}/{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered]
for x in merged: for x in merged:
print(x) print(x)
CODE CODE
>>> >>>
output { output {
Array[String] reorderedScatters = read_lines(stdout()) Array[File] reorderedScatters = read_lines(stdout())
} }
runtime { runtime {
memory: 1 docker: "python:" + dockerTag
# 4 gigs of memory to be able to build the docker image in singularity
memory: 4
} }
} }
task ScatterRegions { task ScatterRegions {
input { input {
String? preCommand
Reference reference Reference reference
String outputDirPath
File? toolJar
Int? scatterSize Int? scatterSize
File? regions File? regions
Boolean notSplitContigs = false Boolean notSplitContigs = false
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3.0 Float memoryMultiplier = 3.0
String dockerTag = "0.2--0"
} }
String toolCommand = if defined(toolJar) # OutDirPath must be defined here because the glob process relies on
then "java -Xmx" + memory + "G -jar " +toolJar # linking. This path must be in the containers filesystem, otherwise the
else "biopet-scatterregions -Xmx" + memory + "G" # linking does not work.
String outputDirPath = "scatters"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand}
mkdir -p ~{outputDirPath} mkdir -p ~{outputDirPath}
~{toolCommand} \ biopet-scatterregions -Xmx~{memory}G \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-o ~{outputDirPath} \ -o ~{outputDirPath} \
~{"-s " + scatterSize} \ ~{"-s " + scatterSize} \
...@@ -265,6 +271,7 @@ task ScatterRegions { ...@@ -265,6 +271,7 @@ task ScatterRegions {
} }
runtime { runtime {
docker: "quay.io/biocontainers/biopet-scatterregions:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
......
...@@ -5,8 +5,6 @@ import "common.wdl" ...@@ -5,8 +5,6 @@ import "common.wdl"
# Apply Base Quality Score Recalibration (BQSR) model # Apply Base Quality Score Recalibration (BQSR) model
task ApplyBQSR { task ApplyBQSR {
input { input {
String? preCommand
File? gatkJar
IndexedBamFile inputBam IndexedBamFile inputBam
String outputBamPath String outputBamPath
File recalibrationReport File recalibrationReport
...@@ -15,28 +13,25 @@ task ApplyBQSR { ...@@ -15,28 +13,25 @@ task ApplyBQSR {
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3.0 Float memoryMultiplier = 3.0
String dockerTag = "4.1.0.0--0"
} }
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{outputBamPath})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
ApplyBQSR \ ApplyBQSR \
--create-output-bam-md5 \ --create-output-bam-md5 \
--add-output-sam-program-record \ --add-output-sam-program-record \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-I ~{inputBam.file} \ -I ~{inputBam.file} \
--use-original-qualities \ --use-original-qualities \
-O ~{outputBamPath} \ -O ~{outputBamPath} \
-bqsr ~{recalibrationReport} \ -bqsr ~{recalibrationReport} \
--static-quantized-quals 10 \ --static-quantized-quals 10 \
--static-quantized-quals 20 \ --static-quantized-quals 20 \
--static-quantized-quals 30 \ --static-quantized-quals 30 \
-L ~{sep=" -L " sequenceGroupInterval} -L ~{sep=" -L " sequenceGroupInterval}
} }
output { output {
...@@ -48,6 +43,7 @@ task ApplyBQSR { ...@@ -48,6 +43,7 @@ task ApplyBQSR {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
...@@ -55,8 +51,6 @@ task ApplyBQSR { ...@@ -55,8 +51,6 @@ task ApplyBQSR {
# Generate Base Quality Score Recalibration (BQSR) model # Generate Base Quality Score Recalibration (BQSR) model
task BaseRecalibrator { task BaseRecalibrator {
input { input {
String? preCommand
File? gatkJar
IndexedBamFile inputBam IndexedBamFile inputBam
String recalibrationReportPath String recalibrationReportPath
Array[File]+ sequenceGroupInterval Array[File]+ sequenceGroupInterval
...@@ -66,6 +60,7 @@ task BaseRecalibrator { ...@@ -66,6 +60,7 @@ task BaseRecalibrator {
Reference reference Reference reference
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3.0 Float memoryMultiplier = 3.0
String dockerTag = "4.1.0.0--0"
} }
Array[File]+ knownIndelsSitesVCFsArg = flatten([ Array[File]+ knownIndelsSitesVCFsArg = flatten([
...@@ -73,14 +68,10 @@ task BaseRecalibrator { ...@@ -73,14 +68,10 @@ task BaseRecalibrator {
[select_first([dbsnpVCF]).file] [select_first([dbsnpVCF]).file]
]) ])
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{recalibrationReportPath})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
BaseRecalibrator \ BaseRecalibrator \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-I ~{inputBam.file} \ -I ~{inputBam.file} \
...@@ -95,35 +86,28 @@ task BaseRecalibrator { ...@@ -95,35 +86,28 @@ task BaseRecalibrator {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
task CombineGVCFs { task CombineGVCFs {
input { input {
String? preCommand
Array[File]+ gvcfFiles Array[File]+ gvcfFiles
Array[File]+ gvcfFilesIndex Array[File]+ gvcfFilesIndex
Array[File]+ intervals Array[File]+ intervals
String outputPath String outputPath
String? gatkJar
Reference reference Reference reference
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3.0 Float memoryMultiplier = 3.0
String dockerTag = "4.1.0.0--0"
} }
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{outputPath})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
CombineGVCFs \ CombineGVCFs \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-O ~{outputPath} \ -O ~{outputPath} \
...@@ -139,6 +123,7 @@ task CombineGVCFs { ...@@ -139,6 +123,7 @@ task CombineGVCFs {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
...@@ -146,23 +131,18 @@ task CombineGVCFs { ...@@ -146,23 +131,18 @@ task CombineGVCFs {
# Combine multiple recalibration tables from scattered BaseRecalibrator runs # Combine multiple recalibration tables from scattered BaseRecalibrator runs
task GatherBqsrReports { task GatherBqsrReports {
input { input {
String? preCommand
String? gatkJar
Array[File] inputBQSRreports Array[File] inputBQSRreports
String outputReportPath String outputReportPath
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3.0 Float memoryMultiplier = 3.0
String dockerTag = "4.1.0.0--0"
} }
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{outputReportPath})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
GatherBQSRReports \ GatherBQSRReports \
-I ~{sep=' -I ' inputBQSRreports} \ -I ~{sep=' -I ' inputBQSRreports} \
-O ~{outputReportPath} -O ~{outputReportPath}
...@@ -173,39 +153,31 @@ task GatherBqsrReports { ...@@ -173,39 +153,31 @@ task GatherBqsrReports {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
task GenotypeGVCFs { task GenotypeGVCFs {
input { input {
String? preCommand
Array[File]+ gvcfFiles Array[File]+ gvcfFiles
Array[File]+ gvcfFilesIndex Array[File]+ gvcfFilesIndex
Array[File]+ intervals Array[File]+ intervals
String outputPath String outputPath
String? gatkJar
Reference reference Reference reference
IndexedVcfFile? dbsnpVCF IndexedVcfFile? dbsnpVCF
Int memory = 6 Int memory = 6
Float memoryMultiplier = 2.0 Float memoryMultiplier = 2.0
String dockerTag = "4.1.0.0--0"
} }
File dbsnpFile = if (defined(dbsnpVCF)) then select_first([dbsnpVCF]).file else "" File dbsnpFile = if (defined(dbsnpVCF)) then select_first([dbsnpVCF]).file else ""
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{outputPath})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
GenotypeGVCFs \ GenotypeGVCFs \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-O ~{outputPath} \ -O ~{outputPath} \
...@@ -224,7 +196,8 @@ task GenotypeGVCFs { ...@@ -224,7 +196,8 @@ task GenotypeGVCFs {
} }
} }
runtime{ runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
...@@ -232,31 +205,25 @@ task GenotypeGVCFs { ...@@ -232,31 +205,25 @@ task GenotypeGVCFs {
# Call variants on a single sample with HaplotypeCaller to produce a GVCF # Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf { task HaplotypeCallerGvcf {
input { input {
String? preCommand
Array[File]+ inputBams Array[File]+ inputBams
Array[File]+ inputBamsIndex Array[File]+ inputBamsIndex
Array[File]+ intervalList Array[File]+ intervalList
String gvcfPath String gvcfPath
Reference reference Reference reference
Float contamination = 0.0 Float contamination = 0.0
String? gatkJar
IndexedVcfFile? dbsnpVCF IndexedVcfFile? dbsnpVCF
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3 Float memoryMultiplier = 3
String dockerTag = "4.1.0.0--0"
} }
File dbsnpFile = if (defined(dbsnpVCF)) then select_first([dbsnpVCF]).file else "" File dbsnpFile = if (defined(dbsnpVCF)) then select_first([dbsnpVCF]).file else ""
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{gvcfPath})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
HaplotypeCaller \ HaplotypeCaller \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-O ~{gvcfPath} \ -O ~{gvcfPath} \
...@@ -275,14 +242,13 @@ task HaplotypeCallerGvcf { ...@@ -275,14 +242,13 @@ task HaplotypeCallerGvcf {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
task MuTect2 { task MuTect2 {
input { input {
String? preCommand
Array[File]+ inputBams Array[File]+ inputBams
Array[File]+ inputBamsIndex Array[File]+ inputBamsIndex
Reference reference Reference reference
...@@ -291,19 +257,15 @@ task MuTect2 { ...@@ -291,19 +257,15 @@ task MuTect2 {
String? normalSample String? normalSample
Array[File]+ intervals Array[File]+ intervals
String? gatkJar
Int memory = 4 Int memory = 4
Float memoryMultiplier = 3 Float memoryMultiplier = 3
String dockerTag = "4.1.0.0--0"
} }
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{outputVcf})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
Mutect2 \ Mutect2 \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
-I ~{sep=" -I " inputBams} \ -I ~{sep=" -I " inputBams} \
...@@ -321,32 +283,27 @@ task MuTect2 { ...@@ -321,32 +283,27 @@ task MuTect2 {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
task SplitNCigarReads { task SplitNCigarReads {
input { input {
String? preCommand
IndexedBamFile inputBam IndexedBamFile inputBam
Reference reference Reference reference
String outputBam String outputBam
String? gatkJar
Array[File]+ intervals Array[File]+ intervals
Int memory = 4 Int memory = 4
Float memoryMultiplier = 4 Float memoryMultiplier = 4
String dockerTag = "4.1.0.0--0"
} }
String toolCommand = if defined(gatkJar)
then "java -Xmx" + memory + "G -jar " + gatkJar
else "gatk --java-options -Xmx" + memory + "G"
command { command {
set -e -o pipefail set -e -o pipefail
~{preCommand} mkdir -p $(dirname ~{outputBam})
~{toolCommand} \ gatk --java-options -Xmx~{memory}G \
SplitNCigarReads \ SplitNCigarReads \
-I ~{inputBam.file} \ -I ~{inputBam.file} \
-R ~{reference.fasta} \ -R ~{reference.fasta} \
...@@ -362,6 +319,7 @@ task SplitNCigarReads { ...@@ -362,6 +319,7 @@ task SplitNCigarReads {
} }
runtime { runtime {
docker: "quay.io/biocontainers/gatk4:" + dockerTag
memory: ceil(memory * memoryMultiplier) memory: ceil(memory * memoryMultiplier)
} }
} }
...@@ -7,6 +7,7 @@ task BgzipAndIndex { ...@@ -7,6 +7,7 @@ task BgzipAndIndex {
File inputFile File inputFile
String outputDir String outputDir
String type = "vcf" String type = "vcf"
String dockerTag = "0.2.6--ha92aebf_0"
} }
String outputGz = outputDir + "/" + basename(inputFile) + ".gz" String outputGz = outputDir + "/" + basename(inputFile) + ".gz"
...@@ -20,6 +21,10 @@ task BgzipAndIndex { ...@@ -20,6 +21,10 @@ task BgzipAndIndex {
File compressed = outputGz File compressed = outputGz
File index = outputGz + ".tbi" File index = outputGz + ".tbi"
} }
runtime {
docker: "quay.io/biocontainers/tabix:" + dockerTag
}
} }
task Index { task Index {
...@@ -185,6 +190,7 @@ task Tabix { ...@@ -185,6 +190,7 @@ task Tabix {
input { input {
String inputFile String inputFile
String type = "vcf" String type = "vcf"
String dockerTag = "0.2.6--ha92aebf_0"
} }
command { command {
...@@ -194,6 +200,10 @@ task Tabix { ...@@ -194,6 +200,10 @@ task Tabix {
output { output {
File index = inputFile + ".tbi" File index = inputFile + ".tbi"
} }
runtime {
docker: "quay.io/biocontainers/tabix:" + dockerTag
}
} }
task View { task View {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment