Skip to content
Snippets Groups Projects
Commit 1724ee74 authored by Mei's avatar Mei
Browse files

Merge remote-tracking branch 'origin/develop' into biowdl-307

parents 33f53d10 d40e5a34
No related branches found
No related tags found
No related merge requests found
language: java
# We use conda to install cromwell.
language: python
python:
- 3.6
before_install:
# Install conda
- export MINICONDA=${HOME}/miniconda
- export PATH=${MINICONDA}/bin:${PATH}
- wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- bash miniconda.sh -b -f -p ${MINICONDA}
- conda config --set always_yes yes
- conda config --add channels defaults
- conda config --add channels bioconda
- conda config --add channels conda-forge
install:
- conda install cromwell
script:
- set -e
- export CROMWELL_VERSION=35
- wget https://github.com/broadinstitute/cromwell/releases/download/$CROMWELL_VERSION/womtool-$CROMWELL_VERSION.jar
- for F in `find -name "*.wdl"`; do echo $F; java -jar womtool-*.jar validate $F; done
- 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then git submodule foreach --recursive git checkout $TRAVIS_BRANCH && git submodule foreach --recursive git pull; fi'
- "git diff --exit-code || (echo ERROR: Git changes detected. Please update submodules && exit 1)"
- set -e
- for FILE in $(find -name "*.wdl"); do echo $FILE; womtool validate $FILE; done
- 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then git submodule foreach git checkout develop && git submodule foreach git pull; fi'
- "git diff --exit-code || (echo ERROR: Git changes detected. Please update submodules && exit 1)"
......@@ -11,6 +11,24 @@ that users understand how the changes affect the new version.
version 1.0.0-dev
---------------------------
+ Removed "pipefail" from the command sections of the TALON and TranscriptClean tasks
+ Add WDL task for Minimap2
+ Add WDL task for TALON
+ Add WDL task for TranscriptClean
+ Fastqsplitter: fix mkdir command to work with biocontainer's busybox mkdir
+ Cutadapt: simplify interface
+ Bigger memory multiplier in mutect to take in account bigger vmem usage
+ Cutadapt: Remove default adapter
+ Fastqsplitter: use version 1.1.
+ Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency
+ Common: Update dockerTag to dockerImage.
+ GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers.
+ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls).
+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0.
+ Mutect2: Add necessary missing index attribute for panel of normals.
+ MultiQC: Add memory variable to multiqc task.
+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs.
+ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired VCF filtering script.
+ Cutadapt: If the output is a gzipped file, compress with level 1 (instead of default 6).
+ Cutadapt: Fix issues with read2output when using single-end reads.
+ Add feature type, idattr and additional attributes to htseq-count.
......
......@@ -77,7 +77,7 @@ task Copy {
Boolean recursive = false
# Version not that important as long as it is stable.
String dockerTag = "5.0.2"
String dockerImage = "bash:5.0.2"
}
command {
......@@ -91,7 +91,7 @@ task Copy {
}
runtime {
docker: "bash:" + dockerTag
docker: dockerImage
}
}
......@@ -155,7 +155,7 @@ task YamlToJson {
input {
File yaml
String outputJson = basename(yaml, "\.ya?ml$") + ".json"
String dockerTag = "3.13-py37-slim"
String dockerImage = "biowdl/pyyaml:3.13-py37-slim"
}
command {
set -e
......@@ -174,7 +174,7 @@ task YamlToJson {
}
runtime {
docker: "biowdl/pyyaml:" + dockerTag
docker: dockerImage
}
}
......
......@@ -7,16 +7,12 @@ task Cutadapt {
String read1output = "cut_r1.fq.gz"
String? read2output
String? format
Array[String]+? adapter
Array[String]+? front
Array[String]+? anywhere
Array[String]+? adapterRead2
Array[String]+? frontRead2
Array[String]+? anywhereRead2
# FIXME: default should be set at the subworkflow level, not here. Needs to wait for cromwell fix.
Array[String]+? adapterBoth = ["AGATCGGAAGAG"]
# contaminations = anywhereBoth
Array[String]+? contaminations
Array[String] adapter = []
Array[String] front = []
Array[String] anywhere = []
Array[String] adapterRead2 = []
Array[String] frontRead2 = []
Array[String] anywhereRead2 = []
Boolean? interleaved
String? pairFilter
Float? errorRate
......@@ -74,25 +70,7 @@ task Cutadapt {
then "mkdir -p $(dirname " + realRead2output + ")"
else ""
# FIXME: This crappy overengineering can be removed once cromwell can handle subworkflow inputs correctly.
# Some WDL magic here to set both adapters with one setting.
# If then else's are needed to keep the variable optional and undefined
Array[String]+? adapterForward = if (defined(adapter) || defined(adapterBoth))
then select_first([adapter, adapterBoth])
else adapter
# Check if read2 is defined before applying adapters.
Array[String]+? adapterReverse = if (defined(read2) && (defined(adapterRead2) || defined(adapterBoth)))
then select_first([adapterRead2, adapterBoth])
else adapterRead2
# Same for contaminations
Array[String]+? anywhereForward = if (defined(anywhere) || defined(contaminations))
then select_first([anywhere, contaminations])
else anywhere
Array[String]+? anywhereReverse = if (defined(read2) && (defined(anywhereRead2) || defined(contaminations)))
then select_first([anywhereRead2, contaminations])
else anywhereRead2
# FIXME: Use prefix() function for adapter, adapterRead2, etc.
command {
set -e
~{"mkdir -p $(dirname " + read1output + ")"}
......@@ -100,12 +78,12 @@ task Cutadapt {
cutadapt \
~{"--cores=" + cores} \
~{true="-Z" false="" Z} \
~{true="-a" false="" defined(adapterForward)} ~{sep=" -a " adapterForward} \
~{true="-A" false="" defined(adapterReverse)} ~{sep=" -A " adapterReverse} \
~{true="-g" false="" defined(front)} ~{sep=" -g " front} \
~{true="-G" false="" defined(frontRead2)} ~{sep=" -G " frontRead2} \
~{true="-b" false="" defined(anywhereForward)} ~{sep=" -b " anywhereForward} \
~{true="-B" false="" defined(anywhereReverse)} ~{sep=" -B " anywhereReverse} \
~{true="-a" false="" length(adapter) > 0} ~{sep=" -a " adapter} \
~{true="-A" false="" length(adapterRead2) > 0} ~{sep=" -A " adapterRead2} \
~{true="-g" false="" length(front) > 0} ~{sep=" -g " front} \
~{true="-G" false="" length(frontRead2) > 0} ~{sep=" -G " frontRead2} \
~{true="-b" false="" length(anywhere) > 0} ~{sep=" -b " anywhere} \
~{true="-B" false="" length(anywhereRead2) > 0} ~{sep=" -B " anywhereRead2} \
--output ~{read1output} ~{if defined(read2) then "-p " + realRead2output else ""} \
~{"--to-short-output " + tooShortOutputPath} \
~{"--to-short-paired-output " + tooShortPairedOutputPath} \
......
......@@ -26,7 +26,7 @@ task Fastqsplitter {
input {
File inputFastq
Array[String]+ outputPaths
String dockerImage = "quay.io/biocontainers/fastqsplitter:1.0.0--py_0"
String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1"
Int? compressionLevel
Int? threadsPerFile
# fastqsplitter utilizes one thread per input file and one or more threads per output file + one thread for the application.
......@@ -34,15 +34,18 @@ task Fastqsplitter {
Int cores = 1 + ceil(0.5 * length(outputPaths))
}
command {
# Busybox mkdir does not accept multiple paths.
command <<<
set -e
mkdir -p $(dirname ~{sep=' ' outputPaths})
for FILE in ~{sep=' ' outputPaths}
do mkdir -p $(dirname $FILE)
done
fastqsplitter \
~{"-c " + compressionLevel} \
~{"-t " + threadsPerFile} \
-i ~{inputFastq} \
-o ~{sep=' -o ' outputPaths}
}
>>>
output {
Array[File] chunks = outputPaths
......
......@@ -7,7 +7,7 @@ task ApplyBQSR {
File inputBamIndex
String outputBamPath
File recalibrationReport
Array[File]+ sequenceGroupInterval
Array[File] sequenceGroupInterval = []
File referenceFasta
File referenceFastaDict
File referenceFastaFai
......@@ -32,7 +32,7 @@ task ApplyBQSR {
--static-quantized-quals 10 \
--static-quantized-quals 20 \
--static-quantized-quals 30 \
-L ~{sep=" -L " sequenceGroupInterval}
~{true="-L" false="" length(sequenceGroupInterval) > 0} ~{sep=' -L ' sequenceGroupInterval}
}
output {
......@@ -53,7 +53,7 @@ task BaseRecalibrator {
File inputBam
File inputBamIndex
String recalibrationReportPath
Array[File]+ sequenceGroupInterval
Array[File] sequenceGroupInterval = []
Array[File]? knownIndelsSitesVCFs
Array[File]? knownIndelsSitesVCFIndexes
File? dbsnpVCF
......@@ -82,7 +82,7 @@ task BaseRecalibrator {
--use-original-qualities \
-O ~{recalibrationReportPath} \
--known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \
-L ~{sep=" -L " sequenceGroupInterval}
~{true="-L" false="" length(sequenceGroupInterval) > 0} ~{sep=' -L ' sequenceGroupInterval}
}
output {
......@@ -258,12 +258,17 @@ task MuTect2 {
String outputVcf
String tumorSample
String? normalSample
File? germlineResource
File? germlineResourceIndex
File? panelOfNormals
File? panelOfNormalsIndex
String f1r2TarGz = "f1r2.tar.gz"
Array[File]+ intervals
String outputStats = outputVcf + ".stats"
Int memory = 4
Float memoryMultiplier = 3
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
Float memoryMultiplier = 4
String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
}
command {
......@@ -275,7 +280,9 @@ task MuTect2 {
-I ~{sep=" -I " inputBams} \
-tumor ~{tumorSample} \
~{"-normal " + normalSample} \
~{"--germline-resource " + germlineResource} \
~{"--panel-of-normals " + panelOfNormals} \
~{"--f1r2-tar-gz " + f1r2TarGz} \
-O ~{outputVcf} \
-L ~{sep=" -L " intervals}
}
......@@ -283,6 +290,178 @@ task MuTect2 {
output {
File vcfFile = outputVcf
File vcfFileIndex = outputVcf + ".tbi"
File f1r2File = f1r2TarGz
File stats = outputStats
}
runtime {
docker: dockerImage
memory: ceil(memory * memoryMultiplier)
}
}
# Runs GATK LearnReadOrientationModel on one or more Mutect2 f1r2 counts
# archives, producing an artifact-priors table.
task LearnReadOrientationModel {
    input {
        # One or more f1r2 tar.gz files (as produced by Mutect2's --f1r2-tar-gz).
        Array[File]+ f1r2TarGz
        # JVM heap size in GB; container memory is ceil(memory * memoryMultiplier).
        Int memory = 12
        Float memoryMultiplier = 2
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        LearnReadOrientationModel \
        -I ~{sep=" -I " f1r2TarGz} \
        -O "artifact-priors.tar.gz"
    }

    output {
        # Fixed file name written by the -O argument above.
        File artifactPriorsTable = "artifact-priors.tar.gz"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Merges the per-shard .stats files from scattered Mutect2 runs into a single
# stats file using GATK MergeMutectStats.
task MergeStats {
    input {
        # One or more Mutect2 stats files to merge.
        Array[File]+ stats
        # JVM heap size in GB; container memory is ceil(memory * memoryMultiplier).
        Int memory = 14
        Float memoryMultiplier = 2
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        MergeMutectStats \
        -stats ~{sep=" -stats " stats} \
        -O "merged.stats"
    }

    output {
        # Fixed file name written by the -O argument above.
        File mergedStats = "merged.stats"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Runs GATK GetPileupSummaries on a single BAM, writing
# "<outputPrefix>-pileups.table" (input to CalculateContamination).
task GetPileupSummaries {
    input {
        File sampleBam
        File sampleBamIndex
        # VCF of common variants passed via -V.
        File variantsForContamination
        # Sites passed via -L to restrict the traversal.
        File sitesForContamination
        # Index files are not referenced in the command; presumably listed so the
        # execution engine localizes them next to their data files — TODO confirm.
        File variantsForContaminationIndex
        File sitesForContaminationIndex
        String outputPrefix
        # JVM heap size in GB; container memory is ceil(memory * memoryMultiplier).
        Int memory = 12
        Float memoryMultiplier = 2
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        GetPileupSummaries \
        -I ~{sampleBam} \
        -V ~{variantsForContamination} \
        -L ~{sitesForContamination} \
        -O ~{outputPrefix + "-pileups.table"}
    }

    output {
        File pileups = outputPrefix + "-pileups.table"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Runs GATK CalculateContamination on tumor pileups (optionally with matched
# normal pileups), writing a contamination table and a tumor-segmentation table.
task CalculateContamination {
    input {
        File tumorPileups
        # When given, passed via -matched for tumor/normal mode.
        File? normalPileups
        # JVM heap size in GB; container memory is ceil(memory * memoryMultiplier).
        Int memory = 12
        Float memoryMultiplier = 2
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        CalculateContamination \
        -I ~{tumorPileups} \
        ~{"-matched " + normalPileups} \
        -O "contamination.table" \
        --tumor-segmentation "segments.table"
    }

    output {
        # Fixed file names written by the command above.
        File contaminationTable = "contamination.table"
        File mafTumorSegments = "segments.table"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
task FilterMutectCalls {
input {
File referenceFasta
File referenceFastaFai
File referenceFastaDict
File unfilteredVcf
File unfilteredVcfIndex
String outputVcf
File? contaminationTable
File? mafTumorSegments
File? artifactPriors
Int uniqueAltReadCount = 4
File mutect2Stats
String? extraArgs
Int memory = 12
Float memoryMultiplier = 2
String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
}
command {
set -e
mkdir -p $(dirname ~{outputVcf})
gatk --java-options -Xmx~{memory}G \
FilterMutectCalls \
-R ~{referenceFasta} \
-V ~{unfilteredVcf} \
-O ~{outputVcf} \
~{"--contamination-table " + contaminationTable} \
~{"--tumor-segmentation " + mafTumorSegments} \
~{"--ob-priors " + artifactPriors} \
~{"--unique-alt-read-count " + uniqueAltReadCount} \
~{"-stats " + mutect2Stats} \
--filtering-stats "filtering.stats" \
--showHidden \
~{extraArgs}
}
output {
File filteredVcf = outputVcf
File filteredVcfIndex = outputVcf + ".tbi"
File filteringStats = "filtering.stats"
}
runtime {
......@@ -299,7 +478,7 @@ task SplitNCigarReads {
File referenceFastaDict
File referenceFastaFai
String outputBam
Array[File]+ intervals
Array[File] intervals = []
Int memory = 4
Float memoryMultiplier = 4
......@@ -314,7 +493,7 @@ task SplitNCigarReads {
-I ~{inputBam} \
-R ~{referenceFasta} \
-O ~{outputBam} \
-L ~{sep=' -L ' intervals}
~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals}
}
output {
......@@ -327,3 +506,59 @@ task SplitNCigarReads {
memory: ceil(memory * memoryMultiplier)
}
}
# Runs GATK3 CombineVariants to merge VCFs from different callers into one VCF,
# tagging each record with the caller identifier it came from.
task CombineVariants {
    input {
        String installDir = "/usr" # .jar location in the docker image

        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict
        # Passed to --genotypemergeoption; "UNIQUIFY" suffixes sample names per input.
        String genotypeMergeOption = "UNIQUIFY"
        String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED"
        # identifiers[i] labels variantVcfs[i]; the two arrays must be equal length.
        Array[String]+ identifiers
        Array[File]+ variantVcfs # follow "identifiers" array order
        # Index files are not referenced in the command; presumably listed so they
        # are localized next to the VCFs — TODO confirm.
        Array[File]+ variantIndexes
        String outputPath

        # JVM heap size in GB; container memory is ceil(memory * memoryMultiplier).
        Int memory = 12
        Float memoryMultiplier = 2
        String dockerImage = "broadinstitute/gatk3:3.8-1"
    }

    command <<<
        set -e
        mkdir -p $(dirname "~{outputPath}")

        # build "-V:<ID> <file.vcf>" arguments according to IDs and VCFs to merge
        # Make sure commands are run in bash
        # NOTE: the ~{...} placeholders below are expanded by WDL before the
        # single-quoted script reaches bash, so they are safe inside the quotes.
        bash -c '#!/usr/bin/env bash
        set -eux
        ids=(~{sep=" " identifiers})
        vars=(~{sep=" " variantVcfs})
        V_args=$(
            # Pair each identifier with its VCF by position.
            for (( i = 0; i < ${#ids[@]}; ++i ))
            do
                printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}"
            done
        )
        java -Xmx~{memory}G -jar ~{installDir}/GenomeAnalysisTK.jar \
        -T CombineVariants \
        -R ~{referenceFasta} \
        --genotypemergeoption ~{genotypeMergeOption} \
        --filteredrecordsmergetype ~{filteredRecordsMergeType} \
        --out ~{outputPath} \
        $V_args
        '
    >>>

    output {
        File combinedVcf = outputPath
        File combinedVcfIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
version 1.0
# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Builds a minimap2 index (.mmi) for a reference fasta via `minimap2 -d`.
task Indexing {
    input {
        File referenceFile
        String outputPrefix
        Boolean useHomopolymerCompressedKmer = false
        Int kmerSize = 15
        Int minimizerWindowSize = 10
        Int? splitIndex

        Int cores = 1
        Int memory = 4
        String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        minimap2 \
        ~{true="-H" false="" useHomopolymerCompressedKmer} \
        ~{"-k " + kmerSize} \
        ~{"-w " + minimizerWindowSize} \
        ~{"-I " + splitIndex} \
        ~{"-d " + outputPrefix + ".mmi"} \
        ~{"-t " + cores} \
        ~{referenceFile}
    }

    output {
        File outputIndexFile = outputPrefix + ".mmi"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        referenceFile: "Reference fasta file."
        outputPrefix: "Output directory path + output file prefix."
        useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferable for PacBio)."
        kmerSize: "K-mer size (no larger than 28)."
        minimizerWindowSize: "Minimizer window size."
        splitIndex: "Split index for every ~NUM input bases."
        outputIndexFile: "Indexed reference file."
    }
}
# Runs minimap2 to map/align a query fasta against a reference, writing the
# result (PAF, or SAM with -a) to outputPrefix.
task Mapping {
    input {
        File queryFile
        File referenceFile
        String outputPrefix
        String presetOption
        Boolean outputSAM = false
        Int? maxFragmentLength
        Int? maxIntronLength
        Boolean? skipSelfAndDualMappings
        Int? retainMaxSecondaryAlignments
        Int? matchingScore
        Int? mismatchPenalty
        String? howToFindGTAG
        Boolean? secondaryAlignment

        Int cores = 4
        Int memory = 7
        String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        minimap2 \
        ~{"-x " + presetOption} \
        ~{true="-a" false="" outputSAM} \
        ~{"-G " + maxIntronLength} \
        ~{"-F " + maxFragmentLength} \
        ~{true="-X" false="" skipSelfAndDualMappings} \
        ~{"-N " + retainMaxSecondaryAlignments} \
        ~{"-A " + matchingScore} \
        ~{"-B " + mismatchPenalty} \
        ~{"-u " + howToFindGTAG} \
        ~{true="--secondary=yes" false="--secondary=no" secondaryAlignment} \
        ~{"-o " + outputPrefix} \
        ~{"-t " + cores} \
        ~{referenceFile} \
        ~{queryFile}
    }
    # Note on secondaryAlignment: the flag text lives inside the placeholder so
    # that an undefined optional omits the option entirely (minimap2 default),
    # instead of the previous "--secondary=~{...}" which rendered the invalid
    # bare argument "--secondary=" when secondaryAlignment was not set.

    output {
        File outputAlignmentFile = outputPrefix
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        queryFile: "Input fasta file."
        referenceFile: "Reference fasta file."
        outputPrefix: "Output directory path + output file prefix."
        presetOption: "This option applies multiple options at the same time."
        outputSAM: "Output in the SAM format."
        maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)."
        maxIntronLength: "Max intron length (effective with -xsplice; changing -r)."
        skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)."
        retainMaxSecondaryAlignments: "Retain at most INT secondary alignments."
        matchingScore: "Matching score."
        mismatchPenalty: "Mismatch penalty."
        howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG."
        secondaryAlignment: "Whether to output secondary alignments."
        outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file."
    }
}
......@@ -38,6 +38,7 @@ task MultiQC {
Boolean verbose = false
Boolean quiet = false
Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff.
Int memory = 4
}
command {
......@@ -86,6 +87,7 @@ task MultiQC {
}
runtime {
memory: memory
docker: dockerImage
}
}
......@@ -8,7 +8,7 @@ task BedToIntervalList {
Int memory = 4
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
command {
......@@ -52,9 +52,7 @@ task CollectMultipleMetrics {
Int memory = 8
Float memoryMultiplier = 4
# https://raw.githubusercontent.com/BioContainers/multi-package-containers/80886dfea00f3cd9e7ae2edf4fc42816a10e5403/combinations/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923%3A8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0.tsv
# Contains r-base=3.4.1,picard=2.18.2
String dockerImage = "quay.io/biocontainers/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923:8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
......@@ -137,9 +135,7 @@ task CollectRnaSeqMetrics {
Int memory = 8
Float memoryMultiplier = 4.0
# https://raw.githubusercontent.com/BioContainers/multi-package-containers/80886dfea00f3cd9e7ae2edf4fc42816a10e5403/combinations/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923%3A8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0.tsv
# Contains r-base=3.4.1,picard=2.18.2
String dockerImage = "quay.io/biocontainers/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923:8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
command {
......@@ -178,7 +174,7 @@ task CollectTargetedPcrMetrics {
Int memory = 4
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
command {
......@@ -216,7 +212,7 @@ task GatherBamFiles {
Int memory = 4
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
command {
......@@ -250,7 +246,7 @@ task GatherVcfs {
Int memory = 4
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
command {
......@@ -282,7 +278,7 @@ task MarkDuplicates {
Int memory = 8
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection
......@@ -335,7 +331,7 @@ task MergeVCFs {
Int memory = 8
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
# Using MergeVcfs instead of GatherVcfs so we can create indices
......@@ -369,7 +365,7 @@ task SamToFastq {
Int memory = 16 # High memory default to avoid crashes.
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
File? NONE
}
......@@ -406,7 +402,7 @@ task ScatterIntervalList {
Int memory = 4
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
command {
......@@ -441,7 +437,7 @@ task SortVcf {
Int memory = 8
Float memoryMultiplier = 3.0
String dockerImage = "quay.io/biocontainers/picard:2.18.26--0"
String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
}
......
......@@ -267,3 +267,35 @@ task ParallelSingleTrain {
docker: dockerImage
}
}
# Post-processes a Strelka VCF with somaticseq's modify_Strelka.py, then inserts
# a GT FORMAT header line (which Strelka omits) before the first ##FORMAT line.
task ModifyStrelka {
    input {
        String installDir = "/opt/somaticseq/vcfModifier" #the location in the docker image

        File strelkaVCF
        # Was `String?` with a default — contradictory (always defined) and forces
        # a String? -> File coercion in the output block; a plain String with the
        # same default is backward compatible for callers.
        String outputVCFName = basename(strelkaVCF, ".gz")

        Int threads = 1
        String dockerImage = "lethalfang/somaticseq:3.1.0"
    }

    command {
        set -e
        ~{installDir}/modify_Strelka.py \
        -infile ~{strelkaVCF} \
        -outfile "modified_strelka.vcf"
        # Locate the first ##FORMAT header line and insert the GT definition
        # immediately before it, writing the final VCF to outputVCFName.
        first_FORMAT_line_num=$(grep -n -m 1 '##FORMAT' "modified_strelka.vcf" | cut -d : -f 1)
        sed "$first_FORMAT_line_num"'i##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">' "modified_strelka.vcf" > ~{outputVCFName}
    }

    output {
        File outputVcf = outputVCFName
    }

    runtime {
        cpu: threads
        docker: dockerImage
    }
}
talon.wdl 0 → 100644
version 1.0
# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Runs TALON's create_abundance_file_from_database to export a per-transcript
# abundance TSV from a TALON database.
task CreateAbundanceFileFromDatabase {
    input {
        File databaseFile
        String outputPrefix
        String genomeBuild
        String annotationVersion
        Boolean filterTranscripts = false
        File? filterPairingsFile

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        # outputPrefix may include a directory component; create it first.
        mkdir -p $(dirname ~{outputPrefix})
        create_abundance_file_from_database \
        ~{"--db=" + databaseFile} \
        ~{"--o=" + outputPrefix} \
        ~{"-b " + genomeBuild} \
        ~{"-a " + annotationVersion} \
        ~{true="--filter" false="" filterTranscripts} \
        ~{"-p " + filterPairingsFile}
    }

    output {
        File outputAbundanceFile = outputPrefix + "_talon_abundance.tsv"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        databaseFile: "TALON database."
        outputPrefix: "Output directory path + output file prefix."
        genomeBuild: "Genome build to use."
        annotationVersion: "Which annotation version to use."
        filterTranscripts: "The transcripts in the database will be filtered prior to GTF creation."
        filterPairingsFile: "A file indicating which datasets should be considered together."
        outputAbundanceFile: "Abundance for each transcript in the TALON database across datasets."
    }
}
# Runs TALON's create_GTF_abundance_from_database to export both a GTF of
# observed transcripts and an abundance TSV from a TALON database.
task CreateGtfAbundanceFromDatabase {
    input {
        File databaseFile
        String outputPrefix
        String genomeBuild
        String annotationVersion
        Boolean filterTranscripts = false
        File? filterPairingsFile

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        # outputPrefix may include a directory component; create it first.
        mkdir -p $(dirname ~{outputPrefix})
        create_GTF_abundance_from_database \
        ~{"--db=" + databaseFile} \
        ~{"--o=" + outputPrefix} \
        ~{"-b " + genomeBuild} \
        ~{"-a " + annotationVersion} \
        ~{true="--filter" false="" filterTranscripts} \
        ~{"-p " + filterPairingsFile}
    }

    output {
        File outputGTFfile = outputPrefix + "_talon_observedOnly.gtf"
        File outputAbundanceFile = outputPrefix + "_talon_abundance.tsv"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        databaseFile: "TALON database."
        outputPrefix: "Output directory path + output file prefix."
        genomeBuild: "Genome build to use."
        annotationVersion: "Which annotation version to use."
        filterTranscripts: "The transcripts in the database will be filtered prior to GTF creation."
        filterPairingsFile: "A file indicating which datasets should be considered together."
        outputGTFfile: "The genes, transcripts, and exons stored a TALON database in GTF format."
        outputAbundanceFile: "Abundance for each transcript in the TALON database across datasets."
    }
}
# Runs TALON's create_GTF_from_database to export the genes/transcripts/exons
# in a TALON database as a GTF file.
task CreateGtfFromDatabase {
    input {
        File databaseFile
        String outputPrefix
        String genomeBuild
        String annotationVersion
        Boolean observedInDataset = false
        File? whitelistFile
        File? datasetFile

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        # outputPrefix may include a directory component; create it first.
        mkdir -p $(dirname ~{outputPrefix})
        create_GTF_from_database \
        ~{"--db=" + databaseFile} \
        ~{"--o=" + outputPrefix} \
        ~{"-b " + genomeBuild} \
        ~{"-a " + annotationVersion} \
        ~{"--whitelist=" + whitelistFile} \
        ~{true="--observed" false="" observedInDataset} \
        ~{"-d " + datasetFile}
    }

    output {
        File outputGTFfile = outputPrefix + "_talon.gtf"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        databaseFile: "TALON database."
        outputPrefix: "Output directory path + output file prefix."
        genomeBuild: "Genome build to use."
        annotationVersion: "Which annotation version to use."
        observedInDataset: "Output only includes transcripts that were observed at least once."
        whitelistFile: "Whitelist file of transcripts to include in the output."
        datasetFile: "A file indicating which datasets should be included."
        outputGTFfile: "The genes, transcripts, and exons stored a TALON database in GTF format."
    }
}
# Runs TALON's initialize_talon_database to build a new TALON database
# (<outputPrefix>.db) from a GTF annotation.
task InitializeTalonDatabase {
    input {
        File GTFfile
        String outputPrefix
        String genomeBuild
        String annotationVersion
        Int minimumLength = 300
        String novelIDprefix = "TALON"
        Int cutoff5p = 500
        Int cutoff3p = 300

        Int cores = 1
        Int memory = 10
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        # outputPrefix may include a directory component; create it first.
        mkdir -p $(dirname ~{outputPrefix})
        initialize_talon_database \
        ~{"--f=" + GTFfile} \
        ~{"--o=" + outputPrefix} \
        ~{"--g=" + genomeBuild} \
        ~{"--a=" + annotationVersion} \
        ~{"--l=" + minimumLength} \
        ~{"--idprefix=" + novelIDprefix} \
        ~{"--5p=" + cutoff5p} \
        ~{"--3p=" + cutoff3p}
    }

    output {
        File outputDatabase = outputPrefix + ".db"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        GTFfile: "GTF annotation containing genes, transcripts, and edges."
        outputPrefix: "Output directory path + output file prefix."
        genomeBuild: "Name of genome build that the GTF file is based on (ie hg38)."
        annotationVersion: "Name of supplied annotation (will be used to label data)."
        minimumLength: "Minimum required transcript length."
        novelIDprefix: "Prefix for naming novel discoveries in eventual TALON runs."
        cutoff5p: "Maximum allowable distance (bp) at the 5' end during annotation."
        cutoff3p: "Maximum allowable distance (bp) at the 3' end during annotation."
        outputDatabase: "TALON database."
    }
}
# Runs TALON's map_antisense_genes_to_sense to produce a mapping from each
# antisense gene in the database to its sense gene.
task MapAntisenseGenesToSense {
    input {
        File databaseFile
        String outputPrefix
        String annotationVersion

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        # outputPrefix may include a directory component; create it first.
        mkdir -p $(dirname ~{outputPrefix})
        map_antisense_genes_to_sense \
        ~{"--db=" + databaseFile} \
        ~{"--o=" + outputPrefix} \
        ~{"-a " + annotationVersion}
    }

    output {
        File outputAntisenseMapFile = outputPrefix + "_antisense_mapping.gtf"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        databaseFile: "TALON database."
        outputPrefix: "Output directory path + output file prefix."
        annotationVersion: "Which annotation version to use."
        outputAntisenseMapFile: "IDs of the sense gene for every antisense gene in the database."
    }
}
# Runs TALON's summarize_datasets to write a tab-delimited summary of gene and
# transcript counts per dataset in a TALON database.
task SummarizeDatasets {
    input {
        File databaseFile
        String outputPrefix
        File? datasetGroupsCSV

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        # outputPrefix may include a directory component; create it first.
        mkdir -p $(dirname ~{outputPrefix})
        summarize_datasets \
        ~{"--db " + databaseFile} \
        ~{"--o " + outputPrefix} \
        ~{"--groups " + datasetGroupsCSV}
    }

    output {
        File outputSummaryFile = outputPrefix + "_talon_summary.tsv"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        databaseFile: "TALON database."
        outputPrefix: "Output directory path + output file prefix."
        datasetGroupsCSV: "File of comma-delimited dataset groups to process together."
        outputSummaryFile: "Tab-delimited file of gene and transcript counts for each dataset."
    }
}
# Runs TALON to annotate the transcripts in a SAM file against (and update) a
# TALON database.
task Talon {
    input {
        File SAMfile
        File configFile
        File databaseFile
        String outputPrefix
        String genomeBuild
        String configFileName = basename(configFile)
        String SAMfileName = basename(SAMfile)
        Float minimumCoverage = 0.9
        Int minimumIdentity = 0

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/talon:v4.2_cv2"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        # Move the config and SAM files into the working directory under their
        # base names. Previously written with draft-2 "${...}" placeholders;
        # normalized to "~{...}" for consistency with this version 1.0 file.
        # NOTE(review): moving localized inputs may fail on engines that localize
        # them read-only or across filesystems — confirm; `cp` would be safer.
        mv ~{configFile} ./~{configFileName}
        mv ~{SAMfile} ./~{SAMfileName}
        talon \
        ~{"--f " + configFileName} \
        ~{"--db " + databaseFile} \
        ~{"--o " + outputPrefix} \
        ~{"--build " + genomeBuild} \
        ~{"--cov " + minimumCoverage} \
        ~{"--identity " + minimumIdentity}
    }

    output {
        # TALON updates the database in place, so the input file is the result.
        File outputUpdatedDatabase = databaseFile
        File outputLog = outputPrefix + "_talon_QC.log"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        SAMfile: "Input SAM file, same one as described in configFile."
        configFile: "Dataset config file."
        databaseFile: "TALON database. Created using initialize_talon_database.py."
        outputPrefix: "Output directory path + output file prefix."
        genomeBuild: "Genome build (i.e. hg38) to use."
        minimumCoverage: "Minimum alignment coverage in order to use a SAM entry."
        minimumIdentity: "Minimum alignment identity in order to use a SAM entry."
        outputUpdatedDatabase: "Updated TALON database."
        outputLog: "Log file from TALON run."
    }
}
version 1.0
# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
task CleanSpliceJunctions {
    input {
        File SAMfile
        File referenceGenome
        String outputPrefix
        File spliceJunctionAnnotation
        File? variantFile

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
    }

    # Run TranscriptClean's standalone splice-junction cleaner. Required
    # arguments are interpolated directly; --v is only emitted when a
    # variant file is provided.
    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        clean_splice_jns \
        --f=~{SAMfile} \
        --g=~{referenceGenome} \
        --o=~{outputPrefix} \
        --s=~{spliceJunctionAnnotation} \
        ~{"--v=" + variantFile}
    }

    output {
        # clean_splice_jns appends this fixed suffix to the prefix.
        File outputCleanedSAM = outputPrefix + "_clean.sam"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        SAMfile: "Input SAM file"
        referenceGenome: "Reference genome fasta file."
        outputPrefix: "Output directory path + output file prefix."
        spliceJunctionAnnotation: "Splice junction file"
        variantFile: "VCF formatted file of variants"

        outputCleanedSAM: "Cleaned sam output file."
    }
}
task GetCorrectedSJsFromLog {
    input {
        File TElogFile
        String outputPrefix

        Int cores = 1
        Int memory = 5
        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
    }

    # Extract the noncanonical splice junctions that TranscriptClean
    # corrected from its TE log and write them to a TSV file.
    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        get_corrected_SJs_from_log \
        ~{TElogFile} \
        ~{outputPrefix + ".tsv"}
    }

    output {
        File outputCorrectedSJs = outputPrefix + ".tsv"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        TElogFile: "TE log from TranscriptClean."
        outputPrefix: "Output directory path + output file prefix."

        # Fixed typo: "Formely" -> "Formerly".
        outputCorrectedSJs: "Formerly noncanonical splice junctions in BED format."
    }
}
task GetSJsFromGtf {
    input {
        File GTFfile
        File genomeFile
        String outputPrefix
        Int minIntronSize = 21

        Int cores = 1
        Int memory = 8
        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
    }

    # Extract splice junctions from a GTF annotation for use as a
    # TranscriptClean reference. All arguments are required, so they are
    # interpolated directly rather than via optional-placeholder syntax.
    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        get_SJs_from_gtf \
        --f=~{GTFfile} \
        --g=~{genomeFile} \
        --o=~{outputPrefix}.tsv \
        --minIntronSize=~{minIntronSize}
    }

    output {
        File outputSJsFile = outputPrefix + ".tsv"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        GTFfile: "Input GTF file"
        genomeFile: "Reference genome"
        outputPrefix: "Output directory path + output file prefix."
        minIntronSize: "Minimum size of intron to consider a junction."

        outputSJsFile: "Extracted splice junctions."
    }
}
task GetTranscriptCleanStats {
    input {
        File transcriptCleanSAMfile
        String outputPrefix

        Int cores = 1
        Int memory = 4
        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
    }

    # Summarize a TranscriptClean run. The tool prints its summary to
    # standard output, which is captured as the task's output file.
    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        get_TranscriptClean_stats ~{transcriptCleanSAMfile} ~{outputPrefix}
    }

    output {
        File outputStatsFile = stdout()
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        transcriptCleanSAMfile: "Output SAM file from TranscriptClean"
        outputPrefix: "Output directory path + output file prefix."

        outputStatsFile: "Summary stats from TranscriptClean run."
    }
}
task TranscriptClean {
    input {
        File SAMfile
        File referenceGenome
        String outputPrefix
        Int maxLenIndel = 5
        Int maxSJoffset = 5
        Boolean correctMismatches = true
        Boolean correctIndels = true
        Boolean dryRun = false
        Boolean primaryOnly = false

        File? spliceJunctionAnnotation
        File? variantFile
        Boolean? correctSJs

        Int cores = 1
        Int memory = 25
        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
    }

    # Correct mismatches, indels and splice junctions in long-read
    # alignments against the reference genome.
    command {
        set -e
        mkdir -p $(dirname ~{outputPrefix})
        TranscriptClean \
        ~{"-s " + SAMfile} \
        ~{"-g " + referenceGenome} \
        ~{"-o " + outputPrefix} \
        ~{"-j " + spliceJunctionAnnotation} \
        ~{"-v " + variantFile} \
        ~{"--maxLenIndel=" + maxLenIndel} \
        ~{"--maxSJOffset=" + maxSJoffset} \
        ~{true="-m true" false="-m false" correctMismatches} \
        ~{true="-i true" false="-i false" correctIndels} \
        ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \
        ~{true="--dryRun" false="" dryRun} \
        ~{true="--primaryOnly" false="" primaryOnly}
    }
    # Fixed: the true-branches previously emitted the literal placeholder
    # text ("-m CORRECTMISMATCHES", "-i CORRECTINDELS",
    # "--correctSJs=CORRECTSJS") instead of the "true" value the
    # TranscriptClean CLI expects for these options.

    output {
        # TranscriptClean appends these fixed suffixes to the prefix.
        File outputTranscriptCleanFasta = outputPrefix + "_clean.fa"
        File outputTranscriptCleanLog = outputPrefix + "_clean.log"
        File outputTranscriptCleanSAM = outputPrefix + "_clean.sam"
        File outputTranscriptCleanTElog = outputPrefix + "_clean.TE.log"
    }

    runtime {
        cpu: cores
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        SAMfile: "Input SAM file containing transcripts to correct."
        referenceGenome: "Reference genome fasta file."
        outputPrefix: "Output directory path + output file prefix."
        spliceJunctionAnnotation: "Splice junction file"
        variantFile: "VCF formatted file of variants"
        maxLenIndel: "Maximum size indel to correct."
        maxSJoffset: "Maximum distance from annotated splice junction to correct."
        correctMismatches: "Set this to make TranscriptClean correct mismatches."
        correctIndels: "Set this to make TranscriptClean correct indels."
        correctSJs: "Set this to make TranscriptClean correct splice junctions."
        dryRun: "TranscriptClean will read in the data but don't do any correction."
        primaryOnly: "TranscriptClean will only output primary mappings of transcripts."

        outputTranscriptCleanFasta: "Fasta file containing corrected reads."
        outputTranscriptCleanLog: "Log file of TranscriptClean run."
        outputTranscriptCleanSAM: "SAM file containing corrected aligned reads."
        outputTranscriptCleanTElog: "TE log file of TranscriptClean run."
    }
}
......@@ -20,6 +20,13 @@ task VarDict {
Int endColumn = 3
Int geneColumn = 4
Boolean outputCandidateSomaticOnly = true
Boolean outputAllVariantsAtSamePosition = true
Float mappingQuality = 20
Int minimumTotalDepth = 8
Int minimumVariantDepth = 4
Float minimumAlleleFrequency = 0.02
Int threads = 1
Int memory = 16
Float memoryMultiplier = 2.5
......@@ -45,6 +52,12 @@ task VarDict {
~{true="var2vcf_paired.pl" false="var2vcf_valid.pl" defined(normalBam)} \
-N "~{tumorSampleName}~{"|" + normalSampleName}" \
~{true="" false="-E" defined(normalBam)} \
~{true="-M" false="" outputCandidateSomaticOnly} \
~{true="-A" false="" outputAllVariantsAtSamePosition} \
-Q ~{mappingQuality} \
-d ~{minimumTotalDepth} \
-v ~{minimumVariantDepth} \
-f ~{minimumAlleleFrequency} \
> ~{outputVcf}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment