diff --git a/.travis.yml b/.travis.yml index 7019e07a4fde725b05e6e8d355f3f9574e81d428..5109f9559d081a6b80af67c9993aff943af688ac 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,26 @@ -language: java +# We use conda to install cromwell. + +language: python + +python: + - 3.6 + +before_install: + # Install conda + - export MINICONDA=${HOME}/miniconda + - export PATH=${MINICONDA}/bin:${PATH} + - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + - bash miniconda.sh -b -f -p ${MINICONDA} + - conda config --set always_yes yes + - conda config --add channels defaults + - conda config --add channels bioconda + - conda config --add channels conda-forge + +install: + - conda install cromwell + script: -- set -e -- export CROMWELL_VERSION=35 -- wget https://github.com/broadinstitute/cromwell/releases/download/$CROMWELL_VERSION/womtool-$CROMWELL_VERSION.jar -- for F in `find -name "*.wdl"`; do echo $F; java -jar womtool-*.jar validate $F; done -- 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then git submodule foreach --recursive git checkout $TRAVIS_BRANCH && git submodule foreach --recursive git pull; fi' -- "git diff --exit-code || (echo ERROR: Git changes detected. Please update submodules && exit 1)" + - set -e + - for FILE in $(find -name "*.wdl"); do echo $FILE; womtool validate $FILE; done + - 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then git submodule foreach git checkout develop && git submodule foreach git pull; fi' + - "git diff --exit-code || (echo ERROR: Git changes detected. Please update submodules && exit 1)" diff --git a/CHANGELOG.md b/CHANGELOG.md index bcdcc6f41972cc47859927af8753ce90b508e931..ce2247333cc077426083ac9cb04f707603be2bcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,24 @@ that users understand how the changes affect the new version. version 1.0.0-dev --------------------------- ++ Removed "pipefail" from the command sections of TALON and TranscriptClean ++ Add WDL task for Minimap2 ++ Add WDL task for TALON ++ Add WDL task for TranscriptClean ++ Fastqsplitter: fix mkdir command to work with biocontainer's busybox mkdir ++ Cutadapt: simplify interface ++ Bigger memory multiplier in mutect to take into account bigger vmem usage ++ Cutadapt: Remove default adapter ++ Fastqsplitter: use version 1.1. ++ Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency ++ Common: Update dockerTag to dockerImage. ++ GATK: Add CombineVariants task that allows merging VCFs from, e.g., different callers. ++ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ Mutect2: Add necessary missing index attribute for panel of normals. ++ MultiQC: Add memory variable to multiqc task. ++ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR no longer need regions files as required inputs. ++ VarDict: Add user-definable flags (-M, -A, -Q, -d, -v, -f) to the paired VCF filtering script. + Cutadapt: If the output is a gzipped file, compress with level 1 (instead of default 6). + Cutadapt: Fix issues with read2output when using single-end reads. + Add feature type, idattr and additional attributes to htseq-count.
diff --git a/common.wdl b/common.wdl index 7b85d461c58869e45cc818344ffb4d24e2f293d2..5c53c7727e3ecf5e11a66761dbf1693f13d8156e 100644 --- a/common.wdl +++ b/common.wdl @@ -77,7 +77,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. - String dockerTag = "5.0.2" + String dockerImage = "bash:5.0.2" } command { @@ -91,7 +91,7 @@ task Copy { } runtime { - docker: "bash:" + dockerTag + docker: dockerImage } } @@ -155,7 +155,7 @@ task YamlToJson { input { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - String dockerTag = "3.13-py37-slim" + String dockerImage = "biowdl/pyyaml:3.13-py37-slim" } command { set -e @@ -174,7 +174,7 @@ task YamlToJson { } runtime { - docker: "biowdl/pyyaml:" + dockerTag + docker: dockerImage } } diff --git a/cutadapt.wdl b/cutadapt.wdl index 84ba9b59db9a0db17f70e1aa6fe72564380c0909..f3e8e29e167df6fba7156cd2f65d71f4adfc0c8b 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -7,16 +7,12 @@ task Cutadapt { String read1output = "cut_r1.fq.gz" String? read2output String? format - Array[String]+? adapter - Array[String]+? front - Array[String]+? anywhere - Array[String]+? adapterRead2 - Array[String]+? frontRead2 - Array[String]+? anywhereRead2 - # FIXME: default should be set at the subworkflow level, not here. Needs to wait for cromwell fix. - Array[String]+? adapterBoth = ["AGATCGGAAGAG"] - # contaminations = anywhereBoth - Array[String]+? contaminations + Array[String] adapter = [] + Array[String] front = [] + Array[String] anywhere = [] + Array[String] adapterRead2 = [] + Array[String] frontRead2 = [] + Array[String] anywhereRead2 = [] Boolean? interleaved String? pairFilter Float? errorRate @@ -74,25 +70,7 @@ task Cutadapt { then "mkdir -p $(dirname " + realRead2output + ")" else "" - # FIXME: This crappy overengineering can be removed once cromwell can handle subworkflow inputs correctly. - # Some WDL magic here to set both adapters with one setting. - # If then else's are needed to keep the variable optional and undefined - Array[String]+? adapterForward = if (defined(adapter) || defined(adapterBoth)) - then select_first([adapter, adapterBoth]) - else adapter - # Check if read2 is defined before applying adapters. - Array[String]+? adapterReverse = if (defined(read2) && (defined(adapterRead2) || defined(adapterBoth))) - then select_first([adapterRead2, adapterBoth]) - else adapterRead2 - - # Same for contaminations - Array[String]+? anywhereForward = if (defined(anywhere) || defined(contaminations)) - then select_first([anywhere, contaminations]) - else anywhere - Array[String]+? anywhereReverse = if (defined(read2) && (defined(anywhereRead2) || defined(contaminations))) - then select_first([anywhereRead2, contaminations]) - else anywhereRead2 - + # FIXME: Use prefix() function for adapter, adapterRead2, etc. 
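A minimal sketch of what the FIXME above suggests, assuming the WDL 1.0 prefix() standard-library function is supported by the Cromwell version in use: prefix("-a ", adapter) expands ["ADAPTER1", "ADAPTER2"] into ["-a ADAPTER1", "-a ADAPTER2"], and an empty array renders as an empty string, so the separate true/false length() guards used in the command section below would no longer be needed. Illustrative only; not part of this change.

    # Hypothetical rewrite of the adapter/front/anywhere flags in the Cutadapt command section:
    cutadapt \
        ~{sep=" " prefix("-a ", adapter)} \
        ~{sep=" " prefix("-A ", adapterRead2)} \
        ~{sep=" " prefix("-g ", front)} \
        ~{sep=" " prefix("-G ", frontRead2)} \
        ~{sep=" " prefix("-b ", anywhere)} \
        ~{sep=" " prefix("-B ", anywhereRead2)} \
        --output ~{read1output}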
command { set -e ~{"mkdir -p $(dirname " + read1output + ")"} @@ -100,12 +78,12 @@ task Cutadapt { cutadapt \ ~{"--cores=" + cores} \ ~{true="-Z" false="" Z} \ - ~{true="-a" false="" defined(adapterForward)} ~{sep=" -a " adapterForward} \ - ~{true="-A" false="" defined(adapterReverse)} ~{sep=" -A " adapterReverse} \ - ~{true="-g" false="" defined(front)} ~{sep=" -g " front} \ - ~{true="-G" false="" defined(frontRead2)} ~{sep=" -G " frontRead2} \ - ~{true="-b" false="" defined(anywhereForward)} ~{sep=" -b " anywhereForward} \ - ~{true="-B" false="" defined(anywhereReverse)} ~{sep=" -B " anywhereReverse} \ + ~{true="-a" false="" length(adapter) > 0} ~{sep=" -a " adapter} \ + ~{true="-A" false="" length(adapterRead2) > 0} ~{sep=" -A " adapterRead2} \ + ~{true="-g" false="" length(front) > 0} ~{sep=" -g " front} \ + ~{true="-G" false="" length(frontRead2) > 0} ~{sep=" -G " frontRead2} \ + ~{true="-b" false="" length(anywhere) > 0} ~{sep=" -b " anywhere} \ + ~{true="-B" false="" length(anywhereRead2) > 0} ~{sep=" -B " anywhereRead2} \ --output ~{read1output} ~{if defined(read2) then "-p " + realRead2output else ""} \ ~{"--to-short-output " + tooShortOutputPath} \ ~{"--to-short-paired-output " + tooShortPairedOutputPath} \ diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index 66f79af605d4f3bd9f28c3fa7690bb9808a5147c..cbbb7f307595c0e4732f82e8ee9f983cb81f944f 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -26,7 +26,7 @@ task Fastqsplitter { input { File inputFastq Array[String]+ outputPaths - String dockerImage = "quay.io/biocontainers/fastqsplitter:1.0.0--py_0" + String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" Int? compressionLevel Int? threadsPerFile # fastqplitter utilizes one thread per input file and one or more threads per output file + one thread for the application. @@ -34,15 +34,18 @@ task Fastqsplitter { Int cores = 1 + ceil(0.5 * length(outputPaths)) } - command { + # Busybox mkdir does not accept multiple paths. + command <<< set -e - mkdir -p $(dirname ~{sep=' ' outputPaths}) + for FILE in ~{sep=' ' outputPaths} + do mkdir -p $(dirname $FILE) + done fastqsplitter \ ~{"-c " + compressionLevel} \ ~{"-t " + threadsPerFile} \ -i ~{inputFastq} \ -o ~{sep=' -o ' outputPaths} - } + >>> output { Array[File] chunks = outputPaths diff --git a/gatk.wdl b/gatk.wdl index 87fa1046be22020b0a7b37b638d6c30084c43519..9504de2d6bed80c7aa57618ce53242489b46018d 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -7,7 +7,7 @@ task ApplyBQSR { File inputBamIndex String outputBamPath File recalibrationReport - Array[File]+ sequenceGroupInterval + Array[File] sequenceGroupInterval = [] File referenceFasta File referenceFastaDict File referenceFastaFai @@ -32,7 +32,7 @@ task ApplyBQSR { --static-quantized-quals 10 \ --static-quantized-quals 20 \ --static-quantized-quals 30 \ - -L ~{sep=" -L " sequenceGroupInterval} + ~{true="-L" false="" length(sequenceGroupInterval) > 0} ~{sep=' -L ' sequenceGroupInterval} } output { @@ -53,7 +53,7 @@ task BaseRecalibrator { File inputBam File inputBamIndex String recalibrationReportPath - Array[File]+ sequenceGroupInterval + Array[File] sequenceGroupInterval = [] Array[File]? knownIndelsSitesVCFs Array[File]? knownIndelsSitesVCFIndexes File? 
dbsnpVCF @@ -82,7 +82,7 @@ task BaseRecalibrator { --use-original-qualities \ -O ~{recalibrationReportPath} \ --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \ - -L ~{sep=" -L " sequenceGroupInterval} + ~{true="-L" false="" length(sequenceGroupInterval) > 0} ~{sep=' -L ' sequenceGroupInterval} } output { @@ -258,12 +258,17 @@ task MuTect2 { String outputVcf String tumorSample String? normalSample + File? germlineResource + File? germlineResourceIndex File? panelOfNormals + File? panelOfNormalsIndex + String f1r2TarGz = "f1r2.tar.gz" Array[File]+ intervals + String outputStats = outputVcf + ".stats" Int memory = 4 - Float memoryMultiplier = 3 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + Float memoryMultiplier = 4 + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } command { @@ -275,7 +280,9 @@ task MuTect2 { -I ~{sep=" -I " inputBams} \ -tumor ~{tumorSample} \ ~{"-normal " + normalSample} \ + ~{"--germline-resource " + germlineResource} \ ~{"--panel-of-normals " + panelOfNormals} \ + ~{"--f1r2-tar-gz " + f1r2TarGz} \ -O ~{outputVcf} \ -L ~{sep=" -L " intervals} } @@ -283,6 +290,178 @@ task MuTect2 { output { File vcfFile = outputVcf File vcfFileIndex = outputVcf + ".tbi" + File f1r2File = f1r2TarGz + File stats = outputStats + } + + runtime { + docker: dockerImage + memory: ceil(memory * memoryMultiplier) + } +} + +task LearnReadOrientationModel { + input { + Array[File]+ f1r2TarGz + + Int memory = 12 + Float memoryMultiplier = 2 + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + gatk --java-options -Xmx~{memory}G \ + LearnReadOrientationModel \ + -I ~{sep=" -I " f1r2TarGz} \ + -O "artifact-priors.tar.gz" + } + + output { + File artifactPriorsTable = "artifact-priors.tar.gz" + } + + runtime { + docker: dockerImage + memory: ceil(memory * memoryMultiplier) + } +} + +task MergeStats { + input { + Array[File]+ stats + + Int memory = 14 + Float memoryMultiplier = 2 + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + gatk --java-options -Xmx~{memory}G \ + MergeMutectStats \ + -stats ~{sep=" -stats " stats} \ + -O "merged.stats" + } + + output { + File mergedStats = "merged.stats" + } + + runtime { + docker: dockerImage + memory: ceil(memory * memoryMultiplier) + } +} + +task GetPileupSummaries { + input { + File sampleBam + File sampleBamIndex + File variantsForContamination + File variantsForContaminationIndex + File sitesForContamination + File sitesForContaminationIndex + String outputPrefix + + Int memory = 12 + Float memoryMultiplier = 2 + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + gatk --java-options -Xmx~{memory}G \ + GetPileupSummaries \ + -I ~{sampleBam} \ + -V ~{variantsForContamination} \ + -L ~{sitesForContamination} \ + -O ~{outputPrefix + "-pileups.table"} + } + + output { + File pileups = outputPrefix + "-pileups.table" + } + + runtime { + docker: dockerImage + memory: ceil(memory * memoryMultiplier) + } +} + +task CalculateContamination { + input { + File tumorPileups + File? 
normalPileups + + Int memory = 12 + Float memoryMultiplier = 2 + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + gatk --java-options -Xmx~{memory}G \ + CalculateContamination \ + -I ~{tumorPileups} \ + ~{"-matched " + normalPileups} \ + -O "contamination.table" \ + --tumor-segmentation "segments.table" + } + + output { + File contaminationTable = "contamination.table" + File mafTumorSegments = "segments.table" + } + + runtime { + docker: dockerImage + memory: ceil(memory * memoryMultiplier) + } +} + +task FilterMutectCalls { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File unfilteredVcf + File unfilteredVcfIndex + String outputVcf + File? contaminationTable + File? mafTumorSegments + File? artifactPriors + Int uniqueAltReadCount = 4 + File mutect2Stats + String? extraArgs + + Int memory = 12 + Float memoryMultiplier = 2 + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p $(dirname ~{outputVcf}) + gatk --java-options -Xmx~{memory}G \ + FilterMutectCalls \ + -R ~{referenceFasta} \ + -V ~{unfilteredVcf} \ + -O ~{outputVcf} \ + ~{"--contamination-table " + contaminationTable} \ + ~{"--tumor-segmentation " + mafTumorSegments} \ + ~{"--ob-priors " + artifactPriors} \ + ~{"--unique-alt-read-count " + uniqueAltReadCount} \ + ~{"-stats " + mutect2Stats} \ + --filtering-stats "filtering.stats" \ + --showHidden \ + ~{extraArgs} + } + + output { + File filteredVcf = outputVcf + File filteredVcfIndex = outputVcf + ".tbi" + File filteringStats = "filtering.stats" } runtime { @@ -299,7 +478,7 @@ task SplitNCigarReads { File referenceFastaDict File referenceFastaFai String outputBam - Array[File]+ intervals + Array[File] intervals = [] Int memory = 4 Float memoryMultiplier = 4 @@ -314,7 +493,7 @@ task SplitNCigarReads { -I ~{inputBam} \ -R ~{referenceFasta} \ -O ~{outputBam} \ - -L ~{sep=' -L ' intervals} + ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} } output { @@ -327,3 +506,59 @@ task SplitNCigarReads { memory: ceil(memory * memoryMultiplier) } } + +task CombineVariants { + input { + String installDir = "/usr" # .jar location in the docker image + + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String genotypeMergeOption = "UNIQUIFY" + String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED" + Array[String]+ identifiers + Array[File]+ variantVcfs # follow "identifiers" array order + Array[File]+ variantIndexes + String outputPath + + Int memory = 12 + Float memoryMultiplier = 2 + String dockerImage = "broadinstitute/gatk3:3.8-1" + } + + command <<< + set -e + mkdir -p $(dirname "~{outputPath}") + + # build "-V:<ID> <file.vcf>" arguments according to IDs and VCFs to merge + # Make sure commands are run in bash + bash -c '#!/usr/bin/env bash + set -eux + ids=(~{sep=" " identifiers}) + vars=(~{sep=" " variantVcfs}) + V_args=$( + for (( i = 0; i < ${#ids[@]}; ++i )) + do + printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" + done + ) + java -Xmx~{memory}G -jar ~{installDir}/GenomeAnalysisTK.jar \ + -T CombineVariants \ + -R ~{referenceFasta} \ + --genotypemergeoption ~{genotypeMergeOption} \ + --filteredrecordsmergetype ~{filteredRecordsMergeType} \ + --out ~{outputPath} \ + $V_args + ' + >>> + + output { + File combinedVcf = outputPath + File combinedVcfIndex = outputPath + ".tbi" + } + + runtime { + docker: dockerImage + memory: ceil(memory * memoryMultiplier) + } +} diff --git a/minimap2.wdl b/minimap2.wdl new file mode 
100644 index 0000000000000000000000000000000000000000..b4e4d4ee63438570e0512c514565b41f647754fc --- /dev/null +++ b/minimap2.wdl @@ -0,0 +1,142 @@ +version 1.0 + +# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Indexing { + input { + File referenceFile + String outputPrefix + Boolean useHomopolymerCompressedKmer = false + Int kmerSize = 15 + Int minimizerWindowSize = 10 + + Int? splitIndex + + Int cores = 1 + Int memory = 4 + String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + minimap2 \ + ~{true="-H" false="" useHomopolymerCompressedKmer} \ + ~{"-k " + kmerSize} \ + ~{"-w " + minimizerWindowSize} \ + ~{"-I " + splitIndex} \ + ~{"-d " + outputPrefix + ".mmi"} \ + ~{"-t " + cores} \ + ~{referenceFile} + } + + output { + File outputIndexFile = outputPrefix + ".mmi" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + referenceFile: "Reference fasta file." + outputPrefix: "Output directory path + output file prefix." + useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferrable for PacBio)." + kmerSize: "K-mer size (no larger than 28)." + minimizerWindowSize: "Minimizer window size." + splitIndex: "Split index for every ~NUM input bases." + + outputIndexFile: "Indexed reference file." + } +} + +task Mapping { + input { + File queryFile + File referenceFile + String outputPrefix + String presetOption + Boolean outputSAM = false + + Int? maxFragmentLength + Int? maxIntronLength + Boolean? skipSelfAndDualMappings + Int? retainMaxSecondaryAlignments + Int? matchingScore + Int? mismatchPenalty + String? howToFindGTAG + Boolean? 
secondaryAlignment + + Int cores = 4 + Int memory = 7 + String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + minimap2 \ + ~{"-x " + presetOption} \ + ~{true="-a" false="" outputSAM} \ + ~{"-G " + maxIntronLength} \ + ~{"-F " + maxFragmentLength} \ + ~{true="-X" false="" skipSelfAndDualMappings} \ + ~{"-N " + retainMaxSecondaryAlignments} \ + ~{"-A " + matchingScore} \ + ~{"-B " + mismatchPenalty} \ + ~{"-u " + howToFindGTAG} \ + --secondary=~{true="yes" false="no" secondaryAlignment} \ + ~{"-o " + outputPrefix} \ + ~{"-t " + cores} \ + ~{referenceFile} \ + ~{queryFile} + } + + output { + File outputAlignmentFile = outputPrefix + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + queryFile: "Input fasta file." + referenceFile: "Reference fasta file." + outputPrefix: "Output directory path + output file prefix." + presetOption: "This option applies multiple options at the same time." + outputSAM: "Output in the SAM format." + maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)." + maxIntronLength: "Max intron length (effective with -xsplice; changing -r)." + skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)." + retainMaxSecondaryAlignments: "Retain at most INT secondary alignments." + matchingScore: "Matching score." + mismatchPenalty: "Mismatch penalty." + howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG." + secondaryAlignment: "Whether to output secondary alignments." + + outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file." + } +} diff --git a/multiqc.wdl b/multiqc.wdl index e7a87e1490931ea84ccf156aa4c54f349d668292..fc5d49a70609702910a3958dad729c4ca983ad99 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -38,6 +38,7 @@ task MultiQC { Boolean verbose = false Boolean quiet = false Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. 
+ Int memory = 4 } command { @@ -86,6 +87,7 @@ task MultiQC { } runtime { + memory: memory docker: dockerImage } } diff --git a/picard.wdl b/picard.wdl index 0dcb7ecd15ad8706b985b6c473e2dc180f53a644..450b475e60bd6191f0c9013392774e2f3ab096d8 100644 --- a/picard.wdl +++ b/picard.wdl @@ -8,7 +8,7 @@ task BedToIntervalList { Int memory = 4 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { @@ -52,9 +52,7 @@ task CollectMultipleMetrics { Int memory = 8 Float memoryMultiplier = 4 - # https://raw.githubusercontent.com/BioContainers/multi-package-containers/80886dfea00f3cd9e7ae2edf4fc42816a10e5403/combinations/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923%3A8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0.tsv - # Contains r-base=3.4.1,picard=2.18.2 - String dockerImage = "quay.io/biocontainers/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923:8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -137,9 +135,7 @@ task CollectRnaSeqMetrics { Int memory = 8 Float memoryMultiplier = 4.0 - # https://raw.githubusercontent.com/BioContainers/multi-package-containers/80886dfea00f3cd9e7ae2edf4fc42816a10e5403/combinations/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923%3A8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0.tsv - # Contains r-base=3.4.1,picard=2.18.2 - String dockerImage = "quay.io/biocontainers/mulled-v2-23d9f7c700e78129a769e78521eb86d6b8341923:8dde04faba6c9ac93fae7e846af3bafd2c331b3b-0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { @@ -178,7 +174,7 @@ task CollectTargetedPcrMetrics { Int memory = 4 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { @@ -216,7 +212,7 @@ task GatherBamFiles { Int memory = 4 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { @@ -250,7 +246,7 @@ task GatherVcfs { Int memory = 4 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { @@ -282,7 +278,7 @@ task MarkDuplicates { Int memory = 8 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection @@ -335,7 +331,7 @@ task MergeVCFs { Int memory = 8 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } # Using MergeVcfs instead of GatherVcfs so we can create indices @@ -369,7 +365,7 @@ task SamToFastq { Int memory = 16 # High memory default to avoid crashes. Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" File? 
NONE } @@ -406,7 +402,7 @@ task ScatterIntervalList { Int memory = 4 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { @@ -441,7 +437,7 @@ task SortVcf { Int memory = 8 Float memoryMultiplier = 3.0 - String dockerImage = "quay.io/biocontainers/picard:2.18.26--0" + String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } diff --git a/somaticseq.wdl b/somaticseq.wdl index d1163b4a1fcda41b45f9e3a12e7eb5b2ad16c882..7a25a0817950525000d53e2b13eccf4d79bfa323 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -267,3 +267,35 @@ task ParallelSingleTrain { docker: dockerImage } } + +task ModifyStrelka { + input { + String installDir = "/opt/somaticseq/vcfModifier" #the location in the docker image + + File strelkaVCF + String? outputVCFName = basename(strelkaVCF, ".gz") + + Int threads = 1 + String dockerImage = "lethalfang/somaticseq:3.1.0" + } + + command { + set -e + + ~{installDir}/modify_Strelka.py \ + -infile ~{strelkaVCF} \ + -outfile "modified_strelka.vcf" + + first_FORMAT_line_num=$(grep -n -m 1 '##FORMAT' "modified_strelka.vcf" | cut -d : -f 1) + sed "$first_FORMAT_line_num"'i##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">' "modified_strelka.vcf" > ~{outputVCFName} + } + + output { + File outputVcf = outputVCFName + } + + runtime { + cpu: threads + docker: dockerImage + } +} diff --git a/talon.wdl b/talon.wdl new file mode 100644 index 0000000000000000000000000000000000000000..507e0b9a3440a85a6109d81d2fb94d0c648e6343 --- /dev/null +++ b/talon.wdl @@ -0,0 +1,362 @@ +version 1.0 + +# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CreateAbundanceFileFromDatabase { + input { + File databaseFile + String outputPrefix + String genomeBuild + String annotationVersion + Boolean filterTranscripts = false + + File? 
filterPairingsFile + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + create_abundance_file_from_database \ + ~{"--db=" + databaseFile} \ + ~{"--o=" + outputPrefix} \ + ~{"-b " + genomeBuild} \ + ~{"-a " + annotationVersion} \ + ~{true="--filter" false="" filterTranscripts} \ + ~{"-p " + filterPairingsFile} + } + + output { + File outputAbundanceFile = outputPrefix + "_talon_abundance.tsv" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + databaseFile: "TALON database." + outputPrefix: "Output directory path + output file prefix." + genomeBuild: "Genome build to use." + annotationVersion: "Which annotation version to use." + filterTranscripts: "The transcripts in the database will be filtered prior to GTF creation." + filterPairingsFile: "A file indicating which datasets should be considered together." + + outputAbundanceFile: "Abundance for each transcript in the TALON database across datasets." + } +} + +task CreateGtfAbundanceFromDatabase { + input { + File databaseFile + String outputPrefix + String genomeBuild + String annotationVersion + Boolean filterTranscripts = false + + File? filterPairingsFile + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + create_GTF_abundance_from_database \ + ~{"--db=" + databaseFile} \ + ~{"--o=" + outputPrefix} \ + ~{"-b " + genomeBuild} \ + ~{"-a " + annotationVersion} \ + ~{true="--filter" false="" filterTranscripts} \ + ~{"-p " + filterPairingsFile} + } + + output { + File outputGTFfile = outputPrefix + "_talon_observedOnly.gtf" + File outputAbundanceFile = outputPrefix + "_talon_abundance.tsv" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + databaseFile: "TALON database." + outputPrefix: "Output directory path + output file prefix." + genomeBuild: "Genome build to use." + annotationVersion: "Which annotation version to use." + filterTranscripts: "The transcripts in the database will be filtered prior to GTF creation." + filterPairingsFile: "A file indicating which datasets should be considered together." + + outputGTFfile: "The genes, transcripts, and exons stored a TALON database in GTF format." + outputAbundanceFile: "Abundance for each transcript in the TALON database across datasets." + } +} + +task CreateGtfFromDatabase { + input { + File databaseFile + String outputPrefix + String genomeBuild + String annotationVersion + Boolean observedInDataset = false + + File? whitelistFile + File? datasetFile + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + create_GTF_from_database \ + ~{"--db=" + databaseFile} \ + ~{"--o=" + outputPrefix} \ + ~{"-b " + genomeBuild} \ + ~{"-a " + annotationVersion} \ + ~{"--whitelist=" + whitelistFile} \ + ~{true="--observed" false="" observedInDataset} \ + ~{"-d " + datasetFile} + } + + output { + File outputGTFfile = outputPrefix + "_talon.gtf" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + databaseFile: "TALON database." + outputPrefix: "Output directory path + output file prefix." + genomeBuild: "Genome build to use." + annotationVersion: "Which annotation version to use." + observedInDataset: "Output only includes transcripts that were observed at least once." 
+ whitelistFile: "Whitelist file of transcripts to include in the output." + datasetFile: "A file indicating which datasets should be included." + + outputGTFfile: "The genes, transcripts, and exons stored a TALON database in GTF format." + } +} + +task InitializeTalonDatabase { + input { + File GTFfile + String outputPrefix + String genomeBuild + String annotationVersion + Int minimumLength = 300 + String novelIDprefix = "TALON" + Int cutoff5p = 500 + Int cutoff3p = 300 + + Int cores = 1 + Int memory = 10 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + initialize_talon_database \ + ~{"--f=" + GTFfile} \ + ~{"--o=" + outputPrefix} \ + ~{"--g=" + genomeBuild} \ + ~{"--a=" + annotationVersion} \ + ~{"--l=" + minimumLength} \ + ~{"--idprefix=" + novelIDprefix} \ + ~{"--5p=" + cutoff5p} \ + ~{"--3p=" + cutoff3p} + } + + output { + File outputDatabase = outputPrefix + ".db" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + GTFfile: "GTF annotation containing genes, transcripts, and edges." + outputPrefix: "Output directory path + output file prefix." + genomeBuild: "Name of genome build that the GTF file is based on (ie hg38)." + annotationVersion: "Name of supplied annotation (will be used to label data)." + minimumLength: "Minimum required transcript length." + novelIDprefix: "Prefix for naming novel discoveries in eventual TALON runs." + cutoff5p: "Maximum allowable distance (bp) at the 5' end during annotation." + cutoff3p: "Maximum allowable distance (bp) at the 3' end during annotation." + + outputDatabase: "TALON database." + } +} + +task MapAntisenseGenesToSense { + input { + File databaseFile + String outputPrefix + String annotationVersion + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + map_antisense_genes_to_sense \ + ~{"--db=" + databaseFile} \ + ~{"--o=" + outputPrefix} \ + ~{"-a " + annotationVersion} + } + + output { + File outputAntisenseMapFile = outputPrefix + "_antisense_mapping.gtf" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + databaseFile: "TALON database." + outputPrefix: "Output directory path + output file prefix." + annotationVersion: "Which annotation version to use." + + outputAntisenseMapFile: "IDs of the sense gene for every antisense gene in the database." + } +} + +task SummarizeDatasets { + input { + File databaseFile + String outputPrefix + + File? datasetGroupsCSV + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + summarize_datasets \ + ~{"--db " + databaseFile} \ + ~{"--o " + outputPrefix} \ + ~{"--groups " + datasetGroupsCSV} + } + + output { + File outputSummaryFile = outputPrefix + "_talon_summary.tsv" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + databaseFile: "TALON database." + outputPrefix: "Output directory path + output file prefix." + datasetGroupsCSV: "File of comma-delimited dataset groups to process together." + + outputSummaryFile: "Tab-delimited file of gene and transcript counts for each dataset." 
+ } +} + +task Talon { + input { + File SAMfile + File configFile + File databaseFile + String outputPrefix + String genomeBuild + String configFileName = basename(configFile) + String SAMfileName = basename(SAMfile) + Float minimumCoverage = 0.9 + Int minimumIdentity = 0 + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/talon:v4.2_cv2" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + mv ${configFile} ./${configFileName} + mv ${SAMfile} ./${SAMfileName} + talon \ + ~{"--f " + configFileName} \ + ~{"--db " + databaseFile} \ + ~{"--o " + outputPrefix} \ + ~{"--build " + genomeBuild} \ + ~{"--cov " + minimumCoverage} \ + ~{"--identity " + minimumIdentity} + } + + output { + File outputUpdatedDatabase = databaseFile + File outputLog = outputPrefix + "_talon_QC.log" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + SAMfile: "Input SAM file, same one as described in configFile." + configFile: "Dataset config file." + databaseFile: "TALON database. Created using initialize_talon_database.py." + outputPrefix: "Output directory path + output file prefix." + genomeBuild: "Genome build (i.e. hg38) to use." + minimumCoverage: "Minimum alignment coverage in order to use a SAM entry." + minimumIdentity: "Minimum alignment identity in order to use a SAM entry." + + outputUpdatedDatabase: "Updated TALON database." + outputLog: "Log file from TALON run." + } +} diff --git a/transcriptclean.wdl b/transcriptclean.wdl new file mode 100644 index 0000000000000000000000000000000000000000..7afd24197084d5ac318b07a15bc2692b38ece5f5 --- /dev/null +++ b/transcriptclean.wdl @@ -0,0 +1,253 @@ +version 1.0 + +# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CleanSpliceJunctions { + input { + File SAMfile + File referenceGenome + String outputPrefix + File spliceJunctionAnnotation + + File? 
variantFile + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + clean_splice_jns \ + ~{"--f=" + SAMfile} \ + ~{"--g=" + referenceGenome} \ + ~{"--o=" + outputPrefix} \ + ~{"--s=" + spliceJunctionAnnotation} \ + ~{"--v=" + variantFile} + } + + output { + File outputCleanedSAM = outputPrefix + "_clean.sam" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + SAMfile: "Input SAM file" + referenceGenome: "Reference genome fasta file." + outputPrefix: "Output directory path + output file prefix." + spliceJunctionAnnotation: "Splice junction file" + variantFile: "VCF formatted file of variants" + + outputCleanedSAM: "Cleaned sam output file." + } +} + +task GetCorrectedSJsFromLog { + input { + File TElogFile + String outputPrefix + + Int cores = 1 + Int memory = 5 + String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + get_corrected_SJs_from_log \ + ~{TElogFile} \ + ~{outputPrefix + ".tsv"} + } + + output { + File outputCorrectedSJs = outputPrefix + ".tsv" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + TElogFile: "TE log from TranscriptClean." + outputPrefix: "Output directory path + output file prefix." + + outputCorrectedSJs: "Formely noncanonical splice junctions in BED format." + } +} + +task GetSJsFromGtf { + input { + File GTFfile + File genomeFile + String outputPrefix + Int minIntronSize = 21 + + Int cores = 1 + Int memory = 8 + String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + get_SJs_from_gtf \ + ~{"--f=" + GTFfile} \ + ~{"--g=" + genomeFile} \ + ~{"--o=" + outputPrefix + ".tsv"} \ + ~{"--minIntronSize=" + minIntronSize} + } + + output { + File outputSJsFile = outputPrefix + ".tsv" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + GTFfile: "Input GTF file" + genomeFile: "Reference genome" + outputPrefix: "Output directory path + output file prefix." + minIntronSize: "Minimum size of intron to consider a junction." + + outputSJsFile: "Extracted splice junctions." + } +} + +task GetTranscriptCleanStats { + input { + File transcriptCleanSAMfile + String outputPrefix + + Int cores = 1 + Int memory = 4 + String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + get_TranscriptClean_stats \ + ~{transcriptCleanSAMfile} \ + ~{outputPrefix} + } + + output { + File outputStatsFile = stdout() + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + transcriptCleanSAMfile: "Output SAM file from TranscriptClean" + outputPrefix: "Output directory path + output file prefix." + + outputStatsFile: "Summary stats from TranscriptClean run." + } +} + +task TranscriptClean { + input { + File SAMfile + File referenceGenome + String outputPrefix + Int maxLenIndel = 5 + Int maxSJoffset = 5 + Boolean correctMismatches = true + Boolean correctIndels = true + Boolean dryRun = false + Boolean primaryOnly = false + + File? spliceJunctionAnnotation + File? variantFile + Boolean? 
correctSJs + + Int cores = 1 + Int memory = 25 + String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1" + } + + command { + set -e + mkdir -p $(dirname ~{outputPrefix}) + TranscriptClean \ + ~{"-s " + SAMfile} \ + ~{"-g " + referenceGenome} \ + ~{"-o " + outputPrefix} \ + ~{"-j " + spliceJunctionAnnotation} \ + ~{"-v " + variantFile} \ + ~{"--maxLenIndel=" + maxLenIndel} \ + ~{"--maxSJOffset=" + maxSJoffset} \ + ~{true="-m CORRECTMISMATCHES" false="-m false" correctMismatches} \ + ~{true="-i CORRECTINDELS" false="-i false" correctIndels} \ + ~{true="--correctSJs=CORRECTSJS" false="--correctSJs=false" correctSJs} \ + ~{true="--dryRun" false="" dryRun} \ + ~{true="--primaryOnly" false="" primaryOnly} + } + + output { + File outputTranscriptCleanFasta = outputPrefix + "_clean.fa" + File outputTranscriptCleanLog = outputPrefix + "_clean.log" + File outputTranscriptCleanSAM = outputPrefix + "_clean.sam" + File outputTranscriptCleanTElog = outputPrefix + "_clean.TE.log" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + SAMfile: "Input SAM file containing transcripts to correct." + referenceGenome: "Reference genome fasta file." + outputPrefix: "Output directory path + output file prefix." + spliceJunctionAnnotation: "Splice junction file" + maxLenIndel: "Maximum size indel to correct." + maxSJoffset: "Maximum distance from annotated splice junction to correct." + correctMismatches: "Set this to make TranscriptClean correct mismatches." + correctIndels: "Set this to make TranscriptClean correct indels." + correctSJs: "Set this to make TranscriptClean correct splice junctions." + dryRun: "TranscriptClean will read in the data but don't do any correction." + primaryOnly: "TranscriptClean will only output primary mappings of transcripts." + + outputTranscriptCleanFasta: "Fasta file containing corrected reads." + outputTranscriptCleanLog: "Log file of TranscriptClean run." + outputTranscriptCleanSAM: "SAM file containing corrected aligned reads." + outputTranscriptCleanTElog: "TE log file of TranscriptClean run." + } +} diff --git a/vardict.wdl b/vardict.wdl index 0cbf38acd7a69f22e4b760f7f17932f585a52852..69a5441c22f47afccf011bddf98fb4888c10c989 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -20,6 +20,13 @@ task VarDict { Int endColumn = 3 Int geneColumn = 4 + Boolean outputCandidateSomaticOnly = true + Boolean outputAllVariantsAtSamePosition = true + Float mappingQuality = 20 + Int minimumTotalDepth = 8 + Int minimumVariantDepth = 4 + Float minimumAlleleFrequency = 0.02 + Int threads = 1 Int memory = 16 Float memoryMultiplier = 2.5 @@ -45,6 +52,12 @@ task VarDict { ~{true="var2vcf_paired.pl" false="var2vcf_valid.pl" defined(normalBam)} \ -N "~{tumorSampleName}~{"|" + normalSampleName}" \ ~{true="" false="-E" defined(normalBam)} \ + ~{true="-M" false="" outputCandidateSomaticOnly} \ + ~{true="-A" false="" outputAllVariantsAtSamePosition} \ + -Q ~{mappingQuality} \ + -d ~{minimumTotalDepth} \ + -v ~{minimumVariantDepth} \ + -f ~{minimumAlleleFrequency} \ > ~{outputVcf} }
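The GATK tasks introduced in gatk.wdl by this change are intended to be used together: MuTect2 with --f1r2-tar-gz, then LearnReadOrientationModel, MergeStats, GetPileupSummaries/CalculateContamination, and finally FilterMutectCalls. A minimal workflow sketch follows; the import path, workflow name, and input wiring are assumptions for illustration and are not part of the change — only the task names and their declared inputs and outputs are taken from the diff above.

version 1.0

# Assumed import path; only the gatk.* task names and their inputs/outputs
# come from the tasks added in this change.
import "gatk.wdl" as gatk

workflow Mutect2Filtering {
    input {
        Array[File]+ scatteredF1r2   # MuTect2.f1r2File, collected per scattered interval
        Array[File]+ scatteredStats  # MuTect2.stats, collected per scattered interval
        File unfilteredVcf           # merged MuTect2 VCF
        File unfilteredVcfIndex
        File tumorPileups            # from GetPileupSummaries on the tumor sample
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict
    }

    # Turn the per-interval read-orientation evidence into one artifact-priors table.
    call gatk.LearnReadOrientationModel as learnReadOrientationModel {
        input:
            f1r2TarGz = scatteredF1r2
    }

    # Merge the per-interval Mutect2 stats files.
    call gatk.MergeStats as mergeStats {
        input:
            stats = scatteredStats
    }

    call gatk.CalculateContamination as calculateContamination {
        input:
            tumorPileups = tumorPileups
    }

    call gatk.FilterMutectCalls as filterMutectCalls {
        input:
            referenceFasta = referenceFasta,
            referenceFastaFai = referenceFastaFai,
            referenceFastaDict = referenceFastaDict,
            unfilteredVcf = unfilteredVcf,
            unfilteredVcfIndex = unfilteredVcfIndex,
            outputVcf = "filtered.vcf.gz",
            contaminationTable = calculateContamination.contaminationTable,
            mafTumorSegments = calculateContamination.mafTumorSegments,
            artifactPriors = learnReadOrientationModel.artifactPriorsTable,
            mutect2Stats = mergeStats.mergedStats
    }

    output {
        File filteredVcf = filterMutectCalls.filteredVcf
        File filteredVcfIndex = filterMutectCalls.filteredVcfIndex
        File filteringStats = filterMutectCalls.filteringStats
    }
}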