diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..246e3814b2afb0db669450e37363f20c326e319d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+# Tasks
+
+This repository contains the WDL task definitions used in the various
+[Biowdl](https://github.com/biowdl) workflows and pipelines.
+
+
+## Documentation
+
+Documentation for these tasks can be found
+[here](https://biowdl.github.io/tasks/).
+
+## About
+These tasks are part of [Biowdl](https://github.com/biowdl)
+developed by [the SASC team](http://sasc.lumc.nl/).
+
+## Contact
+
+<p>
+  <!-- Obscure e-mail address for spammers -->
+For any question related to these tasks, please use the
+<a href='https://github.com/biowdl/tasks/issues'>GitHub issue tracker</a>
+or contact
+ <a href='http://sasc.lumc.nl/'>the SASC team</a> directly at: <a href='mailto:sasc@lumc.nl'>
+sasc@lumc.nl</a>.
+</p>
diff --git a/biopet.wdl b/biopet.wdl
index 36d6f2a9cd7b4b4bf633a063ff2220c2cee70995..a3ecdd01282fc8579cf9a5db6e514b29f6f99bc8 100644
--- a/biopet.wdl
+++ b/biopet.wdl
@@ -417,3 +417,74 @@ task ValidateVcf {
         memory: ceil(memory * memoryMultiplier)
     }
 }
+
+# Runs biopet-vcfstats (or "java -jar toolJar" when toolJar is set) on vcfFile
+# with -o outDir. Each optional list input is emitted as one repeated flag per
+# element, so every sep string must end in a space before the next value.
+task VcfStats {
+    input {
+        File vcfFile
+        File vcfIndex
+        File refFasta
+        File refFastaIndex
+        File refDict
+        String outDir
+        File? intervals
+        Array[String]+? infoTags
+        Array[String]+? genotypeTags
+        Int? sampleToSampleMinDepth
+        Int? binSize
+        Int? maxContigsInSingleJob
+        Boolean writeBinStats = false
+        Int localThreads = 1
+        Boolean notWriteContigStats = false
+        Boolean skipGeneral = false
+        Boolean skipGenotype = false
+        Boolean skipSampleDistributions = false
+        Boolean skipSampleCompare = false
+        String? sparkMaster
+        Int? sparkExecutorMemory
+        Array[String]+? sparkConfigValues
+
+        Int memory = 4
+        Float memoryMultiplier = 2.0
+        File? toolJar
+        String? preCommand
+    }
+
+    String toolCommand = if defined(toolJar)
+        then "java -Xmx" + memory + "G -jar " + toolJar
+        else "biopet-vcfstats -Xmx" + memory + "G"
+
+    command {
+        set -e -o pipefail
+        ~{preCommand}
+        ~{toolCommand} \
+        -I ~{vcfFile} \
+        -R ~{refFasta} \
+        -o ~{outDir} \
+        -t ~{localThreads} \
+        ~{"--intervals " + intervals} \
+        ~{true="--infoTag" false="" defined(infoTags)} ~{sep=" --infoTag " infoTags} \
+        ~{true="--genotypeTag" false="" defined(genotypeTags)} ~{sep=" --genotypeTag "
+            genotypeTags} \
+        ~{"--sampleToSampleMinDepth " + sampleToSampleMinDepth} \
+        ~{"--binSize " + binSize} \
+        ~{"--maxContigsInSingleJob " + maxContigsInSingleJob} \
+        ~{true="--writeBinStats" false="" writeBinStats} \
+        ~{true="--notWriteContigStats" false="" notWriteContigStats} \
+        ~{true="--skipGeneral" false="" skipGeneral} \
+        ~{true="--skipGenotype" false="" skipGenotype} \
+        ~{true="--skipSampleDistributions" false="" skipSampleDistributions} \
+        ~{true="--skipSampleCompare" false="" skipSampleCompare} \
+        ~{"--sparkMaster " + sparkMaster} \
+        ~{"--sparkExecutorMemory " + sparkExecutorMemory} \
+        ~{true="--sparkConfigValue" false="" defined(sparkConfigValues)} ~{
+            sep=" --sparkConfigValue " sparkConfigValues}
+    }
+
+    runtime {
+        cpu: localThreads
+        memory: ceil(memory * memoryMultiplier)
+    }
+}
diff --git a/gatk.wdl b/gatk.wdl
index 68281f386d0dab5e69f734415711a570f657382b..75e45e5904f7f30447ffb0c579988c028e9dadc9 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -293,6 +293,53 @@ task HaplotypeCallerGvcf {
     }
 }
 
+# Runs GATK Mutect2 on inputBams, passing every interval with its own -L flag,
+# and exposes the file written to outputVcf as vcfFile.
+task MuTect2 {
+    input {
+        String? preCommand
+
+        Array[File]+ inputBams
+        File inputBamIndex
+        File refFasta
+        File refFastaIndex
+        File refDict
+        String outputVcf
+        String tumorSample
+        String? normalSample
+        Array[File]+ intervals
+
+        String? gatkJar
+        Int memory = 4
+        Float memoryMultiplier = 3
+    }
+
+    String toolCommand = if defined(gatkJar)
+        then "java -Xmx" + memory + "G -jar " + gatkJar
+        else "gatk --java-options -Xmx" + memory + "G"
+
+    command {
+        set -e -o pipefail
+        ~{preCommand}
+        ~{toolCommand} \
+        Mutect2 \
+        -R ~{refFasta} \
+        -I ~{sep=" -I " inputBams} \
+        -tumor ~{tumorSample} \
+        ~{"-normal " + normalSample} \
+        -O ~{outputVcf} \
+        -L ~{sep=" -L " intervals}
+    }
+
+    output {
+        File vcfFile = outputVcf
+    }
+
+    runtime {
+        memory: ceil(memory * memoryMultiplier)
+    }
+}
+
 task SplitNCigarReads {
     input {
         String? preCommand
diff --git a/strelka.wdl b/strelka.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..29e2a078ef59390d1dfceef3b093b25c0ea32236
--- /dev/null
+++ b/strelka.wdl
@@ -0,0 +1,48 @@
+version 1.0
+
+# Runs configureStrelkaSomaticWorkflow.py for a tumor/normal pair and then
+# executes runDir/runWorkflow.py with "-m local". installDir may omit the trailing slash.
+task Somatic {
+    input {
+        String? preCommand
+        String? installDir
+        String runDir
+        File normalBam
+        File tumorBam
+        File refFasta
+
+        Int cores = 1
+        Int memory = 4
+    }
+
+    String toolCommand = if defined(installDir)
+        then installDir + "/bin/configureStrelkaSomaticWorkflow.py"
+        else "configureStrelkaSomaticWorkflow.py"
+
+    command {
+        set -e -o pipefail
+        ~{preCommand}
+        ~{toolCommand} \
+        --normalBam ~{normalBam} \
+        --tumorBam ~{tumorBam} \
+        --ref ~{refFasta} \
+        --runDir ~{runDir}
+
+        ~{runDir}/runWorkflow.py \
+        -m local \
+        -J ~{cores} \
+        -g ~{memory}
+    }
+
+    output {
+        File indelsVcf = runDir + "/results/variants/somatic.indels.vcf.gz"
+        File indelsIndex = runDir + "/results/variants/somatic.indels.vcf.gz.tbi"
+        File snvVcf = runDir + "/results/variants/somatic.snvs.vcf.gz"
+        File snvIndex = runDir + "/results/variants/somatic.snvs.vcf.gz.tbi"
+    }
+
+    runtime {
+        cpu: cores
+        memory: memory
+    }
+}
\ No newline at end of file
diff --git a/vardict.wdl b/vardict.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..9dbb0ac49471f84ad88eb2a7c003f66ef39f9be2
--- /dev/null
+++ b/vardict.wdl
@@ -0,0 +1,50 @@
+version 1.0
+
+# Pipes VarDict output for a tumor/normal BAM pair through testsomatic.R and
+# var2vcf_paired.pl, redirecting the final output to outputVcf.
+task VarDict {
+    input {
+        String? installDir
+
+        File tumorBam
+        File normalBam
+        File refFasta
+        File bedFile
+        String tumorSampleName
+        String normalSampleName
+        String outputVcf
+
+        Int chromosomeColumn = 1
+        Int startColumn = 2
+        Int endColumn = 3
+        Int geneColumn = 4
+
+        String? preCommand
+    }
+
+    String toolCommand = if defined(installDir)
+        then installDir + "/VarDict"
+        else "vardict"
+
+    command {
+        set -e -o pipefail
+        ~{preCommand}
+        ~{toolCommand} \
+        -G ~{refFasta} \
+        -N ~{tumorSampleName} \
+        -b "~{tumorBam}|~{normalBam}" \
+        -c ~{chromosomeColumn} \
+        -S ~{startColumn} \
+        -E ~{endColumn} \
+        -g ~{geneColumn} \
+        ~{bedFile} | \
+        ~{installDir + "/"}testsomatic.R | \
+        ~{installDir + "/"}var2vcf_paired.pl \
+        -N "~{tumorSampleName}|~{normalSampleName}" \
+        > ~{outputVcf}
+    }
+
+    output {
+        File vcfFile = outputVcf
+    }
+}
\ No newline at end of file