From d101e77cf3211079a7b7ca50c0203ffea811919b Mon Sep 17 00:00:00 2001 From: JasperBoom <jboom@infernum.nl> Date: Mon, 2 Nov 2020 15:38:52 +0100 Subject: [PATCH] Add last set of updates. --- .github/PULL_REQUEST_TEMPLATE.md | 3 +- CHANGELOG.md | 2 + pacbio.wdl | 89 +++++++++++++++++++++++++ samtools.wdl | 2 +- seqtk.wdl | 9 +-- smoove.wdl | 9 +-- somaticseq.wdl | 89 ++++++++++++------------- spades.wdl | 12 ++-- star.wdl | 20 +++--- strelka.wdl | 34 +++++----- stringtie.wdl | 18 ++--- survivor.wdl | 19 +++--- talon.wdl | 10 +-- transcriptclean.wdl | 21 +++--- umi-tools.wdl | 41 ++++++------ unicycler.wdl | 5 +- vardict.wdl | 41 ++++++------ vt.wdl | 20 +++--- whatshap.wdl | 111 ++++++++++++++++--------------- wisestork.wdl | 44 ++++++------ 20 files changed, 351 insertions(+), 248 deletions(-) create mode 100644 pacbio.wdl diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 199344f..1d52f50 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,3 @@ - ### Checklist -- [ ] Pull request details were added to CHANGELOG.md +- [ ] Pull request details were added to CHANGELOG.md. - [ ] `parameter_meta` for each task is up to date. diff --git a/CHANGELOG.md b/CHANGELOG.md index f0dfaf1..e7d7fed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. + Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve @@ -20,6 +21,7 @@ version 5.0.0-dev + Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. ++ Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. diff --git a/pacbio.wdl b/pacbio.wdl new file mode 100644 index 0000000..01f6d4f --- /dev/null +++ b/pacbio.wdl @@ -0,0 +1,89 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task mergePacBio { + input { + Array[File]+ reports + String mergedReport + + String memory = "4G" + String dockerImage = "lumc/pacbio-merge:0.2" + } + + command { + set -e + mkdir -p $(dirname ~{mergedReport}) + pacbio_merge \ + --reports ~{sep=" " reports} \ + --json-output ~{mergedReport} + } + + runtime { + memory: memory + docker: dockerImage + } + + output { + File MergedReport = mergedReport + } + + parameter_meta { + # inputs + reports: {description: "The PacBio report files to merge.", category: "required"} + mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task ccsChunks { + input { + Int chunkCount + + String memory = "4G" + String dockerImage = "python:3.7-slim" + } + + command { + set -e + python <<CODE + for i in range(1, ~{chunkCount} + 1): + print(i, ~{chunkCount}, sep="/") + CODE + } + + runtime { + memory: memory + docker: dockerImage + } + + output { + Array[String] chunks = read_lines(stdout()) + } + + parameter_meta { + # inputs + chunkCount: {description: "The number of chunks to create.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} diff --git a/samtools.wdl b/samtools.wdl index 496cf23..e274cf5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -431,7 +431,7 @@ task Sort { # outputs outputBam: {description: "Sorted BAM file."} - outputBamIndex: {description "Sorted BAM file index."} + outputBamIndex: {description: "Sorted BAM file index."} } } diff --git a/seqtk.wdl b/seqtk.wdl index 321ab13..f6fa422 100644 --- a/seqtk.wdl +++ b/seqtk.wdl @@ -24,11 +24,12 @@ task Sample { input { File sequenceFile String outFilePath = "subsampledReads.fq.gz" - String? preCommand - Int? seed Boolean twoPassMode = false - Float fractionOrNumber # when above 1.0 is the number of reads, otherwise it's a fraction + Float fractionOrNumber # When above 1.0 is the number of reads, otherwise it's a fraction. Boolean zip = true + + String? preCommand + Int? 
seed } command { @@ -47,4 +48,4 @@ task Sample { output { File subsampledReads = outFilePath } -} \ No newline at end of file +} diff --git a/smoove.wdl b/smoove.wdl index e8846f7..244c2ca 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -32,8 +30,8 @@ task Call { String outputDir = "./smoove" String memory = "15G" - String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" } command { @@ -52,9 +50,8 @@ task Call { runtime { memory: memory - docker: dockerImage time_minutes: timeMinutes - + docker: dockerImage } parameter_meta { @@ -63,8 +60,8 @@ task Call { bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } - outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} + outputDir: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/somaticseq.wdl b/somaticseq.wdl index 7b9a440..1c73fc5 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -22,17 +22,18 @@ version 1.0 task ParallelPaired { input { - File? classifierSNV - File? classifierIndel String outputDir File referenceFasta File referenceFastaFai - File? inclusionRegion - File? exclusionRegion File tumorBam File tumorBamIndex File normalBam File normalBamIndex + + File? classifierSNV + File? classifierIndel + File? inclusionRegion + File? exclusionRegion File? mutect2VCF File? varscanSNV File? 
varscanIndel @@ -95,17 +96,18 @@ task ParallelPaired { } parameter_meta { - classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"} - classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"} + # inputs outputDir: {description: "The directory to write the output to.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} - exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} - normalBam: {description: "The normal/control sample's BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + normalBam: {description: "The normal/control sample's BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} + classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"} + classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"} + inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} + exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} varscanSNV: {description: "An SNV VCF as produced by varscan.", category: "advanced"} varscanIndel: {description: "An indel VCF as produced by varscan.", category: "advanced"} @@ -118,11 +120,9 @@ task ParallelPaired { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -133,12 +133,13 @@ task ParallelPairedTrain { String outputDir File referenceFasta File referenceFastaFai - File? inclusionRegion - File? exclusionRegion File tumorBam File tumorBamIndex File normalBam File normalBamIndex + + File? inclusionRegion + File? exclusionRegion File? mutect2VCF File? varscanSNV File? 
varscanIndel @@ -200,17 +201,18 @@ task ParallelPairedTrain { } parameter_meta { + # inputs truthSNV: {description: "A VCF of true SNVs.", category: "required"} truthIndel: {description: "A VCF of true indels.", category: "required"} outputDir: {description: "The directory to write the output to.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} - exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} - normalBam: {description: "The normal/control sample's BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + normalBam: {description: "The normal/control sample's BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} + inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} + exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} varscanSNV: {description: "An SNV VCF as produced by varscan.", category: "advanced"} varscanIndel: {description: "An indel VCF as produced by varscan.", category: "advanced"} @@ -223,25 +225,24 @@ task ParallelPairedTrain { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task ParallelSingle { input { - File? classifierSNV - File? classifierIndel + File bam + File bamIndex String outputDir File referenceFasta File referenceFastaFai + + File? classifierSNV + File? classifierIndel File? inclusionRegion File? exclusionRegion - File bam - File bamIndex File? mutect2VCF File? varscanVCF File? 
vardictVCF @@ -291,40 +292,40 @@ task ParallelSingle { } parameter_meta { - classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"} - classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"} + # inputs + bam: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The index for the input BAM file.", category: "required"} outputDir: {description: "The directory to write the output to.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"} + classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"} inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} - bam: {description: "The input BAM file.", category: "required"} - bamIndex: {description: "The index for the input BAM file.", category: "required"} mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} varscanVCF: {description: "A VCF as produced by varscan.", category: "advanced"} vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"} lofreqVCF: {description: "A VCF as produced by lofreq.", category: "advanced"} scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task ParallelSingleTrain { input { + File bam + File bamIndex File truthSNV File truthIndel String outputDir File referenceFasta File referenceFastaFai + File? inclusionRegion File? exclusionRegion - File bam - File bamIndex File? mutect2VCF File? varscanVCF File? 
vardictVCF @@ -373,6 +374,9 @@ task ParallelSingleTrain { } parameter_meta { + # inputs + bam: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The index for the input BAM file.", category: "required"} truthSNV: {description: "A VCF of true SNVs.", category: "required"} truthIndel: {description: "A VCF of true indels.", category: "required"} outputDir: {description: "The directory to write the output to.", category: "common"} @@ -380,19 +384,15 @@ task ParallelSingleTrain { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} - bam: {description: "The input BAM file.", category: "required"} - bamIndex: {description: "The index for the input BAM file.", category: "required"} mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} varscanVCF: {description: "A VCF as produced by varscan.", category: "advanced"} vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"} lofreqVCF: {description: "A VCF as produced by lofreq.", category: "advanced"} scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -400,17 +400,16 @@ task ModifyStrelka { input { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") - String dockerImage = "lethalfang/somaticseq:3.1.0" + Int timeMinutes = 20 + String dockerImage = "lethalfang/somaticseq:3.1.0" } command { set -e - /opt/somaticseq/vcfModifier/modify_Strelka.py \ -infile ~{strelkaVCF} \ -outfile "modified_strelka.vcf" - first_FORMAT_line_num=$(grep -n -m 1 '##FORMAT' "modified_strelka.vcf" | cut -d : -f 1) sed "$first_FORMAT_line_num"'i##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">' "modified_strelka.vcf" > ~{outputVCFName} } @@ -425,10 +424,10 @@ task ModifyStrelka { } parameter_meta { + # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/spades.wdl b/spades.wdl index 204dbfe..7cc16d2 100644 --- a/spades.wdl +++ b/spades.wdl @@ -22,10 +22,11 @@ version 1.0 task Spades { input { - String outputDir - String? preCommand File read1 File? 
read2 + String outputDir + + String? preCommand File? interlacedReads File? sangerReads File? pacbioReads @@ -44,12 +45,13 @@ task Spades { Boolean? disableGzipOutput Boolean? disableRepeatResolution File? dataset - Int threads = 1 - Int memoryGb = 16 File? tmpDir String? k Float? covCutoff Int? phredOffset + + Int threads = 1 + Int memoryGb = 16 } command { @@ -100,4 +102,4 @@ task Spades { cpu: threads memory: "~{memoryGb}G" } -} \ No newline at end of file +} diff --git a/star.wdl b/star.wdl index 3d0e2eb..68193fc 100644 --- a/star.wdl +++ b/star.wdl @@ -24,6 +24,7 @@ task GenomeGenerate { input { String genomeDir = "STAR_index" File referenceFasta + File? referenceGtf Int? sjdbOverhang @@ -61,8 +62,10 @@ task GenomeGenerate { File? sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab" File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab" File? transcriptInfo = "~{genomeDir}/transcriptInfo.tab" - Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters, - sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut, + Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, + chrStart, genome, genomeParameters, + sa, saIndex, exonGeTrInfo, exonInfo, + geneInfo, sjdbInfo, sjdbListFromGtfOut, sjdbListOut, transcriptInfo]) } @@ -74,16 +77,15 @@ task GenomeGenerate { } parameter_meta { + # inputs genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,6 +97,8 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" + Int outBAMcompression = 1 + Int? outFilterScoreMin Float? outFilterScoreMinOverLread Int? outFilterMatchNmin @@ -103,7 +107,6 @@ task Star { String? twopassMode = "Basic" Array[String]? outSAMattrRGline String? outSAMunmapped = "Within KeepPairs" - Int outBAMcompression = 1 Int? limitBAMsortRAM Int runThreadN = 4 @@ -119,7 +122,7 @@ task Star { # So we solve it with an optional memory string and using select_first # in the runtime section. - #TODO Could be extended for all possible output extensions + #TODO: Could be extended for all possible output extensions. 
Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} command { @@ -157,12 +160,14 @@ task Star { } parameter_meta { + # inputs inputR1: {description: "The first-/single-end FastQ files.", category: "required"} inputR2: {description: "The second-end FastQ files (in the same order as the first-end files).", category: "common"} indexFiles: {description: "The star index files.", category: "required"} outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} outSAMtype: {description: "The type of alignment file to be produced. Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"} outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"} outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"} @@ -174,7 +179,6 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/strelka.wdl b/strelka.wdl index 50c38b5..f4b9888 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -29,11 +29,12 @@ task Germline { Array[File]+ indexes File referenceFasta File referenceFastaFai - File? callRegions - File? callRegionsIndex Boolean exome = false Boolean rna = false + File? callRegions + File? 
callRegionsIndex + Int cores = 1 Int memoryGb = 4 Int timeMinutes = 90 @@ -61,28 +62,27 @@ task Germline { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} bams: {description: "The input BAM files.", category: "required"} indexes: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} - callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} rna: {description: "Whether or not the data is from RNA sequencing.", category: "common"} - + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,11 +95,12 @@ task Somatic { File tumorBamIndex File referenceFasta File referenceFastaFai + Boolean exome = false + File? callRegions File? callRegionsIndex File? indelCandidatesVcf File? 
indelCandidatesVcfIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -133,13 +134,14 @@ task Somatic { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} normalBam: {description: "The normal/control sample's BAM file.", category: "required"} normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} @@ -147,17 +149,15 @@ task Somatic { tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} indelCandidatesVcf: {description: "An indel candidates VCF file from manta.", category: "advanced"} indelCandidatesVcfIndex: {description: "The index for the indel candidates VCF file.", category: "advanced"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { @@ -165,4 +165,4 @@ task Somatic { exclude: ["doNotDefineThis"] } } -} \ No newline at end of file +} diff --git a/stringtie.wdl b/stringtie.wdl index 5ed62de..fff4140 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -24,9 +24,10 @@ task Stringtie { input { File bam File bamIndex - File? referenceGtf Boolean skipNovelTranscripts = false String assembledTranscriptsFile + + File? referenceGtf Boolean? firstStranded Boolean? secondStranded String? 
geneAbundanceFile @@ -64,19 +65,19 @@ task Stringtie { } parameter_meta { + # inputs bam: {description: "The input BAM file.", category: "required"} bamIndex: {description: "The input BAM file's index.", category: "required"} - referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"} assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"} + referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -84,13 +85,14 @@ task Merge { input { Array[File]+ gtfFiles String outputGtfPath + Boolean keepMergedTranscriptsWithRetainedIntrons = false + File? guideGtf Int? minimumLength Float? minimumCoverage Float? minimumFPKM Float? minimumTPM Float? minimumIsoformFraction - Boolean keepMergedTranscriptsWithRetainedIntrons = false String? label String memory = "10G" @@ -125,19 +127,19 @@ task Merge { } parameter_meta { + # inputs gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"} outputGtfPath: {description: "Where the output should be written.", category: "required"} + keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"} minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"} minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"} minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"} minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"} minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"} - keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
}
}
diff --git a/survivor.wdl b/survivor.wdl
index b958300..c7b3105 100644
--- a/survivor.wdl
+++ b/survivor.wdl
@@ -1,7 +1,5 @@
version 1.0
-# MIT License
-#
# Copyright (c) 2018 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -32,6 +30,7 @@ task Merge {
Boolean distanceBySvSize = false
Int minSize = 30
String outputPath = "./survivor/merged.vcf"
+
String memory = "24G"
Int timeMinutes = 60
String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0"
@@ -64,15 +63,15 @@ task Merge {
parameter_meta {
# inputs
- filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"}
- breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"}
- suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"}
- svType: {description: "A boolean to include the type SV to be merged", category: "advanced"}
- strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"}
- distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"}
- minSize: {description: "The mimimum size of SV to be merged", category: "advanced"}
+ filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR.", category: "required"}
+ breakpointDistance: {description: "The distance between pairwise breakpoints between SVs.", category: "advanced"}
+ suppVecs: {description: "The minimum number of SV callers to support the merging.", category: "advanced"}
+ svType: {description: "A boolean to include the type SV to be merged.", category: "advanced"}
+ strandType: {description: "A boolean to include strand type of an SV to be merged.", category: "advanced"}
+ distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size.", category: "advanced"}
+ minSize: {description: "The minimum size of SV to be merged.", category: "advanced"}
outputPath: {description: "The location the output VCF file should be written.", category: "common"}
- memory: {description: "The memory required to run the programs", category: "advanced"}
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/talon.wdl b/talon.wdl index c11ab9e..61f5eb4 100644 --- a/talon.wdl +++ b/talon.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 7966130..efdd95f 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -54,10 +54,10 @@ task GetSJsFromGtf { parameter_meta { # inputs - gtfFile: {description: "Input gtf file", category: "required"} - genomeFile: {description: "Reference genome", category: "required"} - minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} + gtfFile: {description: "Input gtf file.", category: "required"} + genomeFile: {description: "Reference genome.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +97,7 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - inputSam: {description: "Output sam file from transcriptclean", category: "required"} + inputSam: {description: "Output sam file from transcriptclean.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -189,8 +189,7 @@ task TranscriptClean { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs fastaFile: {description: "Fasta file containing corrected reads."} diff --git a/umi-tools.wdl b/umi-tools.wdl index c5f3b14..7b0a399 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2017 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,9 +26,10 @@ task Extract { File? read2 String bcPattern String? bcPattern2 - Boolean threePrime = false String read1Output = "umi_extracted_R1.fastq.gz" String? read2Output = "umi_extracted_R2.fastq.gz" + Boolean threePrime = false + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -50,21 +51,21 @@ task Extract { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs read1: {description: "The first/single-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bcPattern: {description: "The pattern to be used for UMI extraction. 
See the umi_tools docs for more information.", category: "required"} bcPattern2: {description: "The pattern to be used for UMI extraction in the second-end reads. See the umi_tools docs for more information.", category: "advanced"} - threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -72,15 +73,15 @@ task Dedup { input { File inputBam File inputBamIndex - String? umiSeparator String outputBamPath - String? statsPrefix Boolean paired = true + String? umiSeparator + String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) + # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -107,21 +108,21 @@ task Dedup { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} - umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} + umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} + statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/unicycler.wdl b/unicycler.wdl index fc39360..938d0c7 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -22,12 +22,13 @@ version 1.0 task Unicycler { input { + String out + String? preCommand File? short1 File? short2 File? unpaired File? long - String out Int? 
verbosity Int? minFastaLength Int? keep @@ -125,4 +126,4 @@ task Unicycler { cpu: threads memory: memory } -} \ No newline at end of file +} diff --git a/vardict.wdl b/vardict.wdl index 92beb32..fc37c9e 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -27,29 +27,28 @@ task VarDict { String tumorSampleName File tumorBam File tumorBamIndex - String? normalSampleName - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai File bedFile String outputVcf - - Int chromosomeColumn = 1 - Int startColumn = 2 - Int endColumn = 3 - Int geneColumn = 4 - Boolean outputCandidateSomaticOnly = true Boolean outputAllVariantsAtSamePosition = true Float mappingQuality = 20 Int minimumTotalDepth = 8 Int minimumVariantDepth = 4 Float minimumAlleleFrequency = 0.02 + Int chromosomeColumn = 1 + Int startColumn = 2 + Int endColumn = 3 + Int geneColumn = 4 + + String? normalSampleName + File? normalBam + File? normalBamIndex + String javaXmx = "16G" Int threads = 1 String memory = "18G" - String javaXmx = "16G" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } @@ -93,33 +92,31 @@ task VarDict { } parameter_meta { + # inputs tumorSampleName: {description: "The name of the tumor/case sample.", category: "required"} tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalSampleName: {description: "The name of the normal/control sample.", category: "common"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} bedFile: {description: "A bed file describing the regions to operate on. 
These regions must be below 1e6 bases in size.", category: "required"}
outputVcf: {description: "The location to write the output VCF file to.", category: "required"}
- chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"}
- startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"}
- endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"}
- geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"}
outputCandidateSomaticOnly: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-M` flag.", category: "advanced"}
outputAllVariantsAtSamePosition: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-A` flag.", category: "advanced"}
mappingQuality: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-Q` option.", category: "advanced"}
minimumTotalDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-d` option.", category: "advanced"}
minimumVariantDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-v` option.", category: "advanced"}
minimumAlleleFrequency: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-f` option.", category: "advanced"}
-
+ chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"}
+ startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"}
+ endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"}
+ geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"}
+ normalSampleName: {description: "The name of the normal/control sample.", category: "common"}
+ normalBam: {description: "The normal/control sample's BAM file.", category: "common"}
+ normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/vt.wdl b/vt.wdl index 99cc131..95585ff 100644 --- a/vt.wdl +++ b/vt.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -28,9 +28,10 @@ task Normalize { File referenceFastaFai Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + String memory = "4G" Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } command { @@ -56,13 +57,12 @@ task Normalize { # inputs inputVCF: {description: "The VCF file to process.", category: "required"} inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/whatshap.wdl b/whatshap.wdl index 9362459..5c69400 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -20,10 +20,14 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Phase { input { String outputVCF + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + File? reference File? referenceIndex String? tag @@ -33,20 +37,15 @@ task Phase { String? chromosome String? threshold String? 
ped
- File vcf
- File vcfIndex
- File phaseInput
- File phaseInputIndex
String memory = "4G"
Int timeMinutes = 120
- # Whatshap 1.0, tabix 0.2.5
+ # Whatshap 1.0, tabix 0.2.5.
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0"
}
command {
set -e
-
whatshap phase \
~{vcf} \
~{phaseInput} \
@@ -69,24 +68,27 @@
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"}
- reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created", category: "common"}
- tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"}
- algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"}
- indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"}
+ vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed).", category: "required"}
+ vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"}
+ phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF).", category: "required"}
+ phaseInputIndex: {description: "Index of BAM, CRAM, VCF or BCF file(s) with phase information.", category: "required"}
+ reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"}
+ referenceIndex: {description: "Index of reference file.", category: "common"}
+ tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: PS).", category: "common"}
+ algorithm: {description: "Phasing algorithm to use (default: whatshap).", category: "advanced"}
+ indels: {description: "Also phase indels (default: do not phase indels).", category: "common"}
sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"}
chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. Can be used multiple times.", category: "common"}
threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"}
ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. 
Other columns are ignored.", category: "advanced"} - vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} - vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} - phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -95,16 +97,17 @@ task Phase { task Stats { input { + File vcf + String? gtf String? sample String? tsv String? blockList String? chromosome - File vcf String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } @@ -125,18 +128,19 @@ task Stats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - gtf: "Write phased blocks to GTF file." - sample: "Name of the sample to process. If not given, use first sample found in VCF." - tsv: "Filename to write statistics to (tab-separated)." - blockList: "Filename to write list of all blocks to (one block per line)." - chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." - vcf: "Phased VCF file" + # inputs + vcf: {description: "Phased VCF file.", category: "required"} + gtf: {description: "Write phased blocks to GTF file.", category: "common"} + sample: {description: "Name of the sample to process. If not given, use first sample found in VCF.", category: "common"} + tsv: {description: "Filename to write statistics to (tab-separated).", category: "common"} + blockList: {description: "Filename to write list of all blocks to (one block per line).", category: "advanced"} + chromosome: {description: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -145,57 +149,58 @@ task Stats { task Haplotag { input { + File vcf + File vcfIndex + File alignments + File alignmentsIndex String outputFile + File? reference File? referenceFastaIndex String? regions String? sample - File vcf - File vcfIndex - File alignments - File alignmentsIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap haplotag \ - ~{vcf} \ - ~{alignments} \ - ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ - ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} - - python3 -c "import pysam; pysam.index('~{outputFile}')" + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + + python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceFastaIndex: "Index for the reference file." - regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." - vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." - vcfIndex: "Index for the VCF or BCF file with variants to be phased." - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." - alignmentsIndex: "Index for the alignment file." + # inputs + vcf: {description: "VCF file with phased variants (must be gzip-compressed and indexed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + alignments: {description: "File (BAM/CRAM) with read alignments to be tagged by haplotype.", category: "required"} + alignmentsIndex: {description: "Index for the alignment file.", category: "required"} + outputFile: {description: "Output file. If omitted, use standard output.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceFastaIndex: {description: "Index for the reference file.", category: "common"} + regions: {description: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome).", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. 
Can be used multiple times.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/wisestork.wdl b/wisestork.wdl index 0fd812b..6be3216 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -22,13 +22,15 @@ version 1.0 task Count { input { - Int? binSize - File reference - File referenceIndex - File? binFile File inputBam File inputBamIndex + File reference + File referenceIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -54,15 +56,17 @@ task Count { task GcCorrect { input { - Int? binSize File reference File referenceIndex - File? binFile File inputBed String outputBed = "output.bed" + + Int? binSize + File? binFile Float? fracN Int? iter Float? fracLowess + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -91,13 +95,16 @@ task GcCorrect { task Newref { input { - Int? binSize File reference File referenceIndex - File? binFile Array[File]+ inputBeds String outputBed = "output.bed" + + Int? binSize + File? binFile Int? nBins + + Int memory = 2 + ceil(length(inputBeds) * 0.15) String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -106,36 +113,36 @@ task Newref { mkdir -p $(dirname ~{outputBed}) wisestork newref \ ~{"--binsize " + binSize} \ - --reference ~{reference} \ - ~{"--bin-file " + binFile} \ - --output ~{outputBed} \ - -I ~{sep=" -I " inputBeds} \ - ~{"--n-bins " + nBins} + --reference ~{reference} \ + ~{"--bin-file " + binFile} \ + --output ~{outputBed} \ + -I ~{sep=" -I " inputBeds} \ + ~{"--n-bins " + nBins} } output { File bedFile = outputBed } - Int memory = 2 + ceil(length(inputBeds) * 0.15) - runtime { - docker: dockerImage memory: "~{memory}G" + docker: dockerImage } } task Zscore { input { - Int? binSize File reference File referenceIndex - File? binFile File inputBed File inputBedIndex File dictionaryFile File dictionaryFileIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -159,4 +166,3 @@ task Zscore { docker: dockerImage } } - -- GitLab
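
Usage sketch (reviewer aid, not part of the patch): the hunks above reorder the inputs of the whatshap Phase task and restructure its parameter_meta. The WDL below is a minimal, illustrative example of calling the reorganised task; the import path, workflow name, and input names are assumptions, and workflow outputs are omitted because the task's output block is not shown in these hunks.

    version 1.0

    # Import path assumed; adjust to wherever whatshap.wdl lives in the importing project.
    import "whatshap.wdl" as whatshap

    workflow PhaseExample {
        input {
            File variants       # VCF/BCF with variants to be phased.
            File variantsIndex  # Index for the variants file.
            File reads          # BAM/CRAM (or phased VCF/BCF) providing phase information.
            File readsIndex     # Index for the phase-information file.
        }

        call whatshap.Phase {
            input:
                outputVCF = "phased.vcf.gz",
                vcf = variants,
                vcfIndex = variantsIndex,
                phaseInput = reads,
                phaseInputIndex = readsIndex
        }
    }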