Skip to content
Snippets Groups Projects
Unverified Commit c9ec2639 authored by Jasper Boom's avatar Jasper Boom Committed by GitHub
Browse files

Merge pull request #161 from biowdl/BIOWDL-354

BIOWDL-354: Update parameter_meta and TALON task cores.
parents 1ea50377 07bac37b
Branches BIOWDL-450
No related tags found
No related merge requests found
......@@ -11,53 +11,55 @@ that users understand how the changes affect the new version.
version 2.1.0-dev
---------------------------
+ Updated TALON to version 4.4
+ Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format.
+ Updated cores variable for TALON, the default is now 4.
+ Updated TALON to version 4.4.
+ Added parameter_meta sections to the following tools:
+ htseq
+ cutadapt
+ collect-columns
+ stringtie
+ fastqc
+ Updated star default image to 2.7.3a
+ Updated star default image to 2.7.3a.
+ Hisat2 now indexes the resulting BAM file.
+ Samtools index now also works without setting a path for the output
+ Bugfix: Biowdl-input-converter now makes sure the output directory exists
+ Samtools index now also works without setting a path for the output.
+ Bugfix: Biowdl-input-converter now makes sure the output directory exists.
version 2.0.0
---------------------------
+ TranscriptClean: Update TranscriptClean to version 2.0.2
+ TranscriptClean: Update TranscriptClean to version 2.0.2.
+ Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core.
+ Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb".
+ Use the biowdl-input-converter container for JsonToYaml, to reduce the number of containers needed.
+ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists which it replaces.
+ GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0
+ Minimap2: Add -k option to minimap2 mapping
+ Added bwakit task
+ Minimap2: Add the option for --MD tag
+ TALON: Update average memory needs for main TALON process
+ GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0.
+ Minimap2: Add -k option to minimap2 mapping.
+ Added bwakit task.
+ Minimap2: Add the option for --MD tag.
+ TALON: Update average memory needs for main TALON process.
version 1.0.0
---------------------------
+ Common: Add "SampleConfigToSampleReadgroupLists" task
+ MultiQC: the "interactive" input is now set to true by default
+ Common: Add "SampleConfigToSampleReadgroupLists" task.
+ MultiQC: the "interactive" input is now set to true by default.
+ Removed deprecated tasks:
+ bioconda.installPrefix
+ mergecounts.MergeCounts
+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]"
+ Removed BWA index task
+ Removed unused "picardJar" input from bwa.wdl
+ All inputs to bedtools Sort are now reflected in the generated command
+ TranscriptClean: Update TranscriptClean container to version 1.0.8
+ Removed "pipefail" from command sections TALON and TranscriptClean
+ Add WDL task for Minimap2
+ Add WDL task for TALON
+ Add WDL task for TranscriptClean
+ Fastqsplitter: fix mkdir command to work with biocontainer's busybox mkdir
+ Cutadapt: simplify interface
+ Bigger memory multiplier in mutect to take in account bigger vmem usage
+ Cutadapt: Remove default adapter
+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]".
+ Removed BWA index task.
+ Removed unused "picardJar" input from bwa.wdl.
+ All inputs to bedtools Sort are now reflected in the generated command.
+ TranscriptClean: Update TranscriptClean container to version 1.0.8.
+ Removed "pipefail" from command sections TALON and TranscriptClean.
+ Add WDL task for Minimap2.
+ Add WDL task for TALON.
+ Add WDL task for TranscriptClean.
+ Fastqsplitter: fix mkdir command to work with biocontainer's busybox mkdir.
+ Cutadapt: simplify interface.
+ Bigger memory multiplier in mutect to take into account bigger vmem usage.
+ Cutadapt: Remove default adapter.
+ Fastqsplitter: use version 1.1.
+ Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency
+ Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency.
+ Common: Update dockerTag to dockerImage.
+ GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers.
+ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls).
......
......@@ -22,11 +22,11 @@ version 1.0
task Indexing {
input {
File referenceFile
String outputPrefix
Boolean useHomopolymerCompressedKmer = false
Int kmerSize = 15
Int minimizerWindowSize = 10
String outputPrefix
File referenceFile
Int? splitIndex
......@@ -42,9 +42,9 @@ task Indexing {
~{true="-H" false="" useHomopolymerCompressedKmer} \
~{"-k " + kmerSize} \
~{"-w " + minimizerWindowSize} \
~{"-I " + splitIndex} \
~{"-d " + outputPrefix + ".mmi"} \
~{"-t " + cores} \
~{"-I " + splitIndex} \
~{referenceFile}
}
......@@ -59,35 +59,55 @@ task Indexing {
}
parameter_meta {
referenceFile: "Reference fasta file."
outputPrefix: "Output directory path + output file prefix."
useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferrable for PacBio)."
kmerSize: "K-mer size (no larger than 28)."
minimizerWindowSize: "Minimizer window size."
splitIndex: "Split index for every ~NUM input bases."
outputIndexFile: "Indexed reference file."
useHomopolymerCompressedKmer: {
description: "Use homopolymer-compressed k-mer (preferable for PacBio).",
category: "advanced"
}
kmerSize: {
description: "K-mer size (no larger than 28).",
category: "advanced"
}
minimizerWindowSize: {
description: "Minimizer window size.",
category: "advanced"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
referenceFile: {
description: "Reference fasta file.",
category: "required"
}
splitIndex: {
description: "Split index for every ~NUM input bases.",
category: "advanced"
}
outputIndexFile: {
description: "Indexed reference file.",
category: "required"
}
}
}
task Mapping {
input {
File queryFile
File referenceFile
String outputPrefix
String presetOption
Boolean outputSAM = false
Int kmerSize = 15
Boolean skipSelfAndDualMappings = false
Boolean outputSAM = false
String outputPrefix
Boolean addMDtagToSAM = false
Boolean secondaryAlignment = false
File referenceFile
File queryFile
Int? maxFragmentLength
Int? maxIntronLength
Boolean? skipSelfAndDualMappings
Int? maxFragmentLength
Int? retainMaxSecondaryAlignments
Int? matchingScore
Int? mismatchPenalty
String? howToFindGTAG
Boolean? secondaryAlignment
Boolean? addMDtagToSAM
Int cores = 4
String memory = "30G"
......@@ -99,19 +119,19 @@ task Mapping {
mkdir -p $(dirname ~{outputPrefix})
minimap2 \
~{"-x " + presetOption} \
~{"-k " + kmerSize} \
~{true="-X" false="" skipSelfAndDualMappings} \
~{true="-a" false="" outputSAM} \
~{"-o " + outputPrefix} \
~{true="--MD" false="" addMDtagToSAM} \
--secondary=~{true="yes" false="no" secondaryAlignment} \
~{"-t " + cores} \
~{"-G " + maxIntronLength} \
~{"-F " + maxFragmentLength} \
~{"-k " + kmerSize} \
~{true="-X" false="" skipSelfAndDualMappings} \
~{"-N " + retainMaxSecondaryAlignments} \
~{"-A " + matchingScore} \
~{"-B " + mismatchPenalty} \
~{"-u " + howToFindGTAG} \
--secondary=~{true="yes" false="no" secondaryAlignment} \
~{true="--MD" false="" addMDtagToSAM} \
~{"-o " + outputPrefix} \
~{"-t " + cores} \
~{referenceFile} \
~{queryFile}
}
......@@ -127,22 +147,69 @@ task Mapping {
}
parameter_meta {
queryFile: "Input fasta file."
referenceFile: "Reference fasta file."
outputPrefix: "Output directory path + output file prefix."
presetOption: "This option applies multiple options at the same time."
outputSAM: "Output in the SAM format."
maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)."
maxIntronLength: "Max intron length (effective with -xsplice; changing -r)."
kmerSize: "K-mer size (no larger than 28)."
skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)."
retainMaxSecondaryAlignments: "Retain at most INT secondary alignments."
matchingScore: "Matching score."
mismatchPenalty: "Mismatch penalty."
howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG."
secondaryAlignment: "Whether to output secondary alignments."
addMDtagToSAM: "Adds a MD tag to the SAM output file."
outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file."
presetOption: {
description: "This option applies multiple options at the same time.",
category: "common"
}
kmerSize: {
description: "K-mer size (no larger than 28).",
category: "advanced"
}
outputSAM: {
description: "Output in the SAM format.",
category: "common"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
maxIntronLength: {
description: "Max intron length (effective with -xsplice; changing -r).",
category: "advanced"
}
maxFragmentLength: {
description: "Max fragment length (effective with -xsr or in the fragment mode).",
category: "advanced"
}
skipSelfAndDualMappings: {
description: "Skip self and dual mappings (for the all-vs-all mode).",
category: "advanced"
}
retainMaxSecondaryAlignments: {
description: "Retain at most INT secondary alignments.",
category: "advanced"
}
matchingScore: {
description: "Matching score.",
category: "advanced"
}
mismatchPenalty: {
description: "Mismatch penalty.",
category: "advanced"
}
howToFindGTAG: {
description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.",
category: "common"
}
addMDtagToSAM: {
description: "Adds a MD tag to the SAM output file.",
category: "common"
}
secondaryAlignment: {
description: "Whether to output secondary alignments.",
category: "advanced"
}
referenceFile: {
description: "Reference fasta file.",
category: "required"
}
queryFile: {
description: "Input fasta file.",
category: "required"
}
outputAlignmentFile: {
description: "Mapping and alignment between collections of DNA sequences file.",
category: "required"
}
}
}
Subproject commit e00dc247dac8f4aa91a77d6d307f928cc8449527
Subproject commit 6eaa313f172f3efb9e62f2140b8d7fb34da6bd9a
......@@ -456,7 +456,7 @@ task Talon {
String configFileName = basename(configFile)
String SAMfileName = basename(SAMfile)
Int cores = 1
Int cores = 4
String memory = "20G"
String dockerImage = "biocontainers/talon:v4.4_cv1"
}
......
......@@ -38,8 +38,8 @@ task GetSJsFromGtf {
get_SJs_from_gtf \
~{"--f=" + GTFfile} \
~{"--g=" + genomeFile} \
~{"--o=" + outputPrefix + ".tsv"} \
~{"--minIntronSize=" + minIntronSize}
~{"--minIntronSize=" + minIntronSize} \
~{"--o=" + outputPrefix + ".tsv"}
}
output {
......@@ -53,12 +53,26 @@ task GetSJsFromGtf {
}
parameter_meta {
GTFfile: "Input GTF file"
genomeFile: "Reference genome"
outputPrefix: "Output directory path + output file prefix."
minIntronSize: "Minimum size of intron to consider a junction."
outputSJsFile: "Extracted splice junctions."
GTFfile: {
description: "Input GTF file.",
category: "required"
}
genomeFile: {
description: "Reference genome.",
category: "required"
}
minIntronSize: {
description: "Minimum size of intron to consider a junction.",
category: "advanced"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
outputSJsFile: {
description: "Extracted splice junctions.",
category: "required"
}
}
}
......@@ -91,10 +105,18 @@ task GetTranscriptCleanStats {
}
parameter_meta {
transcriptCleanSAMfile: "Output SAM file from TranscriptClean"
outputPrefix: "Output directory path + output file prefix."
outputStatsFile: "Summary stats from TranscriptClean run."
transcriptCleanSAMfile: {
description: "Output SAM file from TranscriptClean.",
category: "required"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
outputStatsFile: {
description: "Summary stats from TranscriptClean run.",
category: "required"
}
}
}
......@@ -102,9 +124,9 @@ task TranscriptClean {
input {
File SAMfile
File referenceGenome
String outputPrefix
Int maxLenIndel = 5
Int maxSJoffset = 5
String outputPrefix
Boolean correctMismatches = true
Boolean correctIndels = true
Boolean correctSJs = true
......@@ -112,6 +134,7 @@ task TranscriptClean {
Boolean primaryOnly = false
Boolean canonOnly = false
Int bufferSize = 100
Boolean deleteTmp = true
File? spliceJunctionAnnotation
File? variantFile
......@@ -127,11 +150,10 @@ task TranscriptClean {
TranscriptClean \
~{"-s " + SAMfile} \
~{"-g " + referenceGenome} \
~{"-o " + outputPrefix} \
~{"-j " + spliceJunctionAnnotation} \
~{"-v " + variantFile} \
~{"-t " + cores} \
~{"--maxLenIndel=" + maxLenIndel} \
~{"--maxSJOffset=" + maxSJoffset} \
~{"-o " + outputPrefix} \
~{true="-m true" false="-m false" correctMismatches} \
~{true="-i true" false="-i false" correctIndels} \
~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \
......@@ -139,7 +161,9 @@ task TranscriptClean {
~{true="--primaryOnly" false="" primaryOnly} \
~{true="--canonOnly" false="" canonOnly} \
~{"--bufferSize=" + bufferSize} \
~{"-t " + cores}
~{true="--deleteTmp" false="" deleteTmp} \
~{"-j " + spliceJunctionAnnotation} \
~{"-v " + variantFile}
}
output {
......@@ -156,24 +180,81 @@ task TranscriptClean {
}
parameter_meta {
SAMfile: "Input SAM file containing transcripts to correct."
referenceGenome: "Reference genome fasta file."
outputPrefix: "Output directory path + output file prefix."
spliceJunctionAnnotation: "Splice junction file."
variantFile: "VCF formatted file of variants."
maxLenIndel: "Maximum size indel to correct."
maxSJoffset: "Maximum distance from annotated splice junction to correct."
correctMismatches: "Set this to make TranscriptClean correct mismatches."
correctIndels: "Set this to make TranscriptClean correct indels."
correctSJs: "Set this to make TranscriptClean correct splice junctions."
dryRun: "TranscriptClean will read in the data but don't do any correction."
primaryOnly: "TranscriptClean will only output primary mappings of transcripts."
canonOnly: "TranscriptClean will output only canonical transcripts and transcript containing annotated noncanonical junctions."
bufferSize: "Number of lines to output to file at once by each thread during run."
outputTranscriptCleanFasta: "Fasta file containing corrected reads."
outputTranscriptCleanLog: "Log file of TranscriptClean run."
outputTranscriptCleanSAM: "SAM file containing corrected aligned reads."
outputTranscriptCleanTElog: "TE log file of TranscriptClean run."
SAMfile: {
description: "Input SAM file containing transcripts to correct.",
category: "required"
}
referenceGenome: {
description: "Reference genome fasta file.",
category: "required"
}
maxLenIndel: {
description: "Maximum size indel to correct.",
category: "advanced"
}
maxSJoffset: {
description: "Maximum distance from annotated splice junction to correct.",
category: "advanced"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
correctMismatches: {
description: "Set this to make TranscriptClean correct mismatches.",
category: "common"
}
correctIndels: {
description: "Set this to make TranscriptClean correct indels.",
category: "common"
}
correctSJs: {
description: "Set this to make TranscriptClean correct splice junctions.",
category: "common"
}
dryRun: {
description: "TranscriptClean will read in the data but won't do any correction.",
category: "advanced"
}
primaryOnly: {
description: "Only output primary mappings of transcripts.",
category: "advanced"
}
canonOnly: {
description: "Only output canonical transcripts and transcripts containing annotated noncanonical junctions.",
category: "advanced"
}
bufferSize: {
description: "Number of lines to output to file at once by each thread during run.",
category: "common"
}
deleteTmp: {
description: "The temporary directory generated by TranscriptClean will be removed.",
category: "common"
}
spliceJunctionAnnotation: {
description: "Splice junction file.",
category: "common"
}
variantFile: {
description: "VCF formatted file of variants.",
category: "common"
}
outputTranscriptCleanFasta: {
description: "Fasta file containing corrected reads.",
category: "required"
}
outputTranscriptCleanLog: {
description: "Log file of TranscriptClean run.",
category: "required"
}
outputTranscriptCleanSAM: {
description: "SAM file containing corrected aligned reads.",
category: "required"
}
outputTranscriptCleanTElog: {
description: "TE log file of TranscriptClean run.",
category: "required"
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment