diff --git a/CHANGELOG.md b/CHANGELOG.md index 394eb61f28e05b68cf2bd3b94db21b904b5e4f84..7d5652fa9dd1846dfa6646d246fdcf9766bf95d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,53 +11,55 @@ that users understand how the changes affect the new version. version 2.1.0-dev --------------------------- -+ Updated TALON to version 4.4 ++ Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. ++ Updated cores variable for TALON, the default is now 4. ++ Updated TALON to version 4.4. + Added parameter_meta sections to the following tools: + htseq + cutadapt + collect-columns + stringtie + fastqc -+ Updated star default image to 2.7.3a ++ Updated star default image to 2.7.3a. + Hisat2 now indexes the resulting BAM file. -+ Samtools index now also works without setting a path for the output -+ Bugfix: Biowdl-input-converter now makes sure the output directory exists ++ Samtools index now also works without setting a path for the output. ++ Bugfix: Biowdl-input-converter now makes sure the output directory exists. version 2.0.0 --------------------------- -+ TranscriptClean: Update TranscriptClean to version 2.0.2 ++ TranscriptClean: Update TranscriptClean to version 2.0.2. + Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core. + Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb". + Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed. + Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists which it replaces. -+ GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0 -+ Minimap2: Add -k option to minimap2 mapping -+ Added bwakit task -+ Minimap2: Add the option for --MD tag -+ TALON: Update average memory needs for main TALON process ++ GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0. ++ Minimap2: Add -k option to minimap2 mapping. 
++ Added bwakit task. ++ Minimap2: Add the option for --MD tag. ++ TALON: Update average memory needs for main TALON process. version 1.0.0 --------------------------- -+ Common: Add "SampleConfigToSampleReadgroupLists" task -+ MultiQC: the "interactive" input is now set to true by default ++ Common: Add "SampleConfigToSampleReadgroupLists" task. ++ MultiQC: the "interactive" input is now set to true by default. + Removed deprecated tasks: + bioconda.installPrefix + mergecounts.MergeCounts -+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]" -+ Removed BWA index task -+ Removed unused "picardJar" input from bwa.wdl -+ All inputs to bedtools Sort are now reflected in the generated command -+ TranscriptClean: Update TranscriptClean container to version 1.0.8 -+ Removed "pipefail" from command sections TALON and TranscriptClean -+ Add WDL task for Minimap2 -+ Add WDL task for TALON -+ Add WDL task for TranscriptClean -+ Fastqsplitter: fix mkdir command to work with biocontainer's busybox mkdir -+ Cutadapt: simplify interface -+ Bigger memory multiplier in mutect to take in account bigger vmem usage -+ Cutadapt: Remove default adapter ++ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]". ++ Removed BWA index task. ++ Removed unused "picardJar" input from bwa.wdl. ++ All inputs to bedtools Sort are now reflected in the generated command. ++ TranscriptClean: Update TranscriptClean container to version 1.0.8. ++ Removed "pipefail" from command sections TALON and TranscriptClean. ++ Add WDL task for Minimap2. ++ Add WDL task for TALON. ++ Add WDL task for TranscriptClean. ++ Fastqsplitter: fix mkdir command to work with biocontainer's busybox mkdir. ++ Cutadapt: simplify interface. ++ Bigger memory multiplier in mutect to take into account bigger vmem usage. ++ Cutadapt: Remove default adapter. 
+ Fastqsplitter: use version 1.1. -+ Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency ++ Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency. + Common: Update dockerTag to dockerImage. + GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. + Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). diff --git a/minimap2.wdl b/minimap2.wdl index 6ff8cf3eaca431098e6234d6bbbd4f7ae9d0670c..c29f3314124d5120dcf2587967415d0fad576194 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -22,11 +22,11 @@ version 1.0 task Indexing { input { - File referenceFile - String outputPrefix Boolean useHomopolymerCompressedKmer = false Int kmerSize = 15 Int minimizerWindowSize = 10 + String outputPrefix + File referenceFile Int? splitIndex @@ -42,9 +42,9 @@ task Indexing { ~{true="-H" false="" useHomopolymerCompressedKmer} \ ~{"-k " + kmerSize} \ ~{"-w " + minimizerWindowSize} \ - ~{"-I " + splitIndex} \ ~{"-d " + outputPrefix + ".mmi"} \ ~{"-t " + cores} \ + ~{"-I " + splitIndex} \ ~{referenceFile} } @@ -59,35 +59,55 @@ task Indexing { } parameter_meta { - referenceFile: "Reference fasta file." - outputPrefix: "Output directory path + output file prefix." - useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferrable for PacBio)." - kmerSize: "K-mer size (no larger than 28)." - minimizerWindowSize: "Minimizer window size." - splitIndex: "Split index for every ~NUM input bases." - - outputIndexFile: "Indexed reference file." 
+ useHomopolymerCompressedKmer: { + description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", + category: "advanced" + } + kmerSize: { + description: "K-mer size (no larger than 28).", + category: "advanced" + } + minimizerWindowSize: { + description: "Minimizer window size.", + category: "advanced" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + referenceFile: { + description: "Reference fasta file.", + category: "required" + } + splitIndex: { + description: "Split index for every ~NUM input bases.", + category: "advanced" + } + outputIndexFile: { + description: "Indexed reference file.", + category: "required" + } } } task Mapping { input { - File queryFile - File referenceFile - String outputPrefix String presetOption - Boolean outputSAM = false Int kmerSize = 15 + Boolean skipSelfAndDualMappings = false + Boolean outputSAM = false + String outputPrefix + Boolean addMDtagToSAM = false + Boolean secondaryAlignment = false + File referenceFile + File queryFile - Int? maxFragmentLength Int? maxIntronLength - Boolean? skipSelfAndDualMappings + Int? maxFragmentLength Int? retainMaxSecondaryAlignments Int? matchingScore Int? mismatchPenalty String? howToFindGTAG - Boolean? secondaryAlignment - Boolean? 
addMDtagToSAM Int cores = 4 String memory = "30G" @@ -99,19 +119,19 @@ task Mapping { mkdir -p $(dirname ~{outputPrefix}) minimap2 \ ~{"-x " + presetOption} \ + ~{"-k " + kmerSize} \ + ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="-a" false="" outputSAM} \ + ~{"-o " + outputPrefix} \ + ~{true="--MD" false="" addMDtagToSAM} \ + --secondary=~{true="yes" false="no" secondaryAlignment} \ + ~{"-t " + cores} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ - ~{"-k " + kmerSize} \ - ~{true="-X" false="" skipSelfAndDualMappings} \ ~{"-N " + retainMaxSecondaryAlignments} \ ~{"-A " + matchingScore} \ ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ - --secondary=~{true="yes" false="no" secondaryAlignment} \ - ~{true="--MD" false="" addMDtagToSAM} \ - ~{"-o " + outputPrefix} \ - ~{"-t " + cores} \ ~{referenceFile} \ ~{queryFile} } @@ -127,22 +147,69 @@ task Mapping { } parameter_meta { - queryFile: "Input fasta file." - referenceFile: "Reference fasta file." - outputPrefix: "Output directory path + output file prefix." - presetOption: "This option applies multiple options at the same time." - outputSAM: "Output in the SAM format." - maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)." - maxIntronLength: "Max intron length (effective with -xsplice; changing -r)." - kmerSize: "K-mer size (no larger than 28)." - skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)." - retainMaxSecondaryAlignments: "Retain at most INT secondary alignments." - matchingScore: "Matching score." - mismatchPenalty: "Mismatch penalty." - howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG." - secondaryAlignment: "Whether to output secondary alignments." - addMDtagToSAM: "Adds a MD tag to the SAM output file." - - outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file." 
+ presetOption: { + description: "This option applies multiple options at the same time.", + category: "common" + } + kmerSize: { + description: "K-mer size (no larger than 28).", + category: "advanced" + } + outputSAM: { + description: "Output in the SAM format.", + category: "common" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + maxIntronLength: { + description: "Max intron length (effective with -xsplice; changing -r).", + category: "advanced" + } + maxFragmentLength: { + description: "Max fragment length (effective with -xsr or in the fragment mode).", + category: "advanced" + } + skipSelfAndDualMappings: { + description: "Skip self and dual mappings (for the all-vs-all mode).", + category: "advanced" + } + retainMaxSecondaryAlignments: { + description: "Retain at most INT secondary alignments.", + category: "advanced" + } + matchingScore: { + description: "Matching score.", + category: "advanced" + } + mismatchPenalty: { + description: "Mismatch penalty.", + category: "advanced" + } + howToFindGTAG: { + description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", + category: "common" + } + addMDtagToSAM: { + description: "Adds a MD tag to the SAM output file.", + category: "common" + } + secondaryAlignment: { + description: "Whether to output secondary alignments.", + category: "advanced" + } + referenceFile: { + description: "Reference fasta file.", + category: "required" + } + queryFile: { + description: "Input fasta file.", + category: "required" + } + outputAlignmentFile: { + description: "Mapping and alignment between collections of DNA sequences file.", + category: "required" + } } } diff --git a/scripts b/scripts index e00dc247dac8f4aa91a77d6d307f928cc8449527..6eaa313f172f3efb9e62f2140b8d7fb34da6bd9a 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit e00dc247dac8f4aa91a77d6d307f928cc8449527 +Subproject commit 6eaa313f172f3efb9e62f2140b8d7fb34da6bd9a diff --git a/talon.wdl b/talon.wdl index 5518ea51a484e4a22b38adee63d65a0076791226..9d3b5304fb5bc0bf665e289fce6aca86a7c85220 100644 --- a/talon.wdl +++ b/talon.wdl @@ -456,7 +456,7 @@ task Talon { String configFileName = basename(configFile) String SAMfileName = basename(SAMfile) - Int cores = 1 + Int cores = 4 String memory = "20G" String dockerImage = "biocontainers/talon:v4.4_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index f0053b25e9cb73f780180ca9a0090a4845bd9634..df187fd2a2a7313d1f718895e399de4762e0a1cc 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -38,8 +38,8 @@ task GetSJsFromGtf { get_SJs_from_gtf \ ~{"--f=" + GTFfile} \ ~{"--g=" + genomeFile} \ - ~{"--o=" + outputPrefix + ".tsv"} \ - ~{"--minIntronSize=" + minIntronSize} + ~{"--minIntronSize=" + minIntronSize} \ + ~{"--o=" + outputPrefix + ".tsv"} } output { @@ -53,12 +53,26 @@ task GetSJsFromGtf { } parameter_meta { - GTFfile: "Input GTF file" - genomeFile: "Reference genome" - outputPrefix: "Output directory path + output file prefix." - minIntronSize: "Minimum size of intron to consider a junction." 
- - outputSJsFile: "Extracted splice junctions." + GTFfile: { + description: "Input GTF file", + category: "required" + } + genomeFile: { + description: "Reference genome", + category: "required" + } + minIntronSize: { + description: "Minimum size of intron to consider a junction.", + category: "advanced" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + outputSJsFile: { + description: "Extracted splice junctions.", + category: "required" + } } } @@ -91,10 +105,18 @@ task GetTranscriptCleanStats { } parameter_meta { - transcriptCleanSAMfile: "Output SAM file from TranscriptClean" - outputPrefix: "Output directory path + output file prefix." - - outputStatsFile: "Summary stats from TranscriptClean run." + transcriptCleanSAMfile: { + description: "Output SAM file from TranscriptClean", + category: "required" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + outputStatsFile: { + description: "Summary stats from TranscriptClean run.", + category: "required" + } } } @@ -102,9 +124,9 @@ task TranscriptClean { input { File SAMfile File referenceGenome - String outputPrefix Int maxLenIndel = 5 Int maxSJoffset = 5 + String outputPrefix Boolean correctMismatches = true Boolean correctIndels = true Boolean correctSJs = true @@ -112,6 +134,7 @@ task TranscriptClean { Boolean primaryOnly = false Boolean canonOnly = false Int bufferSize = 100 + Boolean deleteTmp = true File? spliceJunctionAnnotation File? 
variantFile @@ -127,11 +150,10 @@ task TranscriptClean { TranscriptClean \ ~{"-s " + SAMfile} \ ~{"-g " + referenceGenome} \ - ~{"-o " + outputPrefix} \ - ~{"-j " + spliceJunctionAnnotation} \ - ~{"-v " + variantFile} \ + ~{"-t " + cores} \ ~{"--maxLenIndel=" + maxLenIndel} \ ~{"--maxSJOffset=" + maxSJoffset} \ + ~{"-o " + outputPrefix} \ ~{true="-m true" false="-m false" correctMismatches} \ ~{true="-i true" false="-i false" correctIndels} \ ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \ @@ -139,7 +161,9 @@ task TranscriptClean { ~{true="--primaryOnly" false="" primaryOnly} \ ~{true="--canonOnly" false="" canonOnly} \ ~{"--bufferSize=" + bufferSize} \ - ~{"-t " + cores} + ~{true="--deleteTmp" false="" deleteTmp} \ + ~{"-j " + spliceJunctionAnnotation} \ + ~{"-v " + variantFile} } output { @@ -156,24 +180,81 @@ task TranscriptClean { } parameter_meta { - SAMfile: "Input SAM file containing transcripts to correct." - referenceGenome: "Reference genome fasta file." - outputPrefix: "Output directory path + output file prefix." - spliceJunctionAnnotation: "Splice junction file." - variantFile: "VCF formatted file of variants." - maxLenIndel: "Maximum size indel to correct." - maxSJoffset: "Maximum distance from annotated splice junction to correct." - correctMismatches: "Set this to make TranscriptClean correct mismatches." - correctIndels: "Set this to make TranscriptClean correct indels." - correctSJs: "Set this to make TranscriptClean correct splice junctions." - dryRun: "TranscriptClean will read in the data but don't do any correction." - primaryOnly: "TranscriptClean will only output primary mappings of transcripts." - canonOnly: "TranscriptClean will output only canonical transcripts and transcript containing annotated noncanonical junctions." - bufferSize: "Number of lines to output to file at once by each thread during run." - - outputTranscriptCleanFasta: "Fasta file containing corrected reads." 
- outputTranscriptCleanLog: "Log file of TranscriptClean run." - outputTranscriptCleanSAM: "SAM file containing corrected aligned reads." - outputTranscriptCleanTElog: "TE log file of TranscriptClean run." + SAMfile: { + description: "Input SAM file containing transcripts to correct.", + category: "required" + } + referenceGenome: { + description: "Reference genome fasta file.", + category: "required" + } + maxLenIndel: { + description: "Maximum size indel to correct.", + category: "advanced" + } + maxSJoffset: { + description: "Maximum distance from annotated splice junction to correct.", + category: "advanced" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + correctMismatches: { + description: "Set this to make TranscriptClean correct mismatches.", + category: "common" + } + correctIndels: { + description: "Set this to make TranscriptClean correct indels.", + category: "common" + } + correctSJs: { + description: "Set this to make TranscriptClean correct splice junctions.", + category: "common" + } + dryRun: { + description: "TranscriptClean will read in the data but don't do any correction.", + category: "advanced" + } + primaryOnly: { + description: "Only output primary mappings of transcripts.", + category: "advanced" + } + canonOnly: { + description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", + category: "advanced" + } + bufferSize: { + description: "Number of lines to output to file at once by each thread during run.", + category: "common" + } + deleteTmp: { + description: "The temporary directory generated by TranscriptClean will be removed.", + category: "common" + } + spliceJunctionAnnotation: { + description: "Splice junction file.", + category: "common" + } + variantFile: { + description: "VCF formatted file of variants.", + category: "common" + } + outputTranscriptCleanFasta: { + description: "Fasta file containing corrected reads.", + 
category: "required" + } + outputTranscriptCleanLog: { + description: "Log file of TranscriptClean run.", + category: "required" + } + outputTranscriptCleanSAM: { + description: "SAM file containing corrected aligned reads.", + category: "required" + } + outputTranscriptCleanTElog: { + description: "TE log file of TranscriptClean run.", + category: "required" + } } }