diff --git a/CHANGELOG.md b/CHANGELOG.md index 394eb61f28e05b68cf2bd3b94db21b904b5e4f84..13573295bd794e798758dba326d8b8556b73f010 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.1.0-dev --------------------------- ++ Updated parameter_meta sections for Minimap2 and TranscriptClean. ++ Updated cores variable for TALON. + Updated TALON to version 4.4 + Added parameter_meta sections to the following tools: + htseq diff --git a/minimap2.wdl b/minimap2.wdl index 6ff8cf3eaca431098e6234d6bbbd4f7ae9d0670c..f63817c827f5eb52b7cc8d776f4ae09ffd95923d 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -22,11 +22,11 @@ version 1.0 task Indexing { input { - File referenceFile - String outputPrefix Boolean useHomopolymerCompressedKmer = false Int kmerSize = 15 Int minimizerWindowSize = 10 + String outputPrefix + File referenceFile Int? splitIndex @@ -42,9 +42,9 @@ task Indexing { ~{true="-H" false="" useHomopolymerCompressedKmer} \ ~{"-k " + kmerSize} \ ~{"-w " + minimizerWindowSize} \ - ~{"-I " + splitIndex} \ ~{"-d " + outputPrefix + ".mmi"} \ ~{"-t " + cores} \ + ~{"-I " + splitIndex} \ ~{referenceFile} } @@ -59,35 +59,55 @@ task Indexing { } parameter_meta { - referenceFile: "Reference fasta file." - outputPrefix: "Output directory path + output file prefix." - useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferrable for PacBio)." - kmerSize: "K-mer size (no larger than 28)." - minimizerWindowSize: "Minimizer window size." - splitIndex: "Split index for every ~NUM input bases." - - outputIndexFile: "Indexed reference file." 
+ useHomopolymerCompressedKmer: { + description: "Use homopolymer-compressed k-mer (preferable for PacBio).", + category: "advanced" + } + kmerSize: { + description: "K-mer size (no larger than 28).", + category: "advanced" + } + minimizerWindowSize: { + description: "Minimizer window size.", + category: "advanced" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + referenceFile: { + description: "Reference fasta file.", + category: "required" + } + splitIndex: { + description: "Split index for every ~NUM input bases.", + category: "advanced" + } + outputIndexFile: { + description: "Indexed reference file.", + category: "required" + } } } task Mapping { input { - File queryFile - File referenceFile - String outputPrefix String presetOption - Boolean outputSAM = false Int kmerSize = 15 + Boolean outputSAM = false + String outputPrefix + File referenceFile + File queryFile - Int? maxFragmentLength Int? maxIntronLength + Int? maxFragmentLength Boolean? skipSelfAndDualMappings Int? retainMaxSecondaryAlignments Int? matchingScore Int? mismatchPenalty String? howToFindGTAG - Boolean? secondaryAlignment Boolean? addMDtagToSAM + Boolean? 
secondaryAlignment Int cores = 4 String memory = "30G" @@ -99,19 +119,19 @@ task Mapping { mkdir -p $(dirname ~{outputPrefix}) minimap2 \ ~{"-x " + presetOption} \ + ~{"-k " + kmerSize} \ ~{true="-a" false="" outputSAM} \ + ~{"-o " + outputPrefix} \ + ~{"-t " + cores} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ - ~{"-k " + kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{"-N " + retainMaxSecondaryAlignments} \ ~{"-A " + matchingScore} \ ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ - --secondary=~{true="yes" false="no" secondaryAlignment} \ ~{true="--MD" false="" addMDtagToSAM} \ - ~{"-o " + outputPrefix} \ - ~{"-t " + cores} \ + --secondary=~{true="yes" false="no" secondaryAlignment} \ ~{referenceFile} \ ~{queryFile} } @@ -127,22 +147,69 @@ task Mapping { } parameter_meta { - queryFile: "Input fasta file." - referenceFile: "Reference fasta file." - outputPrefix: "Output directory path + output file prefix." - presetOption: "This option applies multiple options at the same time." - outputSAM: "Output in the SAM format." - maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)." - maxIntronLength: "Max intron length (effective with -xsplice; changing -r)." - kmerSize: "K-mer size (no larger than 28)." - skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)." - retainMaxSecondaryAlignments: "Retain at most INT secondary alignments." - matchingScore: "Matching score." - mismatchPenalty: "Mismatch penalty." - howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG." - secondaryAlignment: "Whether to output secondary alignments." - addMDtagToSAM: "Adds a MD tag to the SAM output file." - - outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file." 
+ presetOption: { + description: "This option applies multiple options at the same time.", + category: "common" + } + kmerSize: { + description: "K-mer size (no larger than 28).", + category: "advanced" + } + outputSAM: { + description: "Output in the SAM format.", + category: "common" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + maxIntronLength: { + description: "Max intron length (effective with -xsplice; changing -r).", + category: "advanced" + } + maxFragmentLength: { + description: "Max fragment length (effective with -xsr or in the fragment mode).", + category: "advanced" + } + skipSelfAndDualMappings: { + description: "Skip self and dual mappings (for the all-vs-all mode).", + category: "advanced" + } + retainMaxSecondaryAlignments: { + description: "Retain at most INT secondary alignments.", + category: "advanced" + } + matchingScore: { + description: "Matching score.", + category: "advanced" + } + mismatchPenalty: { + description: "Mismatch penalty.", + category: "advanced" + } + howToFindGTAG: { + description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", + category: "common" + } + addMDtagToSAM: { + description: "Adds a MD tag to the SAM output file.", + category: "common" + } + secondaryAlignment: { + description: "Whether to output secondary alignments.", + category: "advanced" + } + referenceFile: { + description: "Reference fasta file.", + category: "required" + } + queryFile: { + description: "Input fasta file.", + category: "required" + } + outputAlignmentFile: { + description: "Mapping and alignment between collections of DNA sequences file.", + category: "required" + } } } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index f0053b25e9cb73f780180ca9a0090a4845bd9634..de2c0cf5e4fa34cfb8d1be35786a7a5eeb661d6c 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -38,8 +38,8 @@ task GetSJsFromGtf { get_SJs_from_gtf \ ~{"--f=" + GTFfile} \ ~{"--g=" + genomeFile} \ - ~{"--o=" + outputPrefix + ".tsv"} \ - ~{"--minIntronSize=" + minIntronSize} + ~{"--minIntronSize=" + minIntronSize} \ + ~{"--o=" + outputPrefix + ".tsv"} } output { @@ -53,12 +53,26 @@ task GetSJsFromGtf { } parameter_meta { - GTFfile: "Input GTF file" - genomeFile: "Reference genome" - outputPrefix: "Output directory path + output file prefix." - minIntronSize: "Minimum size of intron to consider a junction." - - outputSJsFile: "Extracted splice junctions." 
+ GTFfile: { + description: "Input GTF file", + category: "required" + } + genomeFile: { + description: "Reference genome", + category: "required" + } + minIntronSize: { + description: "Minimum size of intron to consider a junction.", + category: "advanced" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + outputSJsFile: { + description: "Extracted splice junctions.", + category: "required" + } } } @@ -91,10 +105,18 @@ task GetTranscriptCleanStats { } parameter_meta { - transcriptCleanSAMfile: "Output SAM file from TranscriptClean" - outputPrefix: "Output directory path + output file prefix." - - outputStatsFile: "Summary stats from TranscriptClean run." + transcriptCleanSAMfile: { + description: "Output SAM file from TranscriptClean", + category: "required" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + outputStatsFile: { + description: "Summary stats from TranscriptClean run.", + category: "required" + } } } @@ -102,9 +124,9 @@ task TranscriptClean { input { File SAMfile File referenceGenome - String outputPrefix Int maxLenIndel = 5 Int maxSJoffset = 5 + String outputPrefix Boolean correctMismatches = true Boolean correctIndels = true Boolean correctSJs = true @@ -112,6 +134,7 @@ task TranscriptClean { Boolean primaryOnly = false Boolean canonOnly = false Int bufferSize = 100 + Boolean deleteTmp = true File? spliceJunctionAnnotation File? 
variantFile @@ -127,11 +150,10 @@ task TranscriptClean { TranscriptClean \ ~{"-s " + SAMfile} \ ~{"-g " + referenceGenome} \ - ~{"-o " + outputPrefix} \ - ~{"-j " + spliceJunctionAnnotation} \ - ~{"-v " + variantFile} \ + ~{"-t " + cores} \ ~{"--maxLenIndel=" + maxLenIndel} \ ~{"--maxSJOffset=" + maxSJoffset} \ + ~{"-o " + outputPrefix} \ ~{true="-m true" false="-m false" correctMismatches} \ ~{true="-i true" false="-i false" correctIndels} \ ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \ @@ -139,7 +161,9 @@ task TranscriptClean { ~{true="--primaryOnly" false="" primaryOnly} \ ~{true="--canonOnly" false="" canonOnly} \ ~{"--bufferSize=" + bufferSize} \ - ~{"-t " + cores} + ~{true="--deleteTmp" false="" deleteTmp} \ + ~{"-j " + spliceJunctionAnnotation} \ + ~{"-v " + variantFile} } output { @@ -156,24 +180,81 @@ task TranscriptClean { } parameter_meta { - SAMfile: "Input SAM file containing transcripts to correct." - referenceGenome: "Reference genome fasta file." - outputPrefix: "Output directory path + output file prefix." - spliceJunctionAnnotation: "Splice junction file." - variantFile: "VCF formatted file of variants." - maxLenIndel: "Maximum size indel to correct." - maxSJoffset: "Maximum distance from annotated splice junction to correct." - correctMismatches: "Set this to make TranscriptClean correct mismatches." - correctIndels: "Set this to make TranscriptClean correct indels." - correctSJs: "Set this to make TranscriptClean correct splice junctions." - dryRun: "TranscriptClean will read in the data but don't do any correction." - primaryOnly: "TranscriptClean will only output primary mappings of transcripts." - canonOnly: "TranscriptClean will output only canonical transcripts and transcript containing annotated noncanonical junctions." - bufferSize: "Number of lines to output to file at once by each thread during run." - - outputTranscriptCleanFasta: "Fasta file containing corrected reads." 
- outputTranscriptCleanLog: "Log file of TranscriptClean run." - outputTranscriptCleanSAM: "SAM file containing corrected aligned reads." - outputTranscriptCleanTElog: "TE log file of TranscriptClean run." + SAMfile: { + description: "Input SAM file containing transcripts to correct.", + category: "required" + } + referenceGenome: { + description: "Reference genome fasta file.", + category: "required" + } + maxLenIndel: { + description: "Maximum size indel to correct.", + category: "advanced" + } + maxSJoffset: { + description: "Maximum distance from annotated splice junction to correct.", + category: "advanced" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + correctMismatches: { + description: "Set this to make TranscriptClean correct mismatches.", + category: "common" + } + correctIndels: { + description: "Set this to make TranscriptClean correct indels.", + category: "common" + } + correctSJs: { + description: "Set this to make TranscriptClean correct splice junctions.", + category: "common" + } + dryRun: { + description: "TranscriptClean will read in the data but will not do any correction.", + category: "advanced" + } + primaryOnly: { + description: "Only output primary mappings of transcripts.", + category: "advanced" + } + canonOnly: { + description: "Only output canonical transcripts and transcripts containing annotated noncanonical junctions.", + category: "advanced" + } + bufferSize: { + description: "Number of lines to output to file at once by each thread during run.", + category: "common" + } + deleteTmp: { + description: "The temporary directory generated by TranscriptClean will be removed.", + category: "common" + } + spliceJunctionAnnotation: { + description: "Splice junction file.", + category: "common" + } + variantFile: { + description: "VCF formatted file of variants.", + category: "common" + } + outputTranscriptCleanFasta: { + description: "Fasta file containing corrected reads.", + 
category: "required" + } + outputTranscriptCleanLog: { + description: "Log file of TranscriptClean run.", + category: "required" + } + outputTranscriptCleanSAM: { + description: "SAM file containing corrected aligned reads.", + category: "required" + } + outputTranscriptCleanTElog: { + description: "TE log file of TranscriptClean run.", + category: "required" + } } }