Update parameter_meta sections for Minimap2 and TranscriptClean.

34b221f2 · JB · 08f5b977 · 34b221f2 · 34b221f2 · 34b221f2
Commit 34b221f2 authored 5 years ago by JB
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ that users understand how the changes affect the new version.

 version 2.1.0-dev
 ---------------------------
+ Updated parameter_meta sections for Minimap2 and TranscriptClean.
+ Updated cores variable for TALON.
 + Updated TALON to version 4.4
 + Added parameter_meta sections to the following tools:
    + htseq

--- a/minimap2.wdl
+++ b/minimap2.wdl
@@ -22,11 +22,11 @@ version 1.0

 task Indexing {
    input {
-        File referenceFile
-        String outputPrefix
        Boolean useHomopolymerCompressedKmer = false
        Int kmerSize = 15
        Int minimizerWindowSize = 10
+        String outputPrefix
+        File referenceFile

        Int? splitIndex

@@ -42,9 +42,9 @@ task Indexing {
        ~{true="-H" false="" useHomopolymerCompressedKmer} \
        ~{"-k " + kmerSize} \
        ~{"-w " + minimizerWindowSize} \
-        ~{"-I " + splitIndex} \
        ~{"-d " + outputPrefix + ".mmi"} \
        ~{"-t " + cores} \
+        ~{"-I " + splitIndex} \
        ~{referenceFile}
    }

@@ -59,35 +59,55 @@ task Indexing {
    }

    parameter_meta {
-        referenceFile: "Reference fasta file."
-        outputPrefix: "Output directory path + output file prefix."
-        useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferrable for PacBio)."
-        kmerSize: "K-mer size (no larger than 28)."
-        minimizerWindowSize: "Minimizer window size."
-        splitIndex: "Split index for every ~NUM input bases."
-
-        outputIndexFile: "Indexed reference file."
+        useHomopolymerCompressedKmer: {
+            description: "Use homopolymer-compressed k-mer (preferable for PacBio).",
+            category: "advanced"
+        }
+        kmerSize: {
+            description: "K-mer size (no larger than 28).",
+            category: "advanced"
+        }
+        minimizerWindowSize: {
+            description: "Minimizer window size.",
+            category: "advanced"
+        }
+        outputPrefix: {
+            description: "Output directory path + output file prefix.",
+            category: "required"
+        }
+        referenceFile: {
+            description: "Reference fasta file.",
+            category: "required"
+        }
+        splitIndex: {
+            description: "Split index for every ~NUM input bases.",
+            category: "advanced"
+        }
+        outputIndexFile: {
+            description: "Indexed reference file.",
+            category: "required"
+        }
    }
 }

 task Mapping {
    input {
-        File queryFile
-        File referenceFile
-        String outputPrefix
        String presetOption
-        Boolean outputSAM = false
        Int kmerSize = 15
+        Boolean outputSAM = false
+        String outputPrefix
+        File referenceFile
+        File queryFile

-        Int? maxFragmentLength
        Int? maxIntronLength
+        Int? maxFragmentLength
        Boolean? skipSelfAndDualMappings
        Int? retainMaxSecondaryAlignments
        Int? matchingScore
        Int? mismatchPenalty
        String? howToFindGTAG
-        Boolean? secondaryAlignment
        Boolean? addMDtagToSAM
+        Boolean? secondaryAlignment

        Int cores = 4
        String memory = "30G"
@@ -99,19 +119,19 @@ task Mapping {
        mkdir -p $(dirname ~{outputPrefix})
        minimap2 \
        ~{"-x " + presetOption} \
+        ~{"-k " + kmerSize} \
        ~{true="-a" false="" outputSAM} \
+        ~{"-o " + outputPrefix} \
+        ~{"-t " + cores} \
        ~{"-G " + maxIntronLength} \
        ~{"-F " + maxFragmentLength} \
-        ~{"-k " + kmerSize} \
        ~{true="-X" false="" skipSelfAndDualMappings} \
        ~{"-N " + retainMaxSecondaryAlignments} \
        ~{"-A " + matchingScore} \
        ~{"-B " + mismatchPenalty} \
        ~{"-u " + howToFindGTAG} \
-        --secondary=~{true="yes" false="no" secondaryAlignment} \
        ~{true="--MD" false="" addMDtagToSAM} \
-        ~{"-o " + outputPrefix} \
-        ~{"-t " + cores} \
+        ~{true="--secondary=yes" false="--secondary=no" secondaryAlignment} \
        ~{referenceFile} \
        ~{queryFile}
    }
@@ -127,22 +147,69 @@ task Mapping {
    }

    parameter_meta {
-        queryFile: "Input fasta file."
-        referenceFile: "Reference fasta file."
-        outputPrefix: "Output directory path + output file prefix."
-        presetOption: "This option applies multiple options at the same time."
-        outputSAM: "Output in the SAM format."
-        maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)."
-        maxIntronLength: "Max intron length (effective with -xsplice; changing -r)."
-        kmerSize: "K-mer size (no larger than 28)."
-        skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)."
-        retainMaxSecondaryAlignments: "Retain at most INT secondary alignments."
-        matchingScore: "Matching score."
-        mismatchPenalty: "Mismatch penalty."
-        howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG."
-        secondaryAlignment: "Whether to output secondary alignments."
-        addMDtagToSAM: "Adds a MD tag to the SAM output file."
-
-        outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file."
+        presetOption: {
+            description: "This option applies multiple options at the same time.",
+            category: "common"
+        }
+        kmerSize: {
+            description: "K-mer size (no larger than 28).",
+            category: "advanced"
+        }
+        outputSAM: {
+            description: "Output in the SAM format.",
+            category: "common"
+        }
+        outputPrefix: {
+            description: "Output directory path + output file prefix.",
+            category: "required"
+        }
+        maxIntronLength: {
+            description: "Max intron length (effective with -xsplice; changing -r).",
+            category: "advanced"
+        }
+        maxFragmentLength: {
+            description: "Max fragment length (effective with -xsr or in the fragment mode).",
+            category: "advanced"
+        }
+        skipSelfAndDualMappings: {
+            description: "Skip self and dual mappings (for the all-vs-all mode).",
+            category: "advanced"
+        }
+        retainMaxSecondaryAlignments: {
+            description: "Retain at most INT secondary alignments.",
+            category: "advanced"
+        }
+        matchingScore: {
+            description: "Matching score.",
+            category: "advanced"
+        }
+        mismatchPenalty: {
+            description: "Mismatch penalty.",
+            category: "advanced"
+        }
+        howToFindGTAG: {
+            description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.",
+            category: "common"
+        }
+        addMDtagToSAM: {
+            description: "Adds a MD tag to the SAM output file.",
+            category: "common"
+        }
+        secondaryAlignment: {
+            description: "Whether to output secondary alignments.",
+            category: "advanced"
+        }
+        referenceFile: {
+            description: "Reference fasta file.",
+            category: "required"
+        }
+        queryFile: {
+            description: "Input fasta file.",
+            category: "required"
+        }
+        outputAlignmentFile: {
+            description: "Mapping and alignment between collections of DNA sequences file.",
+            category: "required"
+        }
    }
 }
--- a/transcriptclean.wdl
+++ b/transcriptclean.wdl
@@ -38,8 +38,8 @@ task GetSJsFromGtf {
        get_SJs_from_gtf \
        ~{"--f=" + GTFfile} \
        ~{"--g=" + genomeFile} \
-        ~{"--o=" + outputPrefix + ".tsv"} \
-        ~{"--minIntronSize=" + minIntronSize}
+        ~{"--minIntronSize=" + minIntronSize} \
+        ~{"--o=" + outputPrefix + ".tsv"}
    }

    output {
@@ -53,12 +53,26 @@ task GetSJsFromGtf {
    }

    parameter_meta {
-        GTFfile: "Input GTF file"
-        genomeFile: "Reference genome"
-        outputPrefix: "Output directory path + output file prefix."
-        minIntronSize: "Minimum size of intron to consider a junction."
-
-        outputSJsFile: "Extracted splice junctions."
+        GTFfile: {
+            description: "Input GTF file.",
+            category: "required"
+        }
+        genomeFile: {
+            description: "Reference genome.",
+            category: "required"
+        }
+        minIntronSize: {
+            description: "Minimum size of intron to consider a junction.",
+            category: "advanced"
+        }
+        outputPrefix: {
+            description: "Output directory path + output file prefix.",
+            category: "required"
+        }
+        outputSJsFile: {
+            description: "Extracted splice junctions.",
+            category: "required"
+        }
    }
 }

@@ -91,10 +105,18 @@ task GetTranscriptCleanStats {
    }

    parameter_meta {
-        transcriptCleanSAMfile: "Output SAM file from TranscriptClean"
-        outputPrefix: "Output directory path + output file prefix."
-
-        outputStatsFile: "Summary stats from TranscriptClean run."
+        transcriptCleanSAMfile: {
+            description: "Output SAM file from TranscriptClean.",
+            category: "required"
+        }
+        outputPrefix: {
+            description: "Output directory path + output file prefix.",
+            category: "required"
+        }
+        outputStatsFile: {
+            description: "Summary stats from TranscriptClean run.",
+            category: "required"
+        }
    }
 }

@@ -102,9 +124,9 @@ task TranscriptClean {
    input {
        File SAMfile
        File referenceGenome
-        String outputPrefix
        Int maxLenIndel = 5
        Int maxSJoffset = 5
+        String outputPrefix
        Boolean correctMismatches = true
        Boolean correctIndels = true
        Boolean correctSJs = true
@@ -112,6 +134,7 @@ task TranscriptClean {
        Boolean primaryOnly = false
        Boolean canonOnly = false
        Int bufferSize = 100
+        Boolean deleteTmp = true

        File? spliceJunctionAnnotation
        File? variantFile
@@ -127,11 +150,10 @@ task TranscriptClean {
        TranscriptClean \
        ~{"-s " + SAMfile} \
        ~{"-g " + referenceGenome} \
-        ~{"-o " + outputPrefix} \
-        ~{"-j " + spliceJunctionAnnotation} \
-        ~{"-v " + variantFile} \
+        ~{"-t " + cores} \
        ~{"--maxLenIndel=" + maxLenIndel} \
        ~{"--maxSJOffset=" + maxSJoffset} \
+        ~{"-o " + outputPrefix} \
        ~{true="-m true" false="-m false" correctMismatches} \
        ~{true="-i true" false="-i false" correctIndels} \
        ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \
@@ -139,7 +161,9 @@ task TranscriptClean {
        ~{true="--primaryOnly" false="" primaryOnly} \
        ~{true="--canonOnly" false="" canonOnly} \
        ~{"--bufferSize=" + bufferSize} \
-        ~{"-t " + cores}
+        ~{true="--deleteTmp" false="" deleteTmp} \
+        ~{"-j " + spliceJunctionAnnotation} \
+        ~{"-v " + variantFile}
    }

    output {
@@ -156,24 +180,81 @@ task TranscriptClean {
    }

    parameter_meta {
-        SAMfile: "Input SAM file containing transcripts to correct."
-        referenceGenome: "Reference genome fasta file."
-        outputPrefix: "Output directory path + output file prefix."
-        spliceJunctionAnnotation: "Splice junction file."
-        variantFile: "VCF formatted file of variants."
-        maxLenIndel: "Maximum size indel to correct."
-        maxSJoffset: "Maximum distance from annotated splice junction to correct."
-        correctMismatches: "Set this to make TranscriptClean correct mismatches."
-        correctIndels: "Set this to make TranscriptClean correct indels."
-        correctSJs: "Set this to make TranscriptClean correct splice junctions."
-        dryRun: "TranscriptClean will read in the data but don't do any correction."
-        primaryOnly: "TranscriptClean will only output primary mappings of transcripts."
-        canonOnly: "TranscriptClean will output only canonical transcripts and transcript containing annotated noncanonical junctions."
-        bufferSize: "Number of lines to output to file at once by each thread during run."
-
-        outputTranscriptCleanFasta: "Fasta file containing corrected reads."
-        outputTranscriptCleanLog: "Log file of TranscriptClean run."
-        outputTranscriptCleanSAM: "SAM file containing corrected aligned reads."
-        outputTranscriptCleanTElog: "TE log file of TranscriptClean run."
+        SAMfile: {
+            description: "Input SAM file containing transcripts to correct.",
+            category: "required"
+        }
+        referenceGenome: {
+            description: "Reference genome fasta file.",
+            category: "required"
+        }
+        maxLenIndel: {
+            description: "Maximum size indel to correct.",
+            category: "advanced"
+        }
+        maxSJoffset: {
+            description: "Maximum distance from annotated splice junction to correct.",
+            category: "advanced"
+        }
+        outputPrefix: {
+            description: "Output directory path + output file prefix.",
+            category: "required"
+        }
+        correctMismatches: {
+            description: "Set this to make TranscriptClean correct mismatches.",
+            category: "common"
+        }
+        correctIndels: {
+            description: "Set this to make TranscriptClean correct indels.",
+            category: "common"
+        }
+        correctSJs: {
+            description: "Set this to make TranscriptClean correct splice junctions.",
+            category: "common"
+        }
+        dryRun: {
+            description: "TranscriptClean will read in the data but will not do any correction.",
+            category: "advanced"
+        }
+        primaryOnly: {
+            description: "Only output primary mappings of transcripts.",
+            category: "advanced"
+        }
+        canonOnly: {
+            description: "Only output canonical transcripts and transcripts containing annotated noncanonical junctions.",
+            category: "advanced"
+        }
+        bufferSize: {
+            description: "Number of lines to output to file at once by each thread during run.",
+            category: "common"
+        }
+        deleteTmp: {
+            description: "The temporary directory generated by TranscriptClean will be removed.",
+            category: "common"
+        }
+        spliceJunctionAnnotation: {
+            description: "Splice junction file.",
+            category: "common"
+        }
+        variantFile: {
+            description: "VCF formatted file of variants.",
+            category: "common"
+        }
+        outputTranscriptCleanFasta: {
+            description: "Fasta file containing corrected reads.",
+            category: "required"
+        }
+        outputTranscriptCleanLog: {
+            description: "Log file of TranscriptClean run.",
+            category: "required"
+        }
+        outputTranscriptCleanSAM: {
+            description: "SAM file containing corrected aligned reads.",
+            category: "required"
+        }
+        outputTranscriptCleanTElog: {
+            description: "TE log file of TranscriptClean run.",
+            category: "required"
+        }
   }
 }