Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tasks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
biowdl
tasks
Commits
34b221f2
Commit
34b221f2
authored
5 years ago
by
JB
Browse files
Options
Downloads
Patches
Plain Diff
Update parameter_meta sections for Minimap2 and TranscriptClean.
parent
08f5b977
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
CHANGELOG.md
+2
-0
2 additions, 0 deletions
CHANGELOG.md
minimap2.wdl
+105
-38
105 additions, 38 deletions
minimap2.wdl
transcriptclean.wdl
+117
-36
117 additions, 36 deletions
transcriptclean.wdl
with
224 additions
and
74 deletions
CHANGELOG.md
+
2
−
0
View file @
34b221f2
...
...
@@ -11,6 +11,8 @@ that users understand how the changes affect the new version.
version 2.1.0-dev
---------------------------
+
Updated parameter_meta sections for Minimap2 and TranscriptClean.
+
Updated cores variable for TALON.
+
Updated TALON to version 4.4
+
Added parameter_meta sections to the following tools:
+
htseq
...
...
This diff is collapsed.
Click to expand it.
minimap2.wdl
+
105
−
38
View file @
34b221f2
...
...
@@ -22,11 +22,11 @@ version 1.0
task Indexing {
input {
File referenceFile
String outputPrefix
Boolean useHomopolymerCompressedKmer = false
Int kmerSize = 15
Int minimizerWindowSize = 10
String outputPrefix
File referenceFile
Int? splitIndex
...
...
@@ -42,9 +42,9 @@ task Indexing {
~{true="-H" false="" useHomopolymerCompressedKmer} \
~{"-k " + kmerSize} \
~{"-w " + minimizerWindowSize} \
~{"-I " + splitIndex} \
~{"-d " + outputPrefix + ".mmi"} \
~{"-t " + cores} \
~{"-I " + splitIndex} \
~{referenceFile}
}
...
...
@@ -59,35 +59,55 @@ task Indexing {
}
parameter_meta {
referenceFile: "Reference fasta file."
outputPrefix: "Output directory path + output file prefix."
useHomopolymerCompressedKmer: "Use homopolymer-compressed k-mer (preferable for PacBio)."
kmerSize: "K-mer size (no larger than 28)."
minimizerWindowSize: "Minimizer window size."
splitIndex: "Split index for every ~NUM input bases."
outputIndexFile: "Indexed reference file."
useHomopolymerCompressedKmer: {
description: "Use homopolymer-compressed k-mer (preferable for PacBio).",
category: "advanced"
}
kmerSize: {
description: "K-mer size (no larger than 28).",
category: "advanced"
}
minimizerWindowSize: {
description: "Minimizer window size.",
category: "advanced"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
referenceFile: {
description: "Reference fasta file.",
category: "required"
}
splitIndex: {
description: "Split index for every ~NUM input bases.",
category: "advanced"
}
outputIndexFile: {
description: "Indexed reference file.",
category: "required"
}
}
}
task Mapping {
input {
File queryFile
File referenceFile
String outputPrefix
String presetOption
Boolean outputSAM = false
Int kmerSize = 15
Boolean outputSAM = false
String outputPrefix
File referenceFile
File queryFile
Int? maxFragmentLength
Int? maxIntronLength
Int? maxFragmentLength
Boolean? skipSelfAndDualMappings
Int? retainMaxSecondaryAlignments
Int? matchingScore
Int? mismatchPenalty
String? howToFindGTAG
Boolean? secondaryAlignment
Boolean? addMDtagToSAM
Boolean? secondaryAlignment
Int cores = 4
String memory = "30G"
...
...
@@ -99,19 +119,19 @@ task Mapping {
mkdir -p $(dirname ~{outputPrefix})
minimap2 \
~{"-x " + presetOption} \
~{"-k " + kmerSize} \
~{true="-a" false="" outputSAM} \
~{"-o " + outputPrefix} \
~{"-t " + cores} \
~{"-G " + maxIntronLength} \
~{"-F " + maxFragmentLength} \
~{"-k " + kmerSize} \
~{true="-X" false="" skipSelfAndDualMappings} \
~{"-N " + retainMaxSecondaryAlignments} \
~{"-A " + matchingScore} \
~{"-B " + mismatchPenalty} \
~{"-u " + howToFindGTAG} \
--secondary=~{true="yes" false="no" secondaryAlignment} \
~{true="--MD" false="" addMDtagToSAM} \
~{"-o " + outputPrefix} \
~{"-t " + cores} \
--secondary=~{true="yes" false="no" secondaryAlignment} \
~{referenceFile} \
~{queryFile}
}
...
...
@@ -127,22 +147,69 @@ task Mapping {
}
parameter_meta {
queryFile: "Input fasta file."
referenceFile: "Reference fasta file."
outputPrefix: "Output directory path + output file prefix."
presetOption: "This option applies multiple options at the same time."
outputSAM: "Output in the SAM format."
maxFragmentLength: "Max fragment length (effective with -xsr or in the fragment mode)."
maxIntronLength: "Max intron length (effective with -xsplice; changing -r)."
kmerSize: "K-mer size (no larger than 28)."
skipSelfAndDualMappings: "Skip self and dual mappings (for the all-vs-all mode)."
retainMaxSecondaryAlignments: "Retain at most INT secondary alignments."
matchingScore: "Matching score."
mismatchPenalty: "Mismatch penalty."
howToFindGTAG: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG."
secondaryAlignment: "Whether to output secondary alignments."
addMDtagToSAM: "Adds a MD tag to the SAM output file."
outputAlignmentFile: "Mapping and alignment between collections of DNA sequences file."
presetOption: {
description: "This option applies multiple options at the same time.",
category: "common"
}
kmerSize: {
description: "K-mer size (no larger than 28).",
category: "advanced"
}
outputSAM: {
description: "Output in the SAM format.",
category: "common"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
maxIntronLength: {
description: "Max intron length (effective with -xsplice; changing -r).",
category: "advanced"
}
maxFragmentLength: {
description: "Max fragment length (effective with -xsr or in the fragment mode).",
category: "advanced"
}
skipSelfAndDualMappings: {
description: "Skip self and dual mappings (for the all-vs-all mode).",
category: "advanced"
}
retainMaxSecondaryAlignments: {
description: "Retain at most INT secondary alignments.",
category: "advanced"
}
matchingScore: {
description: "Matching score.",
category: "advanced"
}
mismatchPenalty: {
description: "Mismatch penalty.",
category: "advanced"
}
howToFindGTAG: {
description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.",
category: "common"
}
addMDtagToSAM: {
description: "Adds a MD tag to the SAM output file.",
category: "common"
}
secondaryAlignment: {
description: "Whether to output secondary alignments.",
category: "advanced"
}
referenceFile: {
description: "Reference fasta file.",
category: "required"
}
queryFile: {
description: "Input fasta file.",
category: "required"
}
outputAlignmentFile: {
description: "Mapping and alignment between collections of DNA sequences file.",
category: "required"
}
}
}
This diff is collapsed.
Click to expand it.
transcriptclean.wdl
+
117
−
36
View file @
34b221f2
...
...
@@ -38,8 +38,8 @@ task GetSJsFromGtf {
get_SJs_from_gtf \
~{"--f=" + GTFfile} \
~{"--g=" + genomeFile} \
~{"--o=" + outputPrefix + ".tsv"} \
~{"--minIntronSize=" + minIntronSize}
~{"--minIntronSize=" + minIntronSize} \
~{"--o=" + outputPrefix + ".tsv"}
}
output {
...
...
@@ -53,12 +53,26 @@ task GetSJsFromGtf {
}
parameter_meta {
GTFfile: "Input GTF file"
genomeFile: "Reference genome"
outputPrefix: "Output directory path + output file prefix."
minIntronSize: "Minimum size of intron to consider a junction."
outputSJsFile: "Extracted splice junctions."
GTFfile: {
description: "Input GTF file",
category: "required"
}
genomeFile: {
description: "Reference genome",
category: "required"
}
minIntronSize: {
description: "Minimum size of intron to consider a junction.",
category: "advanced"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
outputSJsFile: {
description: "Extracted splice junctions.",
category: "required"
}
}
}
...
...
@@ -91,10 +105,18 @@ task GetTranscriptCleanStats {
}
parameter_meta {
transcriptCleanSAMfile: "Output SAM file from TranscriptClean"
outputPrefix: "Output directory path + output file prefix."
outputStatsFile: "Summary stats from TranscriptClean run."
transcriptCleanSAMfile: {
description: "Output SAM file from TranscriptClean",
category: "required"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
outputStatsFile: {
description: "Summary stats from TranscriptClean run.",
category: "required"
}
}
}
...
...
@@ -102,9 +124,9 @@ task TranscriptClean {
input {
File SAMfile
File referenceGenome
String outputPrefix
Int maxLenIndel = 5
Int maxSJoffset = 5
String outputPrefix
Boolean correctMismatches = true
Boolean correctIndels = true
Boolean correctSJs = true
...
...
@@ -112,6 +134,7 @@ task TranscriptClean {
Boolean primaryOnly = false
Boolean canonOnly = false
Int bufferSize = 100
Boolean deleteTmp = true
File? spliceJunctionAnnotation
File? variantFile
...
...
@@ -127,11 +150,10 @@ task TranscriptClean {
TranscriptClean \
~{"-s " + SAMfile} \
~{"-g " + referenceGenome} \
~{"-o " + outputPrefix} \
~{"-j " + spliceJunctionAnnotation} \
~{"-v " + variantFile} \
~{"-t " + cores} \
~{"--maxLenIndel=" + maxLenIndel} \
~{"--maxSJOffset=" + maxSJoffset} \
~{"-o " + outputPrefix} \
~{true="-m true" false="-m false" correctMismatches} \
~{true="-i true" false="-i false" correctIndels} \
~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \
...
...
@@ -139,7 +161,9 @@ task TranscriptClean {
~{true="--primaryOnly" false="" primaryOnly} \
~{true="--canonOnly" false="" canonOnly} \
~{"--bufferSize=" + bufferSize} \
~{"-t " + cores}
~{true="--deleteTmp" false="" deleteTmp} \
~{"-j " + spliceJunctionAnnotation} \
~{"-v " + variantFile}
}
output {
...
...
@@ -156,24 +180,81 @@ task TranscriptClean {
}
parameter_meta {
SAMfile: "Input SAM file containing transcripts to correct."
referenceGenome: "Reference genome fasta file."
outputPrefix: "Output directory path + output file prefix."
spliceJunctionAnnotation: "Splice junction file."
variantFile: "VCF formatted file of variants."
maxLenIndel: "Maximum size indel to correct."
maxSJoffset: "Maximum distance from annotated splice junction to correct."
correctMismatches: "Set this to make TranscriptClean correct mismatches."
correctIndels: "Set this to make TranscriptClean correct indels."
correctSJs: "Set this to make TranscriptClean correct splice junctions."
dryRun: "TranscriptClean will read in the data but don't do any correction."
primaryOnly: "TranscriptClean will only output primary mappings of transcripts."
canonOnly: "TranscriptClean will output only canonical transcripts and transcript containing annotated noncanonical junctions."
bufferSize: "Number of lines to output to file at once by each thread during run."
outputTranscriptCleanFasta: "Fasta file containing corrected reads."
outputTranscriptCleanLog: "Log file of TranscriptClean run."
outputTranscriptCleanSAM: "SAM file containing corrected aligned reads."
outputTranscriptCleanTElog: "TE log file of TranscriptClean run."
SAMfile: {
description: "Input SAM file containing transcripts to correct.",
category: "required"
}
referenceGenome: {
description: "Reference genome fasta file.",
category: "required"
}
maxLenIndel: {
description: "Maximum size indel to correct.",
category: "advanced"
}
maxSJoffset: {
description: "Maximum distance from annotated splice junction to correct.",
category: "advanced"
}
outputPrefix: {
description: "Output directory path + output file prefix.",
category: "required"
}
correctMismatches: {
description: "Set this to make TranscriptClean correct mismatches.",
category: "common"
}
correctIndels: {
description: "Set this to make TranscriptClean correct indels.",
category: "common"
}
correctSJs: {
description: "Set this to make TranscriptClean correct splice junctions.",
category: "common"
}
dryRun: {
description: "TranscriptClean will read in the data but will not perform any correction.",
category: "advanced"
}
primaryOnly: {
description: "Only output primary mappings of transcripts.",
category: "advanced"
}
canonOnly: {
description: "Only output canonical transcripts and transcripts containing annotated noncanonical junctions.",
category: "advanced"
}
bufferSize: {
description: "Number of lines to output to file at once by each thread during run.",
category: "common"
}
deleteTmp: {
description: "The temporary directory generated by TranscriptClean will be removed.",
category: "common"
}
spliceJunctionAnnotation: {
description: "Splice junction file.",
category: "common"
}
variantFile: {
description: "VCF formatted file of variants.",
category: "common"
}
outputTranscriptCleanFasta: {
description: "Fasta file containing corrected reads.",
category: "required"
}
outputTranscriptCleanLog: {
description: "Log file of TranscriptClean run.",
category: "required"
}
outputTranscriptCleanSAM: {
description: "SAM file containing corrected aligned reads.",
category: "required"
}
outputTranscriptCleanTElog: {
description: "TE log file of TranscriptClean run.",
category: "required"
}
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment