Skip to content
Snippets Groups Projects
Commit 4fbfe713 authored by JasperBoom's avatar JasperBoom
Browse files

Fix last set of tasks.

parent c248c5bb
No related branches found
No related tags found
No related merge requests found
......@@ -281,7 +281,7 @@ task Download {
task DownloadTaxonomy {
input {
String centrifugeTaxonomyDir
String taxonomyDir
String executable = "centrifuge-download"
String? preCommand
}
......@@ -290,19 +290,19 @@ task DownloadTaxonomy {
set -e -o pipefail
~{preCommand}
~{executable} \
-o ~{centrifugeTaxonomyDir} \
-o ~{taxonomyDir} \
taxonomy
}
output {
File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp"
File nameTable = centrifugeTaxonomyDir + "/names.dmp"
File taxonomyTree = taxonomyDir + "/nodes.dmp"
File nameTable = taxonomyDir + "/names.dmp"
}
}
task KReport {
input {
File centrifugeClassification
File classification
String outputPrefix
Array[File]+ indexFiles
Boolean noLCA = false
......@@ -332,7 +332,7 @@ task KReport {
~{true="--is-count-table" false="" isCountTable} \
~{"--min-score " + minimumScore} \
~{"--min-length " + minimumLength} \
~{centrifugeClassification} \
~{classification} \
> ~{outputPrefix + "_kreport.tsv"}
>>>
......@@ -348,7 +348,7 @@ task KReport {
parameter_meta {
# inputs
centrifugeClassification: {description: "File with centrifuge classification results.", category: "required"}
classification: {description: "File with centrifuge classification results.", category: "required"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
indexFiles: {description: "The files of the index for the reference genomes.", category: "required"}
noLCA: {description: "Do not report the lca of multiple assignments, but report count fractions at the taxa.", category: "advanced"}
......
......@@ -22,7 +22,7 @@ version 1.0
task Refine {
input {
Int minPolyAlength = 20
Int minPolyALength = 20
Boolean requirePolyA = false
String logLevel = "WARN"
File inputBamFile
......@@ -40,7 +40,7 @@ task Refine {
set -e
mkdir -p "~{outputDir}"
isoseq3 refine \
--min-polya-length ~{minPolyAlength} \
--min-polya-length ~{minPolyALength} \
~{true="--require-polya" false="" requirePolyA} \
--log-level ~{logLevel} \
--num-threads ~{cores} \
......@@ -68,7 +68,7 @@ task Refine {
parameter_meta {
# inputs
minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"}
minPolyALength: {description: "Minimum poly(A) tail length.", category: "advanced"}
requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"}
logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"}
inputBamFile: {description: "Bam input file.", category: "required"}
......
......@@ -50,7 +50,7 @@ task Indexing {
}
output {
File outputIndexFile = outputPrefix + ".mmi"
File indexFile = outputPrefix + ".mmi"
}
runtime {
......@@ -62,7 +62,7 @@ task Indexing {
parameter_meta {
# input
useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", category: "advanced"}
useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferable for pacbio).", category: "advanced"}
kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"}
minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
......@@ -74,7 +74,7 @@ task Indexing {
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# output
outputIndexFile: {description: "Indexed reference file."}
indexFile: {description: "Indexed reference file."}
}
}
......@@ -83,9 +83,9 @@ task Mapping {
String presetOption
Int kmerSize = 15
Boolean skipSelfAndDualMappings = false
Boolean outputSAM = false
Boolean outputSam = false
String outputPrefix
Boolean addMDtagToSAM = false
Boolean addMDTagToSam = false
Boolean secondaryAlignment = false
File referenceFile
File queryFile
......@@ -110,9 +110,9 @@ task Mapping {
-x ~{presetOption} \
-k ~{kmerSize} \
~{true="-X" false="" skipSelfAndDualMappings} \
~{true="-a" false="" outputSAM} \
~{true="-a" false="" outputSam} \
-o ~{outputPrefix} \
~{true="--MD" false="" addMDtagToSAM} \
~{true="--MD" false="" addMDTagToSam} \
--secondary=~{true="yes" false="no" secondaryAlignment} \
-t ~{cores} \
~{"-G " + maxIntronLength} \
......@@ -126,7 +126,7 @@ task Mapping {
}
output {
File outputAlignmentFile = outputPrefix
File alignmentFile = outputPrefix
}
runtime {
......@@ -139,16 +139,16 @@ task Mapping {
parameter_meta {
presetOption: {description: "This option applies multiple options at the same time.", category: "common"}
kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"}
outputSAM: {description: "Output in the SAM format.", category: "common"}
outputSam: {description: "Output in the sam format.", category: "common"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"}
maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"}
skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"}
retainMaxSecondaryAlignments: {description: "Retain at most INT secondary alignments.", category: "advanced"}
retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"}
matchingScore: {description: "Matching score.", category: "advanced"}
mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"}
howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"}
addMDtagToSAM: {description: "Adds a MD tag to the SAM output file.", category: "common"}
addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"}
secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"}
referenceFile: {description: "Reference fasta file.", category: "required"}
queryFile: {description: "Input fasta file.", category: "required"}
......@@ -158,6 +158,6 @@ task Mapping {
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# output
outputAlignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."}
alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."}
}
}
......@@ -423,6 +423,7 @@ task Sort {
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
# outputs
outputBam: {description: "Sorted BAM file."}
}
......@@ -526,11 +527,10 @@ task View {
excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"}
excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"}
MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
category: "advanced"}
}
}
\ No newline at end of file
}
......@@ -242,9 +242,9 @@ task GetReadAnnotations {
task GetSpliceJunctions {
input {
File SJinformationFile
File sjInformationFile
String inputFileType = "db"
File referenceGTF
File referenceGtf
String runMode = "intron"
String outputPrefix
......@@ -259,8 +259,8 @@ task GetSpliceJunctions {
set -e
mkdir -p "$(dirname ~{outputPrefix})"
talon_get_sjs \
~{SJfileType[inputFileType] + SJinformationFile} \
--ref ~{referenceGTF} \
~{SJfileType[inputFileType] + sjInformationFile} \
--ref ~{referenceGtf} \
--mode ~{runMode} \
--outprefix ~{outputPrefix}
}
......@@ -277,9 +277,9 @@ task GetSpliceJunctions {
parameter_meta {
# inputs
SJinformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"}
inputFileType: {description: "The file type of SJinformationFile.", category: "common"}
referenceGTF: {description: "Gtf reference file (ie gencode).", category: "required"}
sjInformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"}
inputFileType: {description: "The file type of sjInformationFile.", category: "common"}
referenceGtf: {description: "Gtf reference file (ie gencode).", category: "required"}
runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
......@@ -293,13 +293,13 @@ task GetSpliceJunctions {
task InitializeTalonDatabase {
input {
File GTFfile
File gtfFile
String genomeBuild
String annotationVersion
Int minimumLength = 300
String novelIDprefix = "TALON"
Int cutoff5p = 500
Int cutoff3p = 300
String novelPrefix = "TALON"
Int cutOff5p = 500
Int cutOff3p = 300
String outputPrefix
String memory = "10G"
......@@ -311,13 +311,13 @@ task InitializeTalonDatabase {
set -e
mkdir -p "$(dirname ~{outputPrefix})"
talon_initialize_database \
--f=~{GTFfile} \
--f=~{gtfFile} \
--g=~{genomeBuild} \
--a=~{annotationVersion} \
--l=~{minimumLength} \
--idprefix=~{novelIDprefix} \
--5p=~{cutoff5p} \
--3p=~{cutoff3p} \
--idprefix=~{novelPrefix} \
--5p=~{cutOff5p} \
--3p=~{cutOff3p} \
--o=~{outputPrefix}
}
......@@ -333,13 +333,13 @@ task InitializeTalonDatabase {
parameter_meta {
# inputs
GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"}
gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"}
genomeBuild: {description: "Name of genome build that the gtf file is based on (ie hg38).", category: "required"}
annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"}
minimumLength: { description: "Minimum required transcript length.", category: "common"}
novelIDprefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"}
cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"}
cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"}
novelPrefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"}
cutOff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"}
cutOff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
......@@ -352,7 +352,7 @@ task InitializeTalonDatabase {
task LabelReads {
input {
File SAMfile
File samFile
File referenceGenome
Int fracaRangeSize = 20
String tmpDir = "./tmp_label_reads"
......@@ -369,7 +369,7 @@ task LabelReads {
set -e
mkdir -p "$(dirname ~{outputPrefix})"
talon_label_reads \
--f=~{SAMfile} \
--f=~{samFile} \
--g=~{referenceGenome} \
--t=~{threads} \
--ar=~{fracaRangeSize} \
......@@ -392,7 +392,7 @@ task LabelReads {
parameter_meta {
# inputs
SAMfile: {description: "Sam file of transcripts.", category: "required"}
samFile: {description: "Sam file of transcripts.", category: "required"}
referenceGenome: {description: "Reference genome fasta file.", category: "required"}
fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"}
tmpDir: {description: "Path to directory for tmp files.", category: "advanced"}
......@@ -411,7 +411,7 @@ task LabelReads {
task ReformatGtf {
input {
File GTFfile
File gtfFile
String memory = "4G"
Int timeMinutes = 30
......@@ -421,11 +421,11 @@ task ReformatGtf {
command {
set -e
talon_reformat_gtf \
-gtf ~{GTFfile}
-gtf ~{gtfFile}
}
output {
File reformattedGtf = GTFfile
File reformattedGtf = gtfFile
}
runtime {
......@@ -436,7 +436,7 @@ task ReformatGtf {
parameter_meta {
# inputs
GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"}
gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
......@@ -452,7 +452,7 @@ task SummarizeDatasets {
Boolean setVerbose = false
String outputPrefix
File? datasetGroupsCSV
File? datasetGroupsCsv
String memory = "4G"
Int timeMinutes = 50
......@@ -466,7 +466,7 @@ task SummarizeDatasets {
--db ~{databaseFile} \
~{true="--verbose" false="" setVerbose} \
--o ~{outputPrefix} \
~{"--groups " + datasetGroupsCSV}
~{"--groups " + datasetGroupsCsv}
}
output {
......@@ -484,7 +484,7 @@ task SummarizeDatasets {
databaseFile: {description: "Talon database.", category: "required"}
setVerbose: {description: "Print out the counts in terminal.", category: "advanced"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"}
datasetGroupsCsv: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
......@@ -496,7 +496,7 @@ task SummarizeDatasets {
task Talon {
input {
Array[File] SAMfiles
Array[File] samFiles
String organism
String sequencingPlatform = "PacBio-RS-II"
File databaseFile
......@@ -518,7 +518,7 @@ task Talon {
ln -s $PWD/tmp /tmp/sqltmp #Multiprocessing will crash if the absolute path is too long.
export TMPDIR=/tmp/sqltmp
printf "" > ~{outputPrefix}/talonConfigFile.csv #File needs to be emptied when task is rerun.
for file in ~{sep=" " SAMfiles}
for file in ~{sep=" " samFiles}
do
configFileLine="$(basename ${file%.*}),~{organism},~{sequencingPlatform},${file}"
echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv
......@@ -549,7 +549,7 @@ task Talon {
parameter_meta {
# inputs
SAMfiles: {description: "Input sam files.", category: "required"}
samFiles: {description: "Input sam files.", category: "required"}
organism: {description: "The name of the organism from which the samples originated.", category: "required"}
sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"}
databaseFile: {description: "Talon database. Created using initialize_talon_database.py.", category: "required"}
......
......@@ -22,7 +22,7 @@ version 1.0
task GetSJsFromGtf {
input {
File GTFfile
File gtfFile
File genomeFile
String outputPrefix
Int minIntronSize = 21
......@@ -36,14 +36,14 @@ task GetSJsFromGtf {
set -e
mkdir -p "$(dirname ~{outputPrefix})"
get_SJs_from_gtf \
--f=~{GTFfile} \
--f=~{gtfFile} \
--g=~{genomeFile} \
--minIntronSize=~{minIntronSize} \
~{"--o=" + outputPrefix + ".tsv"}
}
output {
File outputSJsFile = outputPrefix + ".tsv"
File spliceJunctionFile = outputPrefix + ".tsv"
}
runtime {
......@@ -54,22 +54,21 @@ task GetSJsFromGtf {
parameter_meta {
# inputs
GTFfile: {description: "Input GTF file", category: "required"}
gtfFile: {description: "Input gtf file", category: "required"}
genomeFile: {description: "Reference genome", category: "required"}
minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
outputSJsFile: {description: "Extracted splice junctions."}
spliceJunctionFile: {description: "Extracted splice junctions."}
}
}
task GetTranscriptCleanStats {
input {
File transcriptCleanSAMfile
File transcriptCleanSamFile
String outputPrefix
String memory = "4G"
......@@ -81,12 +80,12 @@ task GetTranscriptCleanStats {
set -e
mkdir -p "$(dirname ~{outputPrefix})"
get_TranscriptClean_stats \
~{transcriptCleanSAMfile} \
~{transcriptCleanSamFile} \
~{outputPrefix}
}
output {
File outputStatsFile = stdout()
File statsFile = stdout()
}
runtime {
......@@ -97,24 +96,23 @@ task GetTranscriptCleanStats {
parameter_meta {
# inputs
transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"}
transcriptCleanSamFile: {description: "Output sam file from transcriptclean", category: "required"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
outputStatsFile: {description: "Summary stats from TranscriptClean run."}
statsFile: {description: "Summary stats from transcriptclean run."}
}
}
task TranscriptClean {
input {
File SAMfile
File samFile
File referenceGenome
Int maxLenIndel = 5
Int maxSJoffset = 5
Int maxSJOffset = 5
String outputPrefix
Boolean correctMismatches = true
Boolean correctIndels = true
......@@ -138,7 +136,7 @@ task TranscriptClean {
set -e
mkdir -p "$(dirname ~{outputPrefix})"
TranscriptClean \
-s ~{SAMfile} \
-s ~{samFile} \
-g ~{referenceGenome} \
-t ~{cores} \
--maxLenIndel=~{maxLenIndel} \
......@@ -157,10 +155,10 @@ task TranscriptClean {
}
output {
File outputTranscriptCleanFasta = outputPrefix + "_clean.fa"
File outputTranscriptCleanLog = outputPrefix + "_clean.log"
File outputTranscriptCleanSAM = outputPrefix + "_clean.sam"
File outputTranscriptCleanTElog = outputPrefix + "_clean.TE.log"
File fastaFile = outputPrefix + "_clean.fa"
File logFile = outputPrefix + "_clean.log"
File samFile = outputPrefix + "_clean.sam"
File logFileTE = outputPrefix + "_clean.TE.log"
}
runtime {
......@@ -172,21 +170,21 @@ task TranscriptClean {
parameter_meta {
# inputs
SAMfile: {description: "Input SAM file containing transcripts to correct.", category: "required"}
samFile: {description: "Input sam file containing transcripts to correct.", category: "required"}
referenceGenome: {description: "Reference genome fasta file.", category: "required"}
maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"}
maxSJoffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"}
maxSJOffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
correctMismatches: {description: "Set this to make TranscriptClean correct mismatches.", category: "common"}
correctIndels: {description: "Set this to make TranscriptClean correct indels.", category: "common"}
correctSJs: {description: "Set this to make TranscriptClean correct splice junctions.", category: "common"}
dryRun: {description: "TranscriptClean will read in the data but don't do any correction.", category: "advanced"}
correctMismatches: {description: "Set this to make transcriptclean correct mismatches.", category: "common"}
correctIndels: {description: "Set this to make transcriptclean correct indels.", category: "common"}
correctSJs: {description: "Set this to make transcriptclean correct splice junctions.", category: "common"}
dryRun: {description: "Transcriptclean will read in the data but will not do any correction.", category: "advanced"}
primaryOnly: {description: "Only output primary mappings of transcripts.", category: "advanced"}
canonOnly: {description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", category: "advanced"}
bufferSize: {description: "Number of lines to output to file at once by each thread during run.", category: "common"}
deleteTmp: {description: "The temporary directory generated by TranscriptClean will be removed.", category: "common"}
deleteTmp: {description: "The temporary directory generated by transcriptclean will be removed.", category: "common"}
spliceJunctionAnnotation: {description: "Splice junction file.", category: "common"}
variantFile: {description: "VCF formatted file of variants.", category: "common"}
variantFile: {description: "Vcf formatted file of variants.", category: "common"}
cores: {description: "The number of cores to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
......@@ -194,9 +192,9 @@ task TranscriptClean {
category: "advanced"}
# outputs
outputTranscriptCleanFasta: {description: "Fasta file containing corrected reads."}
outputTranscriptCleanLog: {description: "Log file of TranscriptClean run."}
outputTranscriptCleanSAM: {description: "SAM file containing corrected aligned reads."}
outputTranscriptCleanTElog: {description: "TE log file of TranscriptClean run."}
fastaFile: {description: "Fasta file containing corrected reads."}
logFile: {description: "Log file of transcriptclean run."}
samFile: {description: "Sam file containing corrected aligned reads."}
logFileTE: {description: "TE log file of transcriptclean run."}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment