diff --git a/centrifuge.wdl b/centrifuge.wdl index e1cddcadedd9e5c6e110684cb0c702cb0e77b2e2..ee30532570c43a3ab137d51d926ebd5cda6ece20 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -281,7 +281,7 @@ task Download { task DownloadTaxonomy { input { - String centrifugeTaxonomyDir + String taxonomyDir String executable = "centrifuge-download" String? preCommand } @@ -290,19 +290,19 @@ task DownloadTaxonomy { set -e -o pipefail ~{preCommand} ~{executable} \ - -o ~{centrifugeTaxonomyDir} \ + -o ~{taxonomyDir} \ taxonomy } output { - File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp" - File nameTable = centrifugeTaxonomyDir + "/names.dmp" + File taxonomyTree = taxonomyDir + "/nodes.dmp" + File nameTable = taxonomyDir + "/names.dmp" } } task KReport { input { - File centrifugeClassification + File classification String outputPrefix Array[File]+ indexFiles Boolean noLCA = false @@ -332,7 +332,7 @@ task KReport { ~{true="--is-count-table" false="" isCountTable} \ ~{"--min-score " + minimumScore} \ ~{"--min-length " + minimumLength} \ - ~{centrifugeClassification} \ + ~{classification} \ > ~{outputPrefix + "_kreport.tsv"} >>> @@ -348,7 +348,7 @@ task KReport { parameter_meta { # inputs - centrifugeClassification: {description: "File with centrifuge classification results.", category: "required"} + classification: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} noLCA: {description: "Do not report the lca of multiple assignments, but report count fractions at the taxa.", category: "advanced"} diff --git a/isoseq3.wdl b/isoseq3.wdl index f369553f6265272e96b095ca4813ecfe947f1a0c..604a71d50fa673b5d248b7edfc943ccc66150c82 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -22,7 +22,7 @@ version 1.0 task Refine { input { - Int minPolyAlength = 20 + 
Int minPolyALength = 20 Boolean requirePolyA = false String logLevel = "WARN" File inputBamFile @@ -40,7 +40,7 @@ task Refine { set -e mkdir -p "~{outputDir}" isoseq3 refine \ - --min-polya-length ~{minPolyAlength} \ + --min-polya-length ~{minPolyALength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ @@ -68,7 +68,7 @@ task Refine { parameter_meta { # inputs - minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"} + minPolyALength: {description: "Minimum poly(A) tail length.", category: "advanced"} requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "Bam input file.", category: "required"} diff --git a/minimap2.wdl b/minimap2.wdl index 04b02bf2bdc7454c8e7c772063f1999e672a4d30..fb31fb7fd769c2f42a13896f729ec1aa1d20b171 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -50,7 +50,7 @@ task Indexing { } output { - File outputIndexFile = outputPrefix + ".mmi" + File indexFile = outputPrefix + ".mmi" } runtime { @@ -62,7 +62,7 @@ task Indexing { parameter_meta { # input - useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", category: "advanced"} + useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferable for PacBio).", category: "advanced"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -74,7 +74,7 @@ task Indexing { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputIndexFile: {description: "Indexed reference file."} + indexFile: {description: "Indexed reference file."} } } @@ -83,9 +83,9 @@ task Mapping { String presetOption Int kmerSize = 15 Boolean skipSelfAndDualMappings = false - Boolean outputSAM = false + Boolean outputSam = false String outputPrefix - Boolean addMDtagToSAM = false + Boolean addMDTagToSam = false Boolean secondaryAlignment = false File referenceFile File queryFile @@ -110,9 +110,9 @@ task Mapping { -x ~{presetOption} \ -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSAM} \ + ~{true="-a" false="" outputSam} \ -o ~{outputPrefix} \ - ~{true="--MD" false="" addMDtagToSAM} \ + ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ ~{"-G " + maxIntronLength} \ @@ -126,7 +126,7 @@ task Mapping { } output { - File outputAlignmentFile = outputPrefix + File alignmentFile = outputPrefix } runtime { @@ -139,16 +139,16 @@ task Mapping { parameter_meta { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} - outputSAM: {description: "Output in the SAM format.", category: "common"} + outputSam: {description: "Output in the SAM format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - retainMaxSecondaryAlignments: {description: "Retain at most 
INT secondary alignments.", category: "advanced"} + retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} - addMDtagToSAM: {description: "Adds a MD tag to the SAM output file.", category: "common"} + addMDTagToSam: {description: "Adds an MD tag to the SAM output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} referenceFile: {description: "Reference fasta file.", category: "required"} queryFile: {description: "Input fasta file.", category: "required"} @@ -158,6 +158,6 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputAlignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."} + alignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."} } } diff --git a/samtools.wdl b/samtools.wdl index 0b8394bff308d65874d03170918569417b1c751e..c155f0260d336b79dc52f3be8440f4a5b84e2e7d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -423,6 +423,7 @@ task Sort { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} } @@ -526,11 +527,10 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/talon.wdl b/talon.wdl index e39a3cd2e683b660624feb71890930bb535ba650..c796c1cec168aecc607e8803a0aca65ddac77a97 100644 --- a/talon.wdl +++ b/talon.wdl @@ -242,9 +242,9 @@ task GetReadAnnotations { task GetSpliceJunctions { input { - File SJinformationFile + File sjInformationFile String inputFileType = "db" - File referenceGTF + File referenceGtf String runMode = "intron" String outputPrefix @@ -259,8 +259,8 @@ task GetSpliceJunctions { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_get_sjs \ - ~{SJfileType[inputFileType] + SJinformationFile} \ - --ref ~{referenceGTF} \ + ~{SJfileType[inputFileType] + sjInformationFile} \ + --ref ~{referenceGtf} \ --mode ~{runMode} \ --outprefix ~{outputPrefix} } @@ -277,9 +277,9 @@ task GetSpliceJunctions { parameter_meta { # inputs - SJinformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} - inputFileType: {description: "The file type of SJinformationFile.", category: "common"} - referenceGTF: {description: "Gtf reference file (ie gencode).", category: "required"} + sjInformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} + inputFileType: {description: "The file type of sjInformationFile.", category: "common"} + referenceGtf: {description: "Gtf reference file (ie gencode).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -293,13 +293,13 @@ task GetSpliceJunctions { task InitializeTalonDatabase { input { - File GTFfile + File gtfFile String genomeBuild String annotationVersion 
Int minimumLength = 300 - String novelIDprefix = "TALON" - Int cutoff5p = 500 - Int cutoff3p = 300 + String novelPrefix = "TALON" + Int cutOff5p = 500 + Int cutOff3p = 300 String outputPrefix String memory = "10G" @@ -311,13 +311,13 @@ task InitializeTalonDatabase { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_initialize_database \ - --f=~{GTFfile} \ + --f=~{gtfFile} \ --g=~{genomeBuild} \ --a=~{annotationVersion} \ --l=~{minimumLength} \ - --idprefix=~{novelIDprefix} \ - --5p=~{cutoff5p} \ - --3p=~{cutoff3p} \ + --idprefix=~{novelPrefix} \ + --5p=~{cutOff5p} \ + --3p=~{cutOff3p} \ --o=~{outputPrefix} } @@ -333,13 +333,13 @@ task InitializeTalonDatabase { parameter_meta { # inputs - GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} genomeBuild: {description: "Name of genome build that the gtf file is based on (ie hg38).", category: "required"} annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"} minimumLength: { description: "Minimum required transcript length.", category: "common"} - novelIDprefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} - cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} - cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} + novelPrefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} + cutOff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} + cutOff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", 
category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -352,7 +352,7 @@ task InitializeTalonDatabase { task LabelReads { input { - File SAMfile + File samFile File referenceGenome Int fracaRangeSize = 20 String tmpDir = "./tmp_label_reads" @@ -369,7 +369,7 @@ task LabelReads { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_label_reads \ - --f=~{SAMfile} \ + --f=~{samFile} \ --g=~{referenceGenome} \ --t=~{threads} \ --ar=~{fracaRangeSize} \ @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - SAMfile: {description: "Sam file of transcripts.", category: "required"} + samFile: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} @@ -411,7 +411,7 @@ task LabelReads { task ReformatGtf { input { - File GTFfile + File gtfFile String memory = "4G" Int timeMinutes = 30 @@ -421,11 +421,11 @@ task ReformatGtf { command { set -e talon_reformat_gtf \ - -gtf ~{GTFfile} + -gtf ~{gtfFile} } output { - File reformattedGtf = GTFfile + File reformattedGtf = gtfFile } runtime { @@ -436,7 +436,7 @@ task ReformatGtf { parameter_meta { # inputs - GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -452,7 +452,7 @@ task SummarizeDatasets { Boolean setVerbose = false String outputPrefix - File? datasetGroupsCSV + File? datasetGroupsCsv String memory = "4G" Int timeMinutes = 50 @@ -466,7 +466,7 @@ task SummarizeDatasets { --db ~{databaseFile} \ ~{true="--verbose" false="" setVerbose} \ --o ~{outputPrefix} \ - ~{"--groups " + datasetGroupsCSV} + ~{"--groups " + datasetGroupsCsv} } output { @@ -484,7 +484,7 @@ task SummarizeDatasets { databaseFile: {description: "Talon database.", category: "required"} setVerbose: {description: "Print out the counts in terminal.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} + datasetGroupsCsv: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -496,7 +496,7 @@ task SummarizeDatasets { task Talon { input { - Array[File] SAMfiles + Array[File] samFiles String organism String sequencingPlatform = "PacBio-RS-II" File databaseFile @@ -518,7 +518,7 @@ task Talon { ln -s $PWD/tmp /tmp/sqltmp #Multiprocessing will crash if the absolute path is too long. export TMPDIR=/tmp/sqltmp printf "" > ~{outputPrefix}/talonConfigFile.csv #File needs to be emptied when task is rerun. 
- for file in ~{sep=" " SAMfiles} + for file in ~{sep=" " samFiles} do configFileLine="$(basename ${file%.*}),~{organism},~{sequencingPlatform},${file}" echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv @@ -549,7 +549,7 @@ task Talon { parameter_meta { # inputs - SAMfiles: {description: "Input sam files.", category: "required"} + samFiles: {description: "Input sam files.", category: "required"} organism: {description: "The name of the organism from which the samples originated.", category: "required"} sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"} databaseFile: {description: "Talon database. Created using initialize_talon_database.py.", category: "required"} diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 15da1f583ee7c0271e3e54751d2c588cbba5087d..daf797030a263bb971b8a16cd05d2df37a2ba0ea 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -22,7 +22,7 @@ version 1.0 task GetSJsFromGtf { input { - File GTFfile + File gtfFile File genomeFile String outputPrefix Int minIntronSize = 21 @@ -36,14 +36,14 @@ task GetSJsFromGtf { set -e mkdir -p "$(dirname ~{outputPrefix})" get_SJs_from_gtf \ - --f=~{GTFfile} \ + --f=~{gtfFile} \ --g=~{genomeFile} \ --minIntronSize=~{minIntronSize} \ ~{"--o=" + outputPrefix + ".tsv"} } output { - File outputSJsFile = outputPrefix + ".tsv" + File spliceJunctionFile = outputPrefix + ".tsv" } runtime { @@ -54,22 +54,21 @@ task GetSJsFromGtf { parameter_meta { # inputs - GTFfile: {description: "Input GTF file", category: "required"} + gtfFile: {description: "Input gtf file", category: "required"} genomeFile: {description: "Reference genome", category: "required"} minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} 
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSJsFile: {description: "Extracted splice junctions."} + spliceJunctionFile: {description: "Extracted splice junctions."} } } task GetTranscriptCleanStats { input { - File transcriptCleanSAMfile + File transcriptCleanSamFile String outputPrefix String memory = "4G" @@ -81,12 +80,12 @@ task GetTranscriptCleanStats { set -e mkdir -p "$(dirname ~{outputPrefix})" get_TranscriptClean_stats \ - ~{transcriptCleanSAMfile} \ + ~{transcriptCleanSamFile} \ ~{outputPrefix} } output { - File outputStatsFile = stdout() + File statsFile = stdout() } runtime { @@ -97,24 +96,23 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"} + transcriptCleanSamFile: {description: "Output SAM file from TranscriptClean.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputStatsFile: {description: "Summary stats from TranscriptClean run."} + statsFile: {description: "Summary stats from TranscriptClean run."} } } task TranscriptClean { input { - File SAMfile + File samFile File referenceGenome Int maxLenIndel = 5 - Int maxSJoffset = 5 + Int maxSJOffset = 5 String outputPrefix Boolean correctMismatches = true Boolean correctIndels = true @@ -138,7 +136,7 @@ task TranscriptClean { set -e mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ - -s ~{SAMfile} \ + -s ~{samFile} \ -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ @@ -157,10 +155,10 @@ task TranscriptClean { } output { - File outputTranscriptCleanFasta = outputPrefix + "_clean.fa" - File outputTranscriptCleanLog = outputPrefix + "_clean.log" - File outputTranscriptCleanSAM = outputPrefix + "_clean.sam" - File outputTranscriptCleanTElog = outputPrefix + "_clean.TE.log" + File fastaFile = outputPrefix + "_clean.fa" + File logFile = outputPrefix + "_clean.log" + File outputSamFile = outputPrefix + "_clean.sam" + File logFileTE = outputPrefix + "_clean.TE.log" } runtime { @@ -172,21 +170,21 @@ task TranscriptClean { parameter_meta { # inputs - SAMfile: {description: "Input SAM file containing transcripts to correct.", category: "required"} + samFile: {description: "Input SAM file containing transcripts to correct.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"} - maxSJoffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} + maxSJOffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - correctMismatches: 
{description: "Set this to make TranscriptClean correct mismatches.", category: "common"} - correctIndels: {description: "Set this to make TranscriptClean correct indels.", category: "common"} - correctSJs: {description: "Set this to make TranscriptClean correct splice junctions.", category: "common"} - dryRun: {description: "TranscriptClean will read in the data but don't do any correction.", category: "advanced"} + correctMismatches: {description: "Set this to make TranscriptClean correct mismatches.", category: "common"} + correctIndels: {description: "Set this to make TranscriptClean correct indels.", category: "common"} + correctSJs: {description: "Set this to make TranscriptClean correct splice junctions.", category: "common"} + dryRun: {description: "TranscriptClean will read in the data but will not perform any correction.", category: "advanced"} primaryOnly: {description: "Only output primary mappings of transcripts.", category: "advanced"} canonOnly: {description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", category: "advanced"} bufferSize: {description: "Number of lines to output to file at once by each thread during run.", category: "common"} - deleteTmp: {description: "The temporary directory generated by TranscriptClean will be removed.", category: "common"} + deleteTmp: {description: "The temporary directory generated by TranscriptClean will be removed.", category: "common"} spliceJunctionAnnotation: {description: "Splice junction file.", category: "common"} - variantFile: {description: "VCF formatted file of variants.", category: "common"} + variantFile: {description: "VCF formatted file of variants.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -194,9 
+192,9 @@ task TranscriptClean { category: "advanced"} # outputs - outputTranscriptCleanFasta: {description: "Fasta file containing corrected reads."} - outputTranscriptCleanLog: {description: "Log file of TranscriptClean run."} - outputTranscriptCleanSAM: {description: "SAM file containing corrected aligned reads."} - outputTranscriptCleanTElog: {description: "TE log file of TranscriptClean run."} + fastaFile: {description: "Fasta file containing corrected reads."} + logFile: {description: "Log file of TranscriptClean run."} + outputSamFile: {description: "SAM file containing corrected aligned reads."} + logFileTE: {description: "TE log file of TranscriptClean run."} } }