Skip to content
Snippets Groups Projects
Commit b107182c authored by Ruben Vorderman's avatar Ruben Vorderman
Browse files

Merge remote-tracking branch 'origin' into BIOWDL-475

parents d9b05b58 6cd572e7
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,11 @@ that users understand how the changes affect the new version.
version 4.0.0-develop
---------------------------
+ TALON: Update `FilterTalonTranscripts` to the new version, which removes the
`pairingsFile` input and replaces it with `datasetsFile`.
+ TALON: Add `GetSpliceJunctions` & `LabelReads` tasks.
+ TALON: Update to version 5.0.
+ Add tasks for pbmm2, the PacBio wrapper for minimap2.
+ Update the image for chunked-scatter and make use of new features from 0.2.0.
+ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and
STAR.
......
version 1.0
# Copyright (c) 2020 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
task Mapping {
    # Aligns the reads in `queryFile` against the `referenceMMI` index by
    # running `pbmm2 align`, writing "<sample>.align.bam" in the working
    # directory of the task.
    input {
        String presetOption
        Boolean sort=true
        String sample
        File referenceMMI
        File queryFile

        Int cores = 4
        String memory = "30G"
        # Time limit grows with the size of the query file (in "G" units) and
        # shrinks with the number of cores; the leading 1 keeps it above zero.
        Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores)
        String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1"
    }

    command {
        pbmm2 align \
        --preset ~{presetOption} \
        ~{true="--sort" false="" sort} \
        -j ~{cores} \
        ~{referenceMMI} \
        ~{queryFile} \
        ~{sample}.align.bam
    }

    output {
        File outputAlignmentFile = sample + ".align.bam"
        # NOTE(review): the index is declared unconditionally; presumably
        # pbmm2 only writes the .bai when `sort` is true, in which case
        # running with sort=false fails at output collection — confirm
        # against the pbmm2 documentation before disabling sorting.
        File outputIndexFile = sample + ".align.bam.bai"
    }

    runtime {
        cpu: cores
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # inputs
        presetOption: {description: "This option applies multiple options at the same time.", category: "required"}
        sort: {description: "Sort the output bam file.", category: "advanced"}
        sample: {description: "Name of the sample", category: "required"}
        referenceMMI: {description: "MMI file for the reference.", category: "required"}
        queryFile: {description: "BAM file with reads to align against the reference.", category: "required"}
        cores: {description: "The number of cores to be used.", category: "advanced"}
        memory: {description: "The amount of memory available to the job.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}

        # outputs
        outputAlignmentFile: {description: "Mapped bam file."}
        outputIndexFile: {description: "Bam index file."}
    }
}
Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464
Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4
......@@ -32,7 +32,7 @@ task CreateAbundanceFileFromDatabase {
String memory = "4G"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -88,7 +88,7 @@ task CreateGtfFromDatabase {
String memory = "4G"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -137,12 +137,16 @@ task FilterTalonTranscripts {
File databaseFile
String annotationVersion
String outputPrefix
Float maxFracA = 0.5
Int minCount = 5
Boolean allowGenomic = false
File? pairingsFile
File? datasetsFile
Int? minDatasets
String memory = "4G"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -152,7 +156,11 @@ task FilterTalonTranscripts {
--db=~{databaseFile} \
-a ~{annotationVersion} \
~{"--o=" + outputPrefix + "_whitelist.csv"} \
~{"-p " + pairingsFile}
--maxFracA=~{maxFracA} \
--minCount=~{minCount} \
~{true="--allowGenomic" false="" allowGenomic} \
--datasets=~{datasetsFile} \
--minDatasets=~{minDatasets}
}
output {
......@@ -170,7 +178,11 @@ task FilterTalonTranscripts {
databaseFile: {description: "TALON database.", category: "required"}
annotationVersion: {description: "Which annotation version to use.", category: "required"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"}
maxFracA: {description: "Maximum fraction of As to allow in the window located immediately after any read assigned to a novel transcript.", category: "advanced"}
minCount: {description: "Number of minimum occurrences required for a novel transcript PER dataset.", category: "advanced"}
allowGenomic: {description: "If this option is set, transcripts from the Genomic novelty category will be permitted in the output.", category: "advanced"}
datasetsFile: {description: "Datasets to include.", category: "advanced"}
minDatasets: {description: "Minimum number of datasets novel transcripts must be found in.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
......@@ -190,7 +202,7 @@ task GetReadAnnotations {
String memory = "4G"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -228,6 +240,57 @@ task GetReadAnnotations {
}
}
task GetSpliceJunctions {
    # Runs `talon_get_sjs` on a TALON database or GTF file and exposes the
    # resulting "<outputPrefix>_<runMode>s.tsv" file as the task output.
    input {
        File SJinformationFile
        String inputFileType = "db"
        File referenceGTF
        String runMode = "intron"
        String outputPrefix

        String memory = "4G"
        Int timeMinutes = 30
        String dockerImage = "biocontainers/talon:v5.0_cv1"
    }

    # Maps the declared file type onto the command line flag that introduces
    # SJinformationFile. Only "db" and "gtf" are valid keys.
    Map[String, String] SJfileType = {"db": "--db", "gtf": "--gtf"}

    command {
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"
        talon_get_sjs \
        ~{SJfileType[inputFileType]} ~{SJinformationFile} \
        --ref ~{referenceGTF} \
        --mode ~{runMode} \
        --outprefix ~{outputPrefix}
    }

    output {
        File outputSJfile = outputPrefix + "_" + runMode + "s.tsv"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # inputs
        SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"}
        inputFileType: {description: "The file type of SJinformationFile.", category: "common"}
        referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"}
        runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        memory: {description: "The amount of memory available to the job.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}

        # outputs
        outputSJfile: {description: "File containing locations, novelty and transcript assignments of exons/introns."}
    }
}
task InitializeTalonDatabase {
input {
File GTFfile
......@@ -241,7 +304,7 @@ task InitializeTalonDatabase {
String memory = "10G"
Int timeMinutes = 60
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -287,13 +350,72 @@ task InitializeTalonDatabase {
}
}
task LabelReads {
    # Runs `talon_label_reads` on a SAM file and collects the labeled SAM file
    # and the read label table written under `outputPrefix`.
    input {
        File SAMfile
        File referenceGenome
        Int fracaRangeSize = 20
        String tmpDir = "./tmp_label_reads"
        Boolean deleteTmp = true
        String outputPrefix

        Int threads = 4
        String memory = "25G"
        Int timeMinutes = 2880
        String dockerImage = "biocontainers/talon:v5.0_cv1"
    }

    # The output directory is created up front, since outputPrefix may point
    # into a directory that does not exist yet.
    command <<<
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"
        talon_label_reads \
        --f=~{SAMfile} \
        --g=~{referenceGenome} \
        --t=~{threads} \
        --ar=~{fracaRangeSize} \
        --tmpDir=~{tmpDir} \
        ~{if deleteTmp then "--deleteTmp" else ""} \
        --o=~{outputPrefix}
    >>>

    output {
        File outputLabeledSAM = outputPrefix + "_labeled.sam"
        File outputReadLabels = outputPrefix + "_read_labels.tsv"
    }

    runtime {
        docker: dockerImage
        cpu: threads
        memory: memory
        time_minutes: timeMinutes
    }

    parameter_meta {
        # inputs
        SAMfile: {
            description: "SAM file of transcripts.",
            category: "required"
        }
        referenceGenome: {
            description: "Reference genome fasta file.",
            category: "required"
        }
        fracaRangeSize: {
            description: "Size of post-transcript interval to compute fraction.",
            category: "common"
        }
        tmpDir: {
            description: "Path to directory for tmp files.",
            category: "advanced"
        }
        deleteTmp: {
            description: "If set, tmp dir will be removed.",
            category: "advanced"
        }
        outputPrefix: {
            description: "Output directory path + output file prefix.",
            category: "required"
        }
        threads: {
            description: "The number of threads to be used.",
            category: "advanced"
        }
        memory: {
            description: "The amount of memory available to the job.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }

        # outputs
        outputLabeledSAM: {
            description: "SAM file with labeled transcripts."
        }
        outputReadLabels: {
            description: "Tabular file with fraction description per read."
        }
    }
}
task ReformatGtf {
input {
File GTFfile
String memory = "4G"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -334,7 +456,7 @@ task SummarizeDatasets {
String memory = "4G"
Int timeMinutes = 50
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command {
......@@ -383,10 +505,10 @@ task Talon {
Float minimumIdentity = 0.8
String outputPrefix
Int cores = 4
Int threads = 4
String memory = "25G"
Int timeMinutes = 2880
String dockerImage = "biocontainers/talon:v4.4.2_cv1"
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
command <<<
......@@ -405,7 +527,7 @@ task Talon {
~{"--f " + outputPrefix + "/talonConfigFile.csv"} \
--db ~{databaseFile} \
--build ~{genomeBuild} \
--threads ~{cores} \
--threads ~{threads} \
--cov ~{minimumCoverage} \
--identity ~{minimumIdentity} \
~{"--o " + outputPrefix + "/run"}
......@@ -419,7 +541,7 @@ task Talon {
}
runtime {
cpu: cores
cpu: threads
memory: memory
time_minutes: timeMinutes
docker: dockerImage
......@@ -435,7 +557,7 @@ task Talon {
minimumCoverage: {description: "Minimum alignment coverage in order to use a SAM entry.", category: "common"}
minimumIdentity: {description: "Minimum alignment identity in order to use a SAM entry.", category: "common" }
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
cores: {description: "The number of cores to be used.", category: "advanced"}
threads: {description: "The number of threads to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment