gffcompare.wdl 7.84 KiB
version 1.0
# Copyright (c) 2017 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
task GffCompare {
input {
Array[File] inputGtfFiles
# gffcmp is the default used by the program as well. This needs to be
# defined in order for the output values to be consistent and correct.
String outPrefix = "gffcmp"
Boolean C = false
Boolean A = false
Boolean X = false
Boolean K = false
Boolean snCorrection = false
Boolean precisionCorrection = false
Boolean discardSingleExonTransfragsAndReferenceTranscripts = false
Boolean discardSingleExonReferenceTranscripts = false
Boolean noTmap = false
Boolean verbose = false
Boolean debugMode = false
File? inputGtfList
File? referenceAnnotation
String? outputDir
File? genomeSequences
Int? maxDistanceFreeEndsTerminalExons
Int? maxDistanceGroupingTranscriptStartSites
String? namePrefix
String memory = "4G"
Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30)
String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0"
# This workaround only works in the input section.
# Issue addressed at https://github.com/openwdl/wdl/pull/263.
File? noneFile # This is a wdl workaround. Please do not assign!
}
# This allows for the creation of output directories.
String dirPrefix = if defined(outputDir)
then select_first([outputDir]) + "/"
else ""
String totalPrefix = dirPrefix + outPrefix
command {
set -e
~{"mkdir -p " + outputDir}
gffcompare \
~{"-r " + referenceAnnotation} \
~{"-o '" + totalPrefix + "'"} \
~{"-s " + genomeSequences} \
~{"-e " + maxDistanceFreeEndsTerminalExons} \
~{"-d " + maxDistanceGroupingTranscriptStartSites} \
~{"-p " + namePrefix} \
~{true="-C" false="" C} \
~{true="-A" false="" A} \
~{true="-X" false="" X} \
~{true="-K" false="" K} \
~{true="-R" false="" snCorrection} \
~{true="-Q" false="" precisionCorrection} \
~{true="-M" false="" discardSingleExonTransfragsAndReferenceTranscripts} \
~{true="-N" false="" discardSingleExonReferenceTranscripts} \
~{true="-T" false="" noTmap} \
~{true="-V" false="" verbose} \
~{true="D" false="" debugMode} \
~{"-i " + inputGtfList} \
~{sep=" " inputGtfFiles}
}
# Output of gffcompare is not stable. It depends on the number of files in the input.
Int noFilesGtfList = if defined(inputGtfList)
then length(read_lines(select_first([inputGtfList])))
else 0
Int noInputFiles = length(inputGtfFiles)
Boolean oneFile = (noFilesGtfList + noInputFiles) == 1
String annotatedName = if oneFile && defined(referenceAnnotation)
then "annotated"
else "combined"
# Check if a redundant .gtf will be created.
Boolean createRedundant = C || A || X
output {
# noneFile is not stable. Please replace this as soon as wdl spec allows.
File annotated = totalPrefix + "." + annotatedName + ".gtf"
File loci = totalPrefix + ".loci"
File stats = totalPrefix + ".stats"
File tracking = totalPrefix + ".tracking"
Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons])
File? redundant = if createRedundant
then totalPrefix + ".redundant.gtf"
else noneFile
File? missedIntrons = if debugMode
then totalPrefix + ".missed_introns.gtf"
else noneFile
}
runtime {
memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
# inputs
inputGtfFiles: {description: "The input GTF files.", category: "required"}
referenceAnnotation: {description: "The GTF file to compare with.", category: "required"}
outPrefix: {description: "The prefix for the output.", category: "advanced"}
C: {description: "Equivalent to gffcompare's `-C` flag.", category: "advanced"}
A: {description: "Equivalent to gffcompare's `-A` flag.", category: "advanced"}
X: {description: "Equivalent to gffcompare's `-X` flag.", category: "advanced"}
K: {description: "Equivalent to gffcompare's `-K` flag.", category: "advanced"}
snCorrection: {description: "Equivalent to gffcompare's `-R` flag.", category: "advanced"}
precisionCorrection: {description: "Equivalent to gffcompare's `-Q` flag.", category: "advanced"}
discardSingleExonTransfragsAndReferenceTranscripts: {description: "Equivalent to gffcompare's `-M` flag.", category: "advanced"}
discardSingleExonReferenceTranscripts: {description: "Equivalent to gffcompare's `-N` flag.", category: "advanced"}
noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"}
verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"}
debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"}
inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"}
outputDir: {description: "The location the output should be written.", category: "common"}
genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"}
maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"}
maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"}
namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
annotated: {description: "Annotated GTF file."}
loci: {description: "File describing the processed loci."}
stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."}
tracking: {description: "File matching up transcripts between samples."}
allFiles: {description: "A collection of all output files."}
redundant: {description: "File containing duplicate/redundant transcripts."}
missedIntrons: {description: "File denoting missed introns."}
}
meta {
WDL_AID: {
exclude: ["noneFile"]
}
}
}