From 9dde7d50cd4597f5f286c7fdf4d231033c8e5492 Mon Sep 17 00:00:00 2001
From: jboom1 <j.boom.me@lumc.nl>
Date: Thu, 22 Aug 2019 11:05:23 +0200
Subject: [PATCH] Add WDL task for TranscriptClean

---
 transcriptclean.wdl | 199 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 199 insertions(+)
 create mode 100644 transcriptclean.wdl

diff --git a/transcriptclean.wdl b/transcriptclean.wdl
new file mode 100644
index 0000000..84dc6a2
--- /dev/null
+++ b/transcriptclean.wdl
@@ -0,0 +1,199 @@
+version 1.0
+
+# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task transcriptclean {  # Runs TranscriptClean and collects the four "_clean" files it writes.
+    input {
+        File SAMfile  # passed as -s
+        File referenceFile  # passed as -g
+        String outputPrefix  # appended to outputDirPath to form the -o value
+        String outputDirPath  # created with mkdir -p; joined to outputPrefix with no separator inserted
+        File? spliceJnsFile  # optional, passed as -j
+        File? variantsFile  # optional, passed as -v
+        Int? maxLenIndel  # optional, passed as --maxLenIndel=
+        Int? maxSJoffset  # optional, passed as --maxSJOffset=
+        Boolean? correctMismatches  # optional, passed as -m true / -m false; flag omitted when unset
+        Boolean? correctIndels  # optional, passed as -i true / -i false; flag omitted when unset
+        Boolean? correctSJs  # optional, passed as --correctSJs=true / --correctSJs=false; omitted when unset
+        Boolean? dryRun  # adds --dryRun when true
+        Boolean? primaryOnly  # adds --primaryOnly when true
+        Int cores = 1  # forwarded to runtime.cpu
+        Int memory = 25  # forwarded unchanged to runtime.memory; NOTE(review): unit is backend-defined - confirm
+        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
+    }
+
+    command {
+        set -e -o pipefail  # "-o" is required; without it bash only sets $1 to the word pipefail
+        mkdir -p ~{outputDirPath}
+        TranscriptClean \
+            ~{"-s " + SAMfile} \
+            ~{"-g " + referenceFile} \
+            ~{"-o " + outputDirPath + outputPrefix} \
+            ~{"-j " + spliceJnsFile} \
+            ~{"-v " + variantsFile} \
+            ~{"--maxLenIndel=" + maxLenIndel} \
+            ~{"--maxSJOffset=" + maxSJoffset} \
+            ~{true="-m true" false="-m false" correctMismatches} \
+            ~{true="-i true" false="-i false" correctIndels} \
+            ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \
+            ~{true="--dryRun" false="" dryRun} \
+            ~{true="--primaryOnly" false="" primaryOnly}
+    }
+
+    output {  # every path below is the -o value plus a fixed suffix
+        File outputTCfasta = outputDirPath + outputPrefix + "_clean.fa"
+        File outputTClog = outputDirPath + outputPrefix + "_clean.log"
+        File outputTCsam = outputDirPath + outputPrefix + "_clean.sam"
+        File outputTCteLog = outputDirPath + outputPrefix + "_clean.TE.log"
+    }
+
+    runtime {
+        cpu: cores
+        memory: memory  # NOTE(review): bare Int; how it is interpreted depends on the backend - confirm
+        docker: dockerImage
+    }
+}
+
+task cleansplicejns {  # Runs clean_splice_jns and returns the "_clean.sam" file it writes.
+    input {
+        File SAMfile  # passed as --f=
+        File referenceFile  # passed as --g=
+        String outputPrefix  # appended to outputDirPath to form the --o= value
+        String outputDirPath  # created with mkdir -p; joined to outputPrefix with no separator inserted
+        File spliceJNsFile  # passed as --s=
+        File? variantsFile  # optional, passed as --v=
+        Int cores = 1  # forwarded to runtime.cpu
+        Int memory = 4  # forwarded unchanged to runtime.memory; NOTE(review): unit is backend-defined - confirm
+        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
+    }
+
+    command {
+        set -e -o pipefail  # "-o" is required; without it bash only sets $1 to the word pipefail
+        mkdir -p ~{outputDirPath}
+        clean_splice_jns \
+            ~{"--f=" + SAMfile} \
+            ~{"--g=" + referenceFile} \
+            ~{"--o=" + outputDirPath + outputPrefix} \
+            ~{"--s=" + spliceJNsFile} \
+            ~{"--v=" + variantsFile}
+    }
+
+    output {  # the --o= value plus a fixed suffix
+        File outputCleanSJsam = outputDirPath + outputPrefix + "_clean.sam"
+    }
+
+    runtime {
+        cpu: cores
+        memory: memory  # NOTE(review): bare Int; how it is interpreted depends on the backend - confirm
+        docker: dockerImage
+    }
+}
+
+task getsjsfromgtf {  # Runs get_SJs_from_gtf and returns the .tsv file named by its --o= argument.
+    input {
+        File GTFfile  # passed as --f=
+        File referenceFile  # passed as --g=
+        String outputPrefix  # output path is outputDirPath + outputPrefix + ".tsv"
+        String outputDirPath  # created with mkdir -p; joined to outputPrefix with no separator inserted
+        Int? minIntronSize  # optional, passed as --minIntronSize=
+        Int cores = 1  # forwarded to runtime.cpu
+        Int memory = 8  # forwarded unchanged to runtime.memory; NOTE(review): unit is backend-defined - confirm
+        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
+    }
+
+    command {
+        set -e -o pipefail  # "-o" is required; without it bash only sets $1 to the word pipefail
+        mkdir -p ~{outputDirPath}
+        get_SJs_from_gtf \
+            ~{"--f=" + GTFfile} \
+            ~{"--g=" + referenceFile} \
+            ~{"--o=" + outputDirPath + outputPrefix + ".tsv"} \
+            ~{"--minIntronSize=" + minIntronSize}
+    }
+
+    output {  # same path that was handed to --o=
+        File outputSJsFile = outputDirPath + outputPrefix + ".tsv"
+    }
+
+    runtime {
+        cpu: cores
+        memory: memory  # NOTE(review): bare Int; how it is interpreted depends on the backend - confirm
+        docker: dockerImage
+    }
+}
+
+task getcorrectedsjsfromlog {  # Runs get_corrected_SJs_from_log with two positional arguments.
+    input {
+        File TElogFile  # first positional argument
+        String outputPrefix  # output path is outputDirPath + outputPrefix + ".tsv"
+        String outputDirPath  # created with mkdir -p; joined to outputPrefix with no separator inserted
+        Int cores = 1  # forwarded to runtime.cpu
+        Int memory = 5  # forwarded unchanged to runtime.memory; NOTE(review): unit is backend-defined - confirm
+        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
+    }
+
+    command {
+        set -e -o pipefail  # "-o" is required; without it bash only sets $1 to the word pipefail
+        mkdir -p ~{outputDirPath}
+        get_corrected_SJs_from_log \
+            ~{TElogFile} \
+            ~{outputDirPath + outputPrefix + ".tsv"}
+    }
+
+    output {  # same path that was handed over as the second positional argument
+        File outputCorrectedSJs = outputDirPath + outputPrefix + ".tsv"
+    }
+
+    runtime {
+        cpu: cores
+        memory: memory  # NOTE(review): bare Int; how it is interpreted depends on the backend - confirm
+        docker: dockerImage
+    }
+}
+
+task gettranscriptcleanstats {  # Runs get_TranscriptClean_stats and captures what it prints to stdout.
+    input {
+        File minimapSAMfile  # first positional argument
+        String outputPrefix  # outputDirPath + outputPrefix is the second positional argument
+        String outputDirPath  # created with mkdir -p; joined to outputPrefix with no separator inserted
+        Int cores = 1  # forwarded to runtime.cpu
+        Int memory = 4  # forwarded unchanged to runtime.memory; NOTE(review): unit is backend-defined - confirm
+        String dockerImage = "biocontainers/transcriptclean:v1.0.7_cv1"
+    }
+
+    command {
+        set -e -o pipefail  # "-o" is required; without it bash only sets $1 to the word pipefail
+        mkdir -p ~{outputDirPath}
+        get_TranscriptClean_stats \
+            ~{minimapSAMfile} \
+            ~{outputDirPath + outputPrefix}
+    }
+
+    output {  # the only declared output is the command's stdout
+        File outputStat = stdout()
+    }
+
+    runtime {
+        cpu: cores
+        memory: memory  # NOTE(review): bare Int; how it is interpreted depends on the backend - confirm
+        docker: dockerImage
+    }
+}
-- 
GitLab