Update TranscriptClean to version 2.0.2

1e0fd640 · JB · 9b385071 · 1e0fd640 · 1e0fd640
Commit 1e0fd640 authored 5 years ago by JB
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ that users understand how the changes affect the new version.

 version 2.0.0
 ---------------------------
++ TranscriptClean: Update TranscriptClean to version 2.0.2
 + Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core.
 + Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb".
 + Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed.

--- a/transcriptclean.wdl
+++ b/transcriptclean.wdl
@@ -20,88 +20,6 @@ version 1.0
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

-task CleanSpliceJunctions {
-    input {
-        File SAMfile
-        File referenceGenome
-        String outputPrefix
-        File spliceJunctionAnnotation
-
-        File? variantFile
-
-        Int cores = 1
-        String memory = "4G"
-        String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1"
-    }
-
-    command {
-        set -e
-        mkdir -p $(dirname ~{outputPrefix})
-        clean_splice_jns \
-        ~{"--f=" + SAMfile} \
-        ~{"--g=" + referenceGenome} \
-        ~{"--o=" + outputPrefix} \
-        ~{"--s=" + spliceJunctionAnnotation} \
-        ~{"--v=" + variantFile}
-    }
-
-    output {
-        File outputCleanedSAM = outputPrefix + "_clean.sam"
-    }
-
-    runtime {
-        cpu: cores
-        memory: memory
-        docker: dockerImage
-    }
-
-    parameter_meta {
-        SAMfile: "Input SAM file"
-        referenceGenome: "Reference genome fasta file."
-        outputPrefix: "Output directory path + output file prefix."
-        spliceJunctionAnnotation: "Splice junction file"
-        variantFile: "VCF formatted file of variants"
-
-        outputCleanedSAM: "Cleaned sam output file."
-    }
-}
-
-task GetCorrectedSJsFromLog {
-    input {
-        File TElogFile
-        String outputPrefix
-
-        Int cores = 1
-        String memory = "5G"
-        String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1"
-    }
-
-    command {
-        set -e 
-        mkdir -p $(dirname ~{outputPrefix})
-        get_corrected_SJs_from_log \
-        ~{TElogFile} \
-        ~{outputPrefix + ".tsv"}
-    }
-
-    output {
-        File outputCorrectedSJs = outputPrefix + ".tsv"
-    }
-
-    runtime {
-        cpu: cores
-        memory: memory
-        docker: dockerImage
-    }
-
-    parameter_meta {
-        TElogFile: "TE log from TranscriptClean."
-        outputPrefix: "Output directory path + output file prefix."
-
-        outputCorrectedSJs: "Formely noncanonical splice junctions in BED format."
-    }
-}
-
 task GetSJsFromGtf {
    input {
        File GTFfile
@@ -111,7 +29,7 @@ task GetSJsFromGtf {

        Int cores = 1
        String memory = "8G"
-        String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1"
+        String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1"
    }

    command {
@@ -151,7 +69,7 @@ task GetTranscriptCleanStats {

        Int cores = 1
        String memory = "4G"
-        String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1"
+        String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1"
    }

    command {
@@ -189,16 +107,18 @@ task TranscriptClean {
        Int maxSJoffset = 5
        Boolean correctMismatches = true
        Boolean correctIndels = true
+        Boolean correctSJs = true
        Boolean dryRun = false
        Boolean primaryOnly = false
+        Boolean canonOnly = false
+        Int bufferSize = 100

        File? spliceJunctionAnnotation
        File? variantFile
-        Boolean? correctSJs

        Int cores = 1
        String memory = "25G"
-        String dockerImage = "biocontainers/transcriptclean:v1.0.8_cv1"
+        String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1"
    }

    command {
@@ -212,11 +132,14 @@ task TranscriptClean {
        ~{"-v " + variantFile} \
        ~{"--maxLenIndel=" + maxLenIndel} \
        ~{"--maxSJOffset=" + maxSJoffset} \
-        ~{true="-m CORRECTMISMATCHES" false="-m false" correctMismatches} \
-        ~{true="-i CORRECTINDELS" false="-i false" correctIndels} \
-        ~{true="--correctSJs=CORRECTSJS" false="--correctSJs=false" correctSJs} \
+        ~{true="-m true" false="-m false" correctMismatches} \
+        ~{true="-i true" false="-i false" correctIndels} \
+        ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \
        ~{true="--dryRun" false="" dryRun} \
-        ~{true="--primaryOnly" false="" primaryOnly}
+        ~{true="--primaryOnly" false="" primaryOnly} \
+        ~{true="--canonOnly" false="" canonOnly} \
+        ~{"--bufferSize=" + bufferSize} \
+        ~{"-t " + cores}
    }

    output {
@@ -236,7 +159,8 @@ task TranscriptClean {
        SAMfile: "Input SAM file containing transcripts to correct."
        referenceGenome: "Reference genome fasta file."
        outputPrefix: "Output directory path + output file prefix."
-        spliceJunctionAnnotation: "Splice junction file"
+        spliceJunctionAnnotation: "Splice junction file."
+        variantFile: "VCF formatted file of variants."
        maxLenIndel: "Maximum size indel to correct."
        maxSJoffset: "Maximum distance from annotated splice junction to correct."
        correctMismatches: "Set this to make TranscriptClean correct mismatches."
@@ -244,6 +168,8 @@ task TranscriptClean {
        correctSJs: "Set this to make TranscriptClean correct splice junctions."
        dryRun: "TranscriptClean will read in the data but don't do any correction."
        primaryOnly: "TranscriptClean will only output primary mappings of transcripts."
+        canonOnly: "TranscriptClean will output only canonical transcripts and transcript containing annotated noncanonical junctions."
+        bufferSize: "Number of lines to output to file at once by each thread during run."

        outputTranscriptCleanFasta: "Fasta file containing corrected reads."
        outputTranscriptCleanLog: "Log file of TranscriptClean run."