From 66dc8af24f67b0d97027bcec74e06e7fd08f3ca5 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Mon, 23 Jun 2014 16:42:11 +0200
Subject: [PATCH] Added bz2 support

---
 .../nl/lumc/sasc/biopet/function/Pbzip2.scala | 40 +++++++++++++++++++
 .../nl/lumc/sasc/biopet/function/Zcat.scala   |  9 +++++
 flexiprep/examples/test.json                  |  1 +
 flexiprep/nbactions.xml                       |  6 +--
 .../pipelines/flexiprep/Flexiprep.scala       | 23 ++++++-----
 5 files changed, 66 insertions(+), 13 deletions(-)
 create mode 100644 biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala

diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala
new file mode 100644
index 000000000..05b4dcae4
--- /dev/null
+++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala
@@ -0,0 +1,40 @@
+package nl.lumc.sasc.biopet.function
+
+import nl.lumc.sasc.biopet.core._
+import nl.lumc.sasc.biopet.core.config._
+//import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.commandline._
+import java.io.File
+/** Command-line function wrapping pbzip2: decompresses (default) or compresses `input` into `output`. */
+class Pbzip2(val root:Configurable) extends BiopetCommandLineFunction {
+  @Input(doc="Zipped file")
+  var input: File = _
+  // Destination file; it receives pbzip2's stdout through the redirect built in cmdLine.
+  @Output(doc="Unzipped file")
+  var output: File = _
+  // Executable comes from config key "exe"; "pbzip2" is passed as the fallback value.
+  executeble = config("exe", "pbzip2")
+  // decomrpess selects -d (true) or -z (false); memory is forwarded to pbzip2 via -m.
+  var decomrpess = true
+  var memory: Int = config("memory", 1000)
+  // Default vmem string: twice `memory`, integer-divided by 1000, suffixed with "G".
+  override val defaultVmem = (memory * 2 / 1000) + "G"
+  override val defaultThreads = 2
+  // -c makes pbzip2 write to stdout and takes no file argument, so stdout is redirected into `output`.
+  def cmdLine = required(executeble) +
+      conditional(decomrpess, "-d") +
+      conditional(!decomrpess, "-z") +
+      optional("-p", threads, spaceSeparated=false) +
+      optional("-m", memory, spaceSeparated=false) +
+      required("-c", input) +
+      " > " + required(output)
+}
+/** Companion factory: builds a Pbzip2 job with its input and output files already assigned. */
+object Pbzip2 {
+  def apply(root:Configurable, input:File, output:File): Pbzip2 = {
+    val job = new Pbzip2(root)
+    job.input = input
+    job.output = output
+    job
+  }
+}
\ No newline at end of file
diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala
index 15b01d922..fc3245e07 100644
--- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala
+++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala
@@ -16,4 +16,13 @@ class Zcat(val root:Configurable) extends BiopetCommandLineFunction {
   executeble = config("exe", "zcat")
   
   def cmdLine = required(executeble) + required(input) + " > " + required(output)
+}
+/** Companion factory: builds a Zcat job with its input and output files already assigned. */
+object Zcat {
+  def apply(root:Configurable, input:File, output:File): Zcat = {
+    val job = new Zcat(root)
+    job.input = input
+    job.output = output
+    job
+  }
 }
\ No newline at end of file
diff --git a/flexiprep/examples/test.json b/flexiprep/examples/test.json
index 66c92f595..285265ef0 100644
--- a/flexiprep/examples/test.json
+++ b/flexiprep/examples/test.json
@@ -1,6 +1,7 @@
 {
     "fastqc": { "exe": "/home/pjvan_thof/Downloads/FastQC/fastqc" },
     "gatk": {"flexiprep": { "sdfg": { "exe": "gatk" }}},
+    "pbzip2": { "exe": "/home/pjvan_thof/pipelines/test/test" },
     "flexiprep": {
         "fastqc": { "exe": "/home/pjvan_thof/pipelines/test/test" },
         "cutadapt": {"exe":"/home/pjvan_thof/pipelines/test/test"},
diff --git a/flexiprep/nbactions.xml b/flexiprep/nbactions.xml
index 0f53adede..47a3066de 100644
--- a/flexiprep/nbactions.xml
+++ b/flexiprep/nbactions.xml
@@ -10,7 +10,7 @@
                 <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
             </goals>
             <properties>
-                <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args>
+                <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq.bz2 -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args>
                 <exec.executable>java</exec.executable>
                 <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
             </properties>
@@ -25,7 +25,7 @@
                 <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
             </goals>
             <properties>
-                <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args>
+                <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq.bz2 -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args>
                 <exec.executable>java</exec.executable>
                 <jpda.listen>true</jpda.listen>
                 <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
@@ -41,7 +41,7 @@
                 <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
             </goals>
             <properties>
-                <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args>
+                <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq.bz2 -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args>
                 <exec.executable>java</exec.executable>
                 <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
             </properties>
diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
index 6d2bc320d..112da3f3c 100644
--- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
+++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
@@ -60,8 +60,8 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript {
   def biopetScript() {
     runInitialFastqc()
     
-    outputFiles += ("fastq_input_R1" -> zcatIfNeeded(input_R1,outputDir))
-    if (paired) outputFiles += ("fastq_input_R2" -> zcatIfNeeded(input_R2,outputDir))
+    outputFiles += ("fastq_input_R1" -> extractIfNeeded(input_R1,outputDir))
+    if (paired) outputFiles += ("fastq_input_R2" -> extractIfNeeded(input_R2,outputDir))
     
     addSeqstat(outputFiles("fastq_input_R1"), "seqstat_R1")
     if (paired) addSeqstat(outputFiles("fastq_input_R2"), "seqstat_R2")
@@ -237,17 +237,20 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript {
     return fastqcCommand
   }
   
-  def zcatIfNeeded(file:File, runDir:String) : File = {
+  def extractIfNeeded(file:File, runDir:String) : File = { // adds an extraction job for .gz/.gzip/.bz2 names and returns the extracted file; other files are returned as-is
     if (file.getName().endsWith(".gz") || file.getName().endsWith(".gzip")) {
-      var newFile: File = swapExt(file,".gz","")
-      if (file.getName().endsWith(".gzip")) newFile = swapExt(file,".gzip","")
-      val zcatCommand = new Zcat(this)
-      zcatCommand.input = file
-      zcatCommand.output = new File(runDir + newFile)
-      //zcatCommand.jobOutputFile = outputDir + "." + file.getName + ".out"
+      val gzExt = if (file.getName().endsWith(".gzip")) ".gzip" else ".gz" // the suffix actually present on the name
+      val newFile: File = swapExt(runDir, file, gzExt, "") // output path is derived by swapExt from runDir and the stripped name
+      val zcatCommand = Zcat(this, file, newFile)
       if (!this.skipClip || !this.skipTrim) zcatCommand.isIntermediate = true
       add(zcatCommand)
-      return zcatCommand.output
+      return newFile
+    } else if (file.getName().endsWith(".bz2")) {
+      val newFile = swapExt(runDir, file,".bz2","")
+      val pbzip2 = Pbzip2(this,file, newFile)
+      if (!this.skipClip || !this.skipTrim) pbzip2.isIntermediate = true // flagged intermediate unless both clipping and trimming are skipped
+      add(pbzip2)
+      return newFile
     } else return file
   }
   
-- 
GitLab