From 66dc8af24f67b0d97027bcec74e06e7fd08f3ca5 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Mon, 23 Jun 2014 16:42:11 +0200 Subject: [PATCH] Added bz2 support --- .../nl/lumc/sasc/biopet/function/Pbzip2.scala | 40 +++++++++++++++++++ .../nl/lumc/sasc/biopet/function/Zcat.scala | 9 +++++ flexiprep/examples/test.json | 1 + flexiprep/nbactions.xml | 6 +-- .../pipelines/flexiprep/Flexiprep.scala | 23 ++++++----- 5 files changed, 66 insertions(+), 13 deletions(-) create mode 100644 biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala new file mode 100644 index 000000000..05b4dcae4 --- /dev/null +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala @@ -0,0 +1,40 @@ +package nl.lumc.sasc.biopet.function + +import nl.lumc.sasc.biopet.core._ +import nl.lumc.sasc.biopet.core.config._ +//import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.commandline._ +import java.io.File + +class Pbzip2(val root:Configurable) extends BiopetCommandLineFunction { + @Input(doc="Zipped file") + var input: File = _ + + @Output(doc="Unzipped file") + var output: File = _ + + executeble = config("exe", "pbzip2") + + var decomrpess = true + var memory: Int = config("memory", 1000) + + override val defaultVmem = (memory * 2 / 1000) + "G" + override val defaultThreads = 2 + + def cmdLine = required(executeble) + + conditional(decomrpess, "-d") + + conditional(!decomrpess, "-z") + + optional("-p", threads, spaceSeparated=false) + + optional("-m", memory, spaceSeparated=false) + + required("-c", output) + + required(input) +} + +object Pbzip2 { + def apply(root:Configurable, input:File, output:File): Pbzip2 = { + val pbzip2 = new Pbzip2(root) + pbzip2.input = input + pbzip2.output = output + return pbzip2 + } +} \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala index 15b01d922..fc3245e07 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala @@ -16,4 +16,13 @@ class Zcat(val root:Configurable) extends BiopetCommandLineFunction { executeble = config("exe", "zcat") def cmdLine = required(executeble) + required(input) + " > " + required(output) +} + +object Zcat { + def apply(root:Configurable, input:File, output:File): Zcat = { + val zcat = new Zcat(root) + zcat.input = input + zcat.output = output + return zcat + } } \ No newline at end of file diff --git a/flexiprep/examples/test.json b/flexiprep/examples/test.json index 66c92f595..285265ef0 100644 --- a/flexiprep/examples/test.json +++ b/flexiprep/examples/test.json @@ -1,6 +1,7 @@ { "fastqc": { "exe": "/home/pjvan_thof/Downloads/FastQC/fastqc" }, "gatk": {"flexiprep": { "sdfg": { "exe": "gatk" }}}, + "pbzip2": { "exe": "/home/pjvan_thof/pipelines/test/test" }, "flexiprep": { "fastqc": { "exe": "/home/pjvan_thof/pipelines/test/test" }, "cutadapt": {"exe":"/home/pjvan_thof/pipelines/test/test"}, diff --git a/flexiprep/nbactions.xml b/flexiprep/nbactions.xml index 0f53adede..47a3066de 100644 --- a/flexiprep/nbactions.xml +++ b/flexiprep/nbactions.xml @@ -10,7 +10,7 @@ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> </goals> <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args> + <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq.bz2 -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args> <exec.executable>java</exec.executable> <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> </properties> @@ -25,7 +25,7 @@ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> </goals> <properties> - <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args> + <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq.bz2 -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args> <exec.executable>java</exec.executable> <jpda.listen>true</jpda.listen> <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> @@ -41,7 +41,7 @@ <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> </goals> <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args> + <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep -R2 bla.fastq.gz -R1 ../input_R1.fastq.bz2 -outputDir /home/pjvan_thof/pipelines/test -config /home/pjvan_thof/pipelines/biopet/flexiprep/examples/test.json -l DEBUG</exec.args> <exec.executable>java</exec.executable> <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> </properties> diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 6d2bc320d..112da3f3c 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -60,8 +60,8 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { def biopetScript() { runInitialFastqc() - outputFiles += ("fastq_input_R1" -> zcatIfNeeded(input_R1,outputDir)) - if (paired) outputFiles += ("fastq_input_R2" -> zcatIfNeeded(input_R2,outputDir)) + outputFiles += ("fastq_input_R1" -> extractIfNeeded(input_R1,outputDir)) + if (paired) outputFiles += ("fastq_input_R2" -> extractIfNeeded(input_R2,outputDir)) addSeqstat(outputFiles("fastq_input_R1"), "seqstat_R1") if (paired) addSeqstat(outputFiles("fastq_input_R2"), "seqstat_R2") @@ -237,17 +237,20 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { return fastqcCommand } - def zcatIfNeeded(file:File, runDir:String) : File = { + def extractIfNeeded(file:File, runDir:String) : File = { if (file.getName().endsWith(".gz") || file.getName().endsWith(".gzip")) { - var newFile: File = swapExt(file,".gz","") - if (file.getName().endsWith(".gzip")) newFile = swapExt(file,".gzip","") - val zcatCommand = new Zcat(this) - zcatCommand.input = file - zcatCommand.output = new File(runDir + newFile) - //zcatCommand.jobOutputFile = outputDir + "." + file.getName + ".out" + var newFile: File = swapExt(runDir, file,".gz","") + if (file.getName().endsWith(".gzip")) newFile = swapExt(runDir, file,".gzip","") + val zcatCommand = Zcat(this, file, newFile) if (!this.skipClip || !this.skipTrim) zcatCommand.isIntermediate = true add(zcatCommand) - return zcatCommand.output + return newFile + } else if (file.getName().endsWith(".bz2")) { + var newFile = swapExt(runDir, file,".bz2","") + val pbzip2 = Pbzip2(this,file, newFile) + if (!this.skipClip || !this.skipTrim) pbzip2.isIntermediate = true + add(pbzip2) + return newFile } else return file } -- GitLab