Skip to content
Snippets Groups Projects
Commit 96d44ee9 authored by bow's avatar bow
Browse files

Use sync tool and update its wrapper

parent 32d55edd
No related branches found
No related tags found
No related merge requests found
...@@ -9,10 +9,16 @@ ...@@ -9,10 +9,16 @@
package nl.lumc.sasc.biopet.tools package nl.lumc.sasc.biopet.tools
import java.io.File import java.io.File
import scala.io.Source
import scala.util.matching.Regex
import scala.annotation.tailrec import scala.annotation.tailrec
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import argonaut._, Argonaut._
import scalaz._, Scalaz._
import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord } import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.ToolCommand
...@@ -27,9 +33,60 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction { ...@@ -27,9 +33,60 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName javaMainClass = getClass.getName
/** Original FASTQ file (read 1 or 2); passed to the tool as `-r`. */
@Input(doc = "Original FASTQ file (read 1 or 2)", shortName = "r", required = true)
var refFastq: File = _

/** Read 1 FASTQ file to sync; passed to the tool as `-i`. */
@Input(doc = "Input read 1 FASTQ file", shortName = "i", required = true)
var inputFastq1: File = _

/** Read 2 FASTQ file to sync; passed to the tool as `-j`. */
@Input(doc = "Input read 2 FASTQ file", shortName = "j", required = true)
var inputFastq2: File = _

/** Destination of the synced read 1 FASTQ file; passed to the tool as `-o`. */
@Output(doc = "Output read 1 FASTQ file", shortName = "o", required = true)
var outputFastq1: File = _

/** Destination of the synced read 2 FASTQ file; passed to the tool as `-p`. */
@Output(doc = "Output read 2 FASTQ file", shortName = "p", required = true)
var outputFastq2: File = _

/**
 * File that receives the tool's standard output (see `commandLine`) and is
 * parsed by `summary`.
 *
 * Annotated as an output so that Queue can treat it as a product of this job;
 * other functions list this file among their dependencies.
 */
@Output(doc = "Sync statistics file (captured standard output)", required = true)
var outputStats: File = _
/**
 * Command line executed for this job: the base command, followed by the
 * reference, input and output FASTQ arguments, with standard output
 * redirected into `outputStats`.
 */
override def commandLine = {
  val baseCommand = super.commandLine
  val fastqArguments = List(
    required("-r", refFastq),
    required("-i", inputFastq1),
    required("-j", inputFastq2),
    required("-o", outputFastq1),
    required("-p", outputFastq2)
  ).mkString
  val statsTarget = required(outputStats)

  baseCommand + fastqArguments + " > " + statsTarget
}
/**
 * Summary statistics of this job as a JSON object.
 *
 * The counts are parsed from the three report lines expected in `outputStats`.
 * All counts are 0 when the file does not exist or does not contain those lines.
 *
 * @return JSON object with the keys `num_reads_discarded_R1`,
 *         `num_reads_discarded_R2` and `num_reads_kept`
 */
def summary: Json = {
  val regex = new Regex(
    """Filtered (\d*) reads from first read file.
      |Filtered (\d*) reads from second read file.
      |Synced read files contain (\d*) reads.""".stripMargin,
    "R1", "R2", "RL")

  val (countFilteredR1, countFilteredR2, countRLeft) =
    if (outputStats.exists) {
      val source = Source.fromFile(outputStats)
      // release the file handle even when reading fails
      val text =
        try source.getLines().mkString("\n")
        finally source.close()

      regex.findFirstMatchIn(text) match {
        case None => (0, 0, 0)
        case Some(rmatch) =>
          (rmatch.group("R1").toInt, rmatch.group("R2").toInt, rmatch.group("RL").toInt)
      }
    } else (0, 0, 0)

  ("num_reads_discarded_R1" := countFilteredR1) ->:
    ("num_reads_discarded_R2" := countFilteredR2) ->:
    ("num_reads_kept" := countRLeft) ->:
    jEmptyObject
}
} }
// TODO: implement reading from and writing to gzipped files
object FastqSync extends ToolCommand { object FastqSync extends ToolCommand {
/** /**
...@@ -139,6 +196,30 @@ object FastqSync extends ToolCommand { ...@@ -139,6 +196,30 @@ object FastqSync extends ToolCommand {
println("Synced read files contain %d reads.".format(counts.numKept)) println("Synced read files contain %d reads.".format(counts.numKept))
} }
// TODO: refactor this into the object? At least make it work on the summary object
/**
 * Merges several FastqSync summaries into one by summing their counts.
 *
 * A field that is missing or not a number in one of the summaries counts as
 * zero. An empty list yields a summary whose counts are all zero.
 *
 * @param jsons summaries carrying the keys `num_reads_discarded_R1`,
 *              `num_reads_discarded_R2` and `num_reads_kept`
 * @return JSON object with the same three keys, holding the summed counts
 */
def mergeSummaries(jsons: List[Json]): Json = {
  // value of one count field; absent fields contribute nothing to the sum
  def count(json: Json, key: String): Int =
    json.field(key).map(_.numberOrZero.toInt).getOrElse(0)

  val (read1FilteredCount, read2FilteredCount, readsLeftCount) = jsons
    // extract the values we require from each JSON object into tuples
    .map { json =>
      (count(json, "num_reads_discarded_R1"),
        count(json, "num_reads_discarded_R2"),
        count(json, "num_reads_kept"))
    }
    // sum the tuples, starting from zero so that an empty list is accepted
    .foldLeft((0, 0, 0)) { (acc, cur) =>
      (acc._1 + cur._1, acc._2 + cur._2, acc._3 + cur._3)
    }

  ("num_reads_discarded_R1" := read1FilteredCount) ->:
    ("num_reads_discarded_R2" := read2FilteredCount) ->:
    ("num_reads_kept" := readsLeftCount) ->:
    jEmptyObject
}
case class Args(refFastq: File = new File(""), case class Args(refFastq: File = new File(""),
inputFastq1: File = new File(""), inputFastq1: File = new File(""),
inputFastq2: File = new File(""), inputFastq2: File = new File(""),
......
...@@ -21,7 +21,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } ...@@ -21,7 +21,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.{ Gzip, Pbzip2, Md5sum, Zcat, Seqstat } import nl.lumc.sasc.biopet.extensions.{ Gzip, Pbzip2, Md5sum, Zcat, Seqstat }
import nl.lumc.sasc.biopet.scripts.{ FastqSync } //import nl.lumc.sasc.biopet.scripts.FastqSync
import nl.lumc.sasc.biopet.tools.FastqSync
class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null) def this() = this(null)
...@@ -182,15 +183,20 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { ...@@ -182,15 +183,20 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
R2 = cutadapt_R2.fastq_output R2 = cutadapt_R2.fastq_output
deps ::= R2 deps ::= R2
val fastqSync = FastqSync(this, cutadapt_R1.fastq_input, cutadapt_R1.fastq_output, cutadapt_R2.fastq_output, val fqSync = new FastqSync(this)
swapExt(outDir, R1, R1_ext, ".sync" + R1_ext), swapExt(outDir, R2, R2_ext, ".sync" + R2_ext), swapExt(outDir, R1, R1_ext, ".sync.stats")) fqSync.refFastq = cutadapt_R1.fastq_input
fastqSync.deps :::= deps fqSync.inputFastq1 = cutadapt_R1.fastq_output
fastqSync.isIntermediate = true fqSync.inputFastq2 = cutadapt_R2.fastq_output
add(fastqSync) fqSync.outputFastq1 = swapExt(outDir, R1, R1_ext, ".sync" + R1_ext)
summary.addFastqcSync(fastqSync, chunk) fqSync.outputFastq2 = swapExt(outDir, R2, R2_ext, ".sync" + R2_ext)
outputFiles += ("syncStats" -> fastqSync.output_stats) fqSync.outputStats = swapExt(outDir, R1, R1_ext, ".sync.stats")
R1 = fastqSync.output_R1 fqSync.deps :::= deps
R2 = fastqSync.output_R2 add(fqSync)
summary.addFastqcSync(fqSync, chunk)
outputFiles += ("syncStats" -> fqSync.outputStats)
R1 = fqSync.outputFastq1
R2 = fqSync.outputFastq2
deps :::= R1 :: R2 :: Nil deps :::= R1 :: R2 :: Nil
} }
} }
......
...@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep ...@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.PrintWriter import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.{ Md5sum, Seqstat } import nl.lumc.sasc.biopet.extensions.{ Md5sum, Seqstat }
import nl.lumc.sasc.biopet.scripts.{ FastqSync } import nl.lumc.sasc.biopet.tools.FastqSync
import org.broadinstitute.gatk.queue.function.InProcessFunction import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File import java.io.File
...@@ -112,8 +112,8 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co ...@@ -112,8 +112,8 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
def addFastqcSync(fastqSync: FastqSync, chunk: String = ""): FastqSync = { def addFastqcSync(fastqSync: FastqSync, chunk: String = ""): FastqSync = {
if (!chunks.contains(chunk)) chunks += (chunk -> new Chunk) if (!chunks.contains(chunk)) chunks += (chunk -> new Chunk)
chunks(chunk).fastqSync = fastqSync chunks(chunk).fastqSync = fastqSync
deps ::= fastqSync.output_stats deps ::= fastqSync.outputStats
return fastqSync fastqSync
} }
// format: OFF // format: OFF
override def run { override def run {
...@@ -223,11 +223,13 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co ...@@ -223,11 +223,13 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
jEmptyObject) jEmptyObject)
} }
def syncstatSummary(): Option[Json] = { def syncstatSummary(): Option[Json] =
if (flexiprep.skipClip || !flexiprep.paired) return None if (flexiprep.skipClip || !flexiprep.paired)
val s = for ((key, value) <- chunks) yield value.fastqSync.getSummary None
return Option(FastqSync.mergeSummaries(s.toList)) else {
} val s = for ((key, value) <- chunks) yield value.fastqSync.summary
Option(FastqSync.mergeSummaries(s.toList))
}
def trimstatSummary(): Option[Json] = { def trimstatSummary(): Option[Json] = {
if (flexiprep.skipTrim) return None if (flexiprep.skipTrim) return None
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment