Commit 15658350 authored by Peter van 't Hof's avatar Peter van 't Hof

Merge branch 'feature-full-fastq-to-gears' into feature-centrifuge_pipe

parents 531c04b7 6e66f2cc
......@@ -26,6 +26,50 @@ class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with
@Output(doc = "Output with hits per sequence")
var report: Option[File] = None
@Output(required = false)
var un: Option[File] = None
@Output(required = false)
var al: Option[File] = None
@Output(required = false)
var unConc: Option[File] = None
@Output(required = false)
var alConc: Option[File] = None
@Output(required = false)
var metFile: Option[File] = None
// Input args
var q: Boolean = config("q", default = false)
var qseq: Boolean = config("qseq", default = false)
var f: Boolean = config("f", default = false)
var r: Boolean = config("r", default = false)
var c: Boolean = config("c", default = false)
var skip: Option[Int] = config("skip")
var upto: Option[Int] = config("upto")
var trim5: Option[Int] = config("trim5")
var trim3: Option[Int] = config("trim3")
var phred33: Boolean = config("phred33", default = false)
var phred64: Boolean = config("phred64", default = false)
var intQuals: Boolean = config("int_quals", default = false)
var ignoreQuals: Boolean = config("ignore_quals", default = false)
var nofw: Boolean = config("nofw", default = false)
var norc: Boolean = config("norc", default = false)
// Classification args
var minHitlen: Option[Int] = config("min_hitlen")
var minTotallen: Option[Int] = config("min_totallen")
var hostTaxids: List[Int] = config("host_taxids", default = Nil)
var excludeTaxids: List[Int] = config("exclude_taxids", default = Nil)
// Output args
var t: Boolean = config("t", default = false)
var quiet: Boolean = config("quiet", default = false)
var metStderr: Boolean = config("met_stderr", default = false)
var met: Option[Int] = config("met")
override def defaultThreads = 8
executable = config("exe", default = "centrifuge", freeVar = false)
......@@ -49,7 +93,34 @@ class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with
* @return Command to run
*/
def cmdLine: String = executable +
//TODO: Options
conditional(q, "-q") +
conditional(qseq, "--qseq") +
conditional(f, "-f") +
conditional(r, "-r") +
conditional(c, "-c") +
optional("--skip", skip) +
optional("--upto", upto) +
optional("--trim5", trim5) +
optional("--trim3", trim3) +
conditional(phred33, "--phred33") +
conditional(phred64, "--phred64") +
conditional(intQuals, "--int-quals") +
conditional(ignoreQuals, "--ignore-quals") +
conditional(nofw, "--nofw") +
conditional(norc, "--norc") +
optional("--min-hitlen", minHitlen) +
optional("--min-totallen", minTotallen) +
optional("--host-taxids", if (hostTaxids.nonEmpty) Some(hostTaxids.mkString(",")) else None) +
optional("--exclude-taxids", if (excludeTaxids.nonEmpty) Some(excludeTaxids.mkString(",")) else None) +
optional("--met-file", metFile) +
conditional(t, "-t") +
conditional(quiet, "--quiet") +
conditional(metStderr, "--met-stderr") +
optional("--met", met) +
optional(if (un.exists(_.getName.endsWith(".gz"))) "--un-gz" else "--un", un) +
optional(if (al.exists(_.getName.endsWith(".gz"))) "--al-gz" else "--al", al) +
optional(if (unConc.exists(_.getName.endsWith(".gz"))) "--un-conc-gz" else "--un-conc", unConc) +
optional(if (alConc.exists(_.getName.endsWith(".gz"))) "--al-conc-gz" else "--al-conc", alConc) +
optional("--threads", threads) +
required("-x", index) +
(inputR2 match {
......
......@@ -163,8 +163,8 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
gs.libId = Some(libId)
gs.outputDir = libDir
gs.fastqR1 = Some(addDownsample(flexiprep.fastqR1Qc, gs.outputDir))
gs.fastqR2 = flexiprep.fastqR2Qc.map(addDownsample(_, gs.outputDir))
gs.fastqR1 = List(addDownsample(flexiprep.fastqR1Qc, gs.outputDir))
gs.fastqR2 = flexiprep.fastqR2Qc.map(addDownsample(_, gs.outputDir)).toList
add(gs)
}
}
......@@ -194,8 +194,8 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
add(Zcat(qscript, flexipreps.flatMap(_.fastqR2Qc)) | new Gzip(qscript) > file)
}
gearsSingle.fastqR1 = Some(addDownsample(mergeR1, gearsSingle.outputDir))
gearsSingle.fastqR2 = mergeR2.map(addDownsample(_, gearsSingle.outputDir))
gearsSingle.fastqR1 = List(addDownsample(mergeR1, gearsSingle.outputDir))
gearsSingle.fastqR2 = mergeR2.map(addDownsample(_, gearsSingle.outputDir)).toList
add(gearsSingle)
}
......
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.{ Gzip, Zcat }
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -29,10 +30,10 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
def this() = this(null)
@Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false)
var fastqR1: Option[File] = None
var fastqR1: List[File] = Nil
@Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false)
var fastqR2: Option[File] = None
var fastqR2: List[File] = Nil
@Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false)
var bamFile: Option[File] = None
......@@ -40,8 +41,8 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
@Argument(required = false)
var outputName: String = _
lazy val krakenScript = if (config("gears_use_kraken", default = true)) Some(new GearsKraken(this)) else None
lazy val centrifugeScript = if (config("gears_use_centrifuge", default = false)) Some(new GearsCentrifuge(this)) else None
lazy val krakenScript = if (config("gears_use_kraken", default = false)) Some(new GearsKraken(this)) else None
lazy val centrifugeScript = if (config("gears_use_centrifuge", default = true)) Some(new GearsCentrifuge(this)) else None
lazy val qiimeRatx = if (config("gears_use_qiime_rtax", default = false)) Some(new GearsQiimeRtax(this)) else None
lazy val qiimeClosed = if (config("gears_use_qiime_closed", default = false)) Some(new GearsQiimeClosed(this)) else None
lazy val qiimeOpen = if (config("gears_use_qiime_open", default = false)) Some(new GearsQiimeOpen(this)) else None
......@@ -49,12 +50,13 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
/** Executed before running the script */
def init(): Unit = {
if (!fastqR1.isDefined && !bamFile.isDefined) Logging.addError("Please specify fastq-file(s) or bam file")
if (fastqR1.isDefined == bamFile.isDefined) Logging.addError("Provide either a bam file or a R1/R2 file")
if (fastqR1.isEmpty && !bamFile.isDefined) Logging.addError("Please specify fastq-file(s) or bam file")
if (fastqR1.nonEmpty == bamFile.isDefined) Logging.addError("Provide either a bam file or a R1/R2 file")
if (fastqR2.nonEmpty && fastqR1.size != fastqR2.size) Logging.addError("R1 and R2 has not the same number of files")
if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on GearsSingle module")
if (outputName == null) {
if (fastqR1.isDefined) outputName = fastqR1.map(_.getName
if (fastqR1.nonEmpty) outputName = fastqR1.headOption.map(_.getName
.stripSuffix(".gz")
.stripSuffix(".fastq")
.stripSuffix(".fq"))
......@@ -62,7 +64,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
}
if (fastqR1.isDefined) {
if (fastqR1.nonEmpty) {
fastqR1.foreach(inputFiles :+= InputFile(_))
fastqR2.foreach(inputFiles :+= InputFile(_))
} else bamFile.foreach(inputFiles :+= InputFile(_))
......@@ -79,30 +81,42 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
protected def executeFlexiprep(r1: File, r2: Option[File]): (File, Option[File]) = {
protected def executeFlexiprep(r1: List[File], r2: List[File]): (File, Option[File]) = {
val read1: File = if (r1.size == 1) r1.head else {
val outputFile = new File(outputDir, "merged.R1.fq.gz")
Zcat(this, r1) | new Gzip(this) > outputFile
outputFile
}
val read2: Option[File] = if (r2.size <= 1) r2.headOption else {
val outputFile = new File(outputDir, "merged.R2.fq.gz")
Zcat(this, r2) | new Gzip(this) > outputFile
Some(outputFile)
}
if (!skipFlexiprep) {
val flexiprep = new Flexiprep(this)
flexiprep.inputR1 = r1
flexiprep.inputR2 = r2
flexiprep.inputR1 = read1
flexiprep.inputR2 = read2
flexiprep.sampleId = if (sampleId.isEmpty) Some("noSampleName") else sampleId
flexiprep.libId = if (libId.isEmpty) Some("noLibName") else libId
flexiprep.outputDir = new File(outputDir, "flexiprep")
add(flexiprep)
(flexiprep.fastqR1Qc, flexiprep.fastqR2Qc)
} else (r1, r2)
} else (read1, read2)
}
/** Method to add jobs */
def biopetScript(): Unit = {
val (r1, r2): (File, Option[File]) = (fastqR1, fastqR2, bamFile) match {
case (Some(r1), _, _) => executeFlexiprep(r1, fastqR2)
case (r1, _, _) if r1.nonEmpty => executeFlexiprep(r1, fastqR2)
case (_, _, Some(bam)) =>
val extract = new ExtractUnmappedReads(this)
extract.outputDir = outputDir
extract.bamFile = bam
extract.outputName = outputName
add(extract)
executeFlexiprep(extract.fastqUnmappedR1, extract.fastqUnmappedR2)
executeFlexiprep(extract.fastqUnmappedR1 :: Nil, extract.fastqUnmappedR2.toList)
case _ => throw new IllegalArgumentException("Missing input files")
}
......@@ -178,8 +192,8 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
/** Statistics shown in the summary file */
def summaryFiles: Map[String, File] = Map.empty ++
(if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
(if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
(if (fastqR2.isDefined) Map("input_R2" -> fastqR2.get) else Map()) ++
fastqR1.zipWithIndex.map(x => s"input_R1_${x._2}" -> x._1) ++
fastqR2.zipWithIndex.map(x => s"input_R2_${x._2}" -> x._1) ++
outputFiles
}
......
......@@ -23,23 +23,26 @@ class GearsSingleReport(val root: Configurable) extends ReportBuilderExtension {
object GearsSingleReport extends ReportBuilder {
// TODO: Add dustbin analysis (aggregated)
// TODO: Add alignment stats per sample for the dustbin analysis
override def extFiles = super.extFiles ++ List("js/gears.js", "js/krona-2.0.js", "img/krona/loading.gif", "img/krona/hidden.png", "img/krona/favicon.ico")
.map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x))
def indexPage = {
val krakenExecuted = summary.getValue(sampleId, libId, "gearskraken", "stats", "krakenreport").isDefined
val centrifugeExecuted = summary.getValue(sampleId, libId, "gearscentrifuge", "stats", "centrifuge_report").isDefined
ReportPage(
List(
"Versions" -> ReportPage(List(), List((
"Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
))), Map())
),
List(
"Gears intro" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSingleFront.ssp"),
"Kraken analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp")
"Versions" -> ReportPage(List(),
List(("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"))), Map())
),
List("Gears intro" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSingleFront.ssp")) ++
(if (krakenExecuted) List("Kraken analysis" ->
ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"))
else Nil) ++
(if (centrifugeExecuted) List("Centrifuge analysis" ->
ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryModuleTag" -> "gearscentrifuge", "summaryStatsTag" -> "centrifuge_unique_report")))
else Nil),
pageArgs
)
}
......
......@@ -80,8 +80,8 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
inputMode match {
case Some("fastq") =>
gears.fastqR1 = Some(TestGearsSingle.r1)
gears.fastqR2 = if (paired) Some(TestGearsSingle.r2) else None
gears.fastqR1 = List(TestGearsSingle.r1)
gears.fastqR2 = if (paired) List(TestGearsSingle.r2) else Nil
case Some("bam") => gears.bamFile = Some(TestGearsSingle.bam)
case None =>
case _ => new IllegalStateException(s"$inputMode not allowed as inputMode")
......@@ -108,12 +108,12 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
val pipesJobs = gears.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs)
gears.summarySettings("gears_use_kraken") shouldBe kraken.getOrElse(true)
gears.summarySettings("gears_use_kraken") shouldBe kraken.getOrElse(false)
gears.summarySettings("gear_use_qiime_rtax") shouldBe qiimeRtax
gears.summarySettings("gear_use_qiime_closed") shouldBe qiimeClosed
gears.summarySettings("gear_use_qiime_open") shouldBe qiimeOpen
gears.krakenScript.isDefined shouldBe kraken.getOrElse(true)
gears.krakenScript.isDefined shouldBe kraken.getOrElse(false)
gears.centrifugeScript.isDefined shouldBe centrifuge
gears.qiimeClosed.isDefined shouldBe qiimeClosed
gears.qiimeOpen.isDefined shouldBe qiimeOpen
......@@ -124,10 +124,10 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
gears.functions.count(_.isInstanceOf[SamtoolsView]) shouldBe (if (inputMode == Some("bam")) 1 else 0)
gears.functions.count(_.isInstanceOf[SamToFastq]) shouldBe (if (inputMode == Some("bam")) 1 else 0)
gears.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReport]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (kraken.getOrElse(false)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReport]) shouldBe (if (kraken.getOrElse(false)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe
((if (kraken.getOrElse(true)) 1 else 0) + (if (centrifuge) 2 else 0))
((if (kraken.getOrElse(false)) 1 else 0) + (if (centrifuge) 2 else 0))
gears.functions.count(_.isInstanceOf[Centrifuge]) shouldBe (if (centrifuge) 1 else 0)
gears.functions.count(_.isInstanceOf[CentrifugeKreport]) shouldBe (if (centrifuge) 2 else 0)
......
......@@ -130,7 +130,10 @@ object GearsTest {
"seqtk" -> Map("exe" -> "test"),
"sickle" -> Map("exe" -> "test"),
"cutadapt" -> Map("exe" -> "test"),
"pickopenreferenceotus" -> Map("exe" -> "test")
"centrifuge" -> Map("exe" -> "test"),
"centrifuge_index" -> "test",
"pickopenreferenceotus" -> Map("exe" -> "test"),
"centrifugekreport" -> Map("exe" -> "test")
)
val sample1 = Map(
......
......@@ -218,6 +218,14 @@ trait MultisampleMappingTrait extends MultiSampleQScript
/** Default is set to keep the merged files, user can set this in the config. To change the default this method can be overriden */
def keepMergedFiles: Boolean = config("keep_merged_files", default = true)
/**
* @deprecated
*/
lazy val unmappedToGears: Boolean = config("unmapped_to_gears", default = false)
if (config.contains("unmapped_to_gears")) logger.warn("Config value 'unmapped_to_gears' is replaced with 'mapping_to_gears', Assumes default: mapping_to_gears=unmapped")
lazy val mappingToGears: String = config("mapping_to_gears", default = if (unmappedToGears) "unmapped" else "none")
/** This method can be extended to add jobs to the pipeline, to do this the super call of this function must be called by the pipelines */
def addJobs(): Unit = {
addPerLibJobs() // This add jobs for each library
......@@ -249,12 +257,22 @@ trait MultisampleMappingTrait extends MultiSampleQScript
if (config("execute_bam2wig", default = true)) add(Bam2Wig(qscript, preProcessBam.get))
}
if (config("unmapped_to_gears", default = false) && libraries.flatMap(_._2.bamFile).nonEmpty) {
val gears = new GearsSingle(qscript)
gears.bamFile = preProcessBam
gears.sampleId = Some(sampleId)
gears.outputDir = new File(sampleDir, "gears")
add(gears)
mappingToGears match {
case "unmapped" =>
val gears = new GearsSingle(qscript)
gears.bamFile = preProcessBam
gears.sampleId = Some(sampleId)
gears.outputDir = new File(sampleDir, "gears")
add(gears)
case "all" =>
val gears = new GearsSingle(qscript)
gears.fastqR1 = libraries.flatMap(_._2.inputR1).toList
gears.fastqR2 = libraries.flatMap(_._2.inputR2).toList
gears.sampleId = Some(sampleId)
gears.outputDir = new File(sampleDir, "gears")
add(gears)
case "none" =>
case x => Logging.addError(s"${x} is not a valid value for 'mapping_to_gears'")
}
}
}
......
......@@ -14,9 +14,10 @@
*/
package nl.lumc.sasc.biopet.pipelines.mapping
import java.io.{ File, FileOutputStream }
import java.io.{File, FileOutputStream}
import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge
import nl.lumc.sasc.biopet.extensions.kraken.Kraken
import nl.lumc.sasc.biopet.pipelines.flexiprep.Fastqc
import nl.lumc.sasc.biopet.utils.ConfigUtils
......@@ -25,7 +26,7 @@ import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ AfterClass, BeforeClass, DataProvider, Test }
import org.testng.annotations.{AfterClass, BeforeClass, DataProvider, Test}
/**
* Test class for [[Mapping]]
......@@ -88,7 +89,7 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
//Flexiprep
mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2)
mapping.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (unmappedToGears) 1 else 0)
mapping.functions.count(_.isInstanceOf[Centrifuge]) shouldBe (if (unmappedToGears) 1 else 0)
}
val outputDir = Files.createTempDir()
......@@ -143,6 +144,9 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
"samtools" -> Map("exe" -> "test"),
"kraken" -> Map("exe" -> "test", "db" -> "test"),
"krakenreport" -> Map("exe" -> "test", "db" -> "test"),
"centrifuge" -> Map("exe" -> "test"),
"centrifugekreport" -> Map("exe" -> "test"),
"centrifuge_index" -> "test",
"md5sum" -> Map("exe" -> "test")
)
......
......@@ -14,17 +14,18 @@
*/
package nl.lumc.sasc.biopet.pipelines.mapping
import java.io.{ File, FileOutputStream }
import java.io.{File, FileOutputStream}
import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge
import nl.lumc.sasc.biopet.extensions.kraken.Kraken
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, MergeSamFiles}
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
import org.testng.annotations.{DataProvider, Test}
/**
* Created by pjvanthof on 15/05/16.
......@@ -102,7 +103,7 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
}
}
pipeline.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (unmappedToGears) (numberFastqLibs + numberSamples) else 0)
pipeline.functions.count(_.isInstanceOf[Centrifuge]) shouldBe (if (unmappedToGears) (numberFastqLibs + numberSamples) else 0)
pipeline.summarySettings.get("merge_strategy") shouldBe Some(merge.toString)
}
......@@ -198,6 +199,9 @@ object MultisampleMappingTestTrait {
"wigtobigwig" -> Map("exe" -> "test"),
"kraken" -> Map("exe" -> "test", "db" -> "test"),
"krakenreport" -> Map("exe" -> "test", "db" -> "test"),
"centrifuge" -> Map("exe" -> "test"),
"centrifugekreport" -> Map("exe" -> "test"),
"centrifuge_index" -> "test",
"md5sum" -> Map("exe" -> "test")
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment