Commit 2727f142 authored by bow's avatar bow
Browse files

Merge branch 'feature-centrifuge' into 'develop'

Added base version of centrifuge extension

This adds centrifuge, which will later replace kraken.

See merge request !452
parents 0c7e6dae 290f37f6
......@@ -35,11 +35,11 @@ class Cuffquant(val root: Configurable) extends BiopetCommandLineFunction with V
means we have 2 samples, each with 2 replicates
so our input is a list of lists of Files
*/
var input: List[List[File]] = List.empty[List[File]]
var input: List[List[File]] = Nil
/** input GTF file */
@Input(doc = "Input GTF file", required = true)
var transcriptsGtf: File = null
var transcriptsGtf: File = _
/** output file, computed automatically from output directory */
@Output(doc = "Output CXB file")
......
......@@ -40,6 +40,8 @@ class Gzip(val root: Configurable) extends BiopetCommandLineFunction with Versio
object Gzip {
def apply(root: Configurable): Gzip = new Gzip(root)
def apply(root: Configurable, input: File, output: File): Gzip = Gzip(root, List(input), output)
def apply(root: Configurable, input: List[File], output: File): Gzip = {
val gzip = new Gzip(root)
gzip.input = input
......
package nl.lumc.sasc.biopet.extensions.centrifuge
import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.util.matching.Regex
/**
 * Command line wrapper for the `centrifuge` executable.
 *
 * Created by pjvanthof on 19/09/16.
 */
class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with Version {
  /** First read file; passed with `-1` when [[inputR2]] is set, otherwise with `-U` */
  @Input(doc = "Input: FastQ or FastA", required = true)
  var inputR1: File = _

  /** Optional second read file; when set it is passed with `-2` */
  @Input(doc = "Input: FastQ or FastA", required = false)
  var inputR2: Option[File] = None

  /** Index prefix, read from the `centrifuge_index` config key and passed with `-x` */
  var index: File = config("centrifuge_index")

  /** File passed with `-S`, unless `outputAsStsout` is set */
  @Output(doc = "Output with hits per sequence")
  var output: File = _

  /** Optional file passed with `--report-file`; marked as not required because it defaults to None */
  @Output(doc = "Output report file", required = false)
  var report: Option[File] = None

  override def defaultThreads = 8

  executable = config("exe", default = "centrifuge", freeVar = false)

  /** Command to get version of executable */
  def versionCommand: String = s"$executable --version"

  /** Regex to get version from version command output */
  def versionRegex: Regex = ".* version (.*)".r

  /** Adds `<index>.1.cf`, `<index>.2.cf` and `<index>.3.cf` to the dependencies of this job */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    // presumably these three files together form the centrifuge index - TODO confirm
    deps ++= List(1, 2, 3).map(part => new File(s"${index}.${part}.cf"))
  }

  /**
   * This function needs to be implemented to define the command that is executed
   *
   * @return Command to run
   */
  def cmdLine: String = executable +
    //TODO: Options
    optional("--threads", threads) +
    required("-x", index) +
    (inputR2 match {
      case Some(r2) => required("-1", inputR1) + required("-2", r2)
      case _        => required("-U", inputR1)
    }) +
    (if (outputAsStsout) "" else required("-S", output)) +
    optional("--report-file", report)
}
package nl.lumc.sasc.biopet.extensions.centrifuge
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
 * Command line wrapper for the `centrifuge-kreport` executable.
 *
 * The command's standard output is redirected to [[output]].
 *
 * Created by pjvanthof on 19/09/16.
 */
class CentrifugeKreport(val root: Configurable) extends BiopetCommandLineFunction {
  /** Files appended to the command line after the options */
  @Input(doc = "Output files centrifuge", required = true)
  var centrifugeOutputFiles: List[File] = Nil

  /** File that receives the redirected standard output */
  @Output(doc = "Output report")
  var output: File = _

  /** Index prefix, read from the `centrifuge_index` key in the `centrifuge` namespace */
  var index: File = config("centrifuge_index", namespace = "centrifuge")

  var onlyUnique: Boolean = config("only_unique", default = false)
  var showZeros: Boolean = config("show_zeros", default = false)
  var isCounts: Boolean = config("is_counts", default = false)
  var minScore: Option[Double] = config("min_score")
  var minLength: Option[Int] = config("min_length")

  override def defaultCoreMemory = 4.0

  executable = config("exe", default = "centrifuge-kreport", freeVar = false)

  /** Adds `<index>.1.cf`, `<index>.2.cf` and `<index>.3.cf` to the dependencies of this job */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    for (part <- 1 to 3) deps :+= new File(s"${index}.${part}.cf")
  }

  /** Builds the command: boolean switches, optional thresholds, index, inputs, then the redirect */
  def cmdLine: String = List(
    executable,
    conditional(onlyUnique, "--only-unique"),
    conditional(showZeros, "--show-zeros"),
    conditional(isCounts, "--is-counts"),
    optional("--min-score=", minScore, spaceSeparated = false),
    optional("--min-length=", minLength, spaceSeparated = false),
    required("-x", index),
    repeat(centrifugeOutputFiles),
    " > ",
    required(output)
  ).mkString
}
......@@ -42,7 +42,7 @@ class KrakenReportToJson(val root: Configurable) extends ToolCommandFunction wit
@Output(doc = "Output JSON", shortName = "output", required = true)
var output: File = _
override def defaultCoreMemory = 2.0
override def defaultCoreMemory = 4.0
override def cmdLine =
super.cmdLine +
......
......@@ -10,11 +10,13 @@
<%@ var libId: Option[String] = None %>
<%@ var args: Map[String, Any] %>
<%@ var outputDir: File %>
<%@ var summaryStatsTag: String = "krakenreport" %>
<%@ var summaryModuleTag: String = "gearskraken" %>
<%
val summaries = if (sampleId.isEmpty && libId.isEmpty) {
summary.getSampleValues("gearskraken", "stats", "krakenreport").map(x => x._1 -> x._2.get.asInstanceOf[Map[String, Any]])
} else summary.getValue(sampleId, libId, "gearskraken", "stats", "krakenreport").map(sampleId.get -> _.asInstanceOf[Map[String, Any]]).toList.toMap
summary.getSampleValues(summaryModuleTag, "stats", summaryStatsTag).map(x => x._1 -> x._2.get.asInstanceOf[Map[String, Any]])
} else summary.getValue(sampleId, libId, summaryModuleTag, "stats", summaryStatsTag).map(sampleId.get -> _.asInstanceOf[Map[String, Any]]).toList.toMap
val tempFile = File.createTempFile("krona.", ".xml")
tempFile.deleteOnExit()
......@@ -24,10 +26,10 @@
args ++ Map("kronaXml" -> tempFile))
val file = new File(outputDir, "kraken_krona.html")
val file = new File(outputDir, s"$summaryModuleTag-$summaryStatsTag.html")
val writer = new PrintWriter(file)
writer.println(output)
writer.close()
%>
<iframe src="kraken_krona.html" style="width:100%;height:80vh;border:none;"></iframe>
<iframe src="${summaryModuleTag}-${summaryStatsTag}.html" style="width:100%;height:80vh;border:none;"></iframe>
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.Gzip
import nl.lumc.sasc.biopet.extensions.centrifuge.{ Centrifuge, CentrifugeKreport }
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
 * Pipeline that runs centrifuge on one or two fastq files, gzips the raw output and
 * creates a kreport plus JSON summary for both the complete and the unique-only hits.
 *
 * Created by pjvanthof on 19/09/16.
 */
class GearsCentrifuge(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {

  var fastqR1: File = _

  var fastqR2: Option[File] = None

  var outputName: String = _

  override def fixedValues = Map("centrifugekreport" -> Map("only_unique" -> false))

  /** Checks that the mandatory fields have been set */
  def init(): Unit = {
    require(fastqR1 != null)
    require(outputName != null)
  }

  /** Gzipped copy of the centrifuge output */
  def centrifugeOutput: File = new File(outputDir, s"$outputName.centrifuge.gz")

  /** Adds the centrifuge job, the gzip job and both kreport chains to the pipeline */
  def biopetScript(): Unit = {
    val classifier = new Centrifuge(this)
    classifier.inputR1 = fastqR1
    classifier.inputR2 = fastqR2
    classifier.output = new File(outputDir, s"$outputName.centrifuge")
    classifier.report = Some(new File(outputDir, s"$outputName.centrifuge.report"))
    classifier.isIntermediate = true
    add(classifier)

    add(Gzip(this, classifier.output, centrifugeOutput))

    // First the report over all hits, then the one restricted to unique hits
    List("centrifuge" -> false, "centrifuge_unique" -> true).foreach {
      case (reportName, uniqueOnly) =>
        makeKreport(List(classifier.output), reportName, unique = uniqueOnly)
    }

    addSummaryJobs()
  }

  /**
   * Adds a kreport job and a job converting that report to JSON; the JSON job is
   * registered in the summary as `<name>_report`.
   *
   * @param inputFiles centrifuge output files to build the report from
   * @param name       tag used in the output file names and in the summary key
   * @param unique     value assigned to `onlyUnique` of the kreport job
   */
  protected def makeKreport(inputFiles: List[File], name: String, unique: Boolean): Unit = {
    val kreportJob = new CentrifugeKreport(this)
    kreportJob.centrifugeOutputFiles = inputFiles
    kreportJob.output = new File(outputDir, s"$outputName.$name.kreport")
    kreportJob.onlyUnique = unique
    add(kreportJob)

    val toJsonJob = new KrakenReportToJson(this)
    toJsonJob.inputReport = kreportJob.output
    toJsonJob.output = new File(outputDir, s"$outputName.$name.krkn.json")
    toJsonJob.skipNames = config("skipNames", default = false)
    add(toJsonJob)

    addSummarizable(toJsonJob, s"${name}_report")
  }

  /** Location of summary file */
  def summaryFile: File = {
    val sampleName = sampleId.getOrElse("sampleName_unknown")
    new File(outputDir, sampleName + ".centrifuge.summary.json")
  }

  /** Pipeline settings shown in the summary file */
  def summarySettings: Map[String, Any] = Map()

  /** Files shown in the summary file; `input_R2` is only present when a second fastq is set */
  def summaryFiles: Map[String, File] = {
    val withoutR2: Map[String, File] =
      outputFiles + ("input_R1" -> fastqR1, "centrifuge_output" -> centrifugeOutput)
    fastqR2.fold(withoutR2)(r2 => withoutR2 + ("input_R2" -> r2))
  }
}
......@@ -41,10 +41,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
def init(): Unit = {
require(fastqR1 != null)
if (outputName == null) outputName = fastqR1.getName
.stripSuffix(".gz")
.stripSuffix(".fq")
.stripSuffix(".fastq")
require(outputName != null)
}
lazy val krakenConvertToFasta: Boolean = config("kraken_discard_quality", default = false)
......@@ -107,7 +104,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".kraken.summary.json")
/** Pipeline settings shown in the summary file */
def summarySettings: Map[String, Any] = Map.empty
def summarySettings: Map[String, Any] = Map()
/** Statistics shown in the summary file */
def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
......
......@@ -38,13 +38,20 @@ object GearsReport extends MultisampleReportBuilder {
def indexPage = {
val krakenExecuted = summary.getSampleValues("gearskraken", "stats", "krakenreport").values.forall(_.isDefined)
val centrifugeExecuted = summary.getSampleValues("gearscentrifuge", "stats", "centrifuge_report").values.forall(_.isDefined)
val qiimeClosesOtuTable = summary.getValue("gears", "files", "pipeline", "qiime_closed_otu_table", "path")
.map(x => new File(x.toString))
val qiimeOpenOtuTable = summary.getValue("gears", "files", "pipeline", "qiime_open_otu_table", "path")
.map(x => new File(x.toString))
ReportPage(
(if (krakenExecuted) List("Kraken analysis" -> ReportPage(List(), List(
(if (centrifugeExecuted) List("Centriguge analysis" -> ReportPage(List("Non-unique" -> ReportPage(List(), List("All mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_report")
)), Map())), List(
"Unique mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_unique_report")
)), Map("summaryModuleTag" -> "gearscentrifuge")))
else Nil) ::: (if (krakenExecuted) List("Kraken analysis" -> ReportPage(List(), List(
"Krona plot" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"
)), Map()))
else Nil) ::: (if (qiimeClosesOtuTable.isDefined) List("Qiime closed reference analysis" -> ReportPage(List(), List(
......@@ -74,12 +81,19 @@ object GearsReport extends MultisampleReportBuilder {
/** Single sample page */
def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = {
val krakenExecuted = summary.getValue(Some(sampleId), None, "gearskraken", "stats", "krakenreport").isDefined
val centrifugeExecuted = summary.getValue(Some(sampleId), None, "gearscentrifuge", "stats", "centrifuge_report").isDefined
val qiimeClosesOtuTable = summary.getValue(Some(sampleId), None, "gearsqiimeclosed", "files", "pipeline", "otu_table", "path")
.map(x => new File(x.toString))
val qiimeOpenOtuTable = summary.getValue(Some(sampleId), None, "gearsqiimeopen", "files", "pipeline", "otu_table", "path")
.map(x => new File(x.toString))
ReportPage((if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
ReportPage((if (centrifugeExecuted) List("Centriguge analysis" -> ReportPage(List("Non-unique" -> ReportPage(List(), List("All mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_report")
)), Map())), List(
"Unique mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_unique_report")
)), Map("summaryModuleTag" -> "gearscentrifuge")))
else Nil) ::: (if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
"Kraken analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"
)), Map()))
else Nil) ::: (if (qiimeClosesOtuTable.isDefined) List("Qiime closed reference analysis" -> ReportPage(List(), List(
......@@ -99,6 +113,7 @@ object GearsReport extends MultisampleReportBuilder {
def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = {
val flexiprepExecuted = summary.getLibraryValue(sampleId, libId, "flexiprep").isDefined
val krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gearskraken", "stats", "krakenreport").isDefined
val centrifugeExecuted = summary.getValue(Some(sampleId), Some(libId), "gearscentrifuge", "stats", "centrifuge_report").isDefined
val qiimeClosesOtuTable = summary.getValue(Some(sampleId), Some(libId), "gearsqiimeclosed", "files", "pipeline", "otu_table", "path")
.map(x => new File(x.toString))
val qiimeOpenOtuTable = summary.getValue(Some(sampleId), Some(libId), "gearsqiimeopen", "files", "pipeline", "otu_table", "path")
......@@ -106,7 +121,13 @@ object GearsReport extends MultisampleReportBuilder {
ReportPage(
(if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil
) ::: (if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
) ::: (if (centrifugeExecuted) List("Centriguge analysis" -> ReportPage(List("Non-unique" -> ReportPage(List(), List("All mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_report")
)), Map())), List(
"Unique mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_unique_report")
)), Map("summaryModuleTag" -> "gearscentrifuge")))
else Nil) ::: (if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
"Kraken analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"
)), Map()))
else Nil) ::: (if (qiimeClosesOtuTable.isDefined) List("Qiime closed reference analysis" -> ReportPage(List(), List(
......
......@@ -41,6 +41,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
var outputName: String = _
lazy val krakenScript = if (config("gears_use_kraken", default = true)) Some(new GearsKraken(this)) else None
lazy val centrifugeScript = if (config("gears_use_centrifuge", default = false)) Some(new GearsCentrifuge(this)) else None
lazy val qiimeRatx = if (config("gears_use_qiime_rtax", default = false)) Some(new GearsQiimeRtax(this)) else None
lazy val qiimeClosed = if (config("gears_use_qiime_closed", default = false)) Some(new GearsQiimeClosed(this)) else None
lazy val qiimeOpen = if (config("gears_use_qiime_open", default = false)) Some(new GearsQiimeOpen(this)) else None
......@@ -126,6 +127,14 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
add(kraken)
}
centrifugeScript foreach { centrifuge =>
centrifuge.outputDir = new File(outputDir, "centrifuge")
centrifuge.fastqR1 = r1
centrifuge.fastqR2 = r2
centrifuge.outputName = outputName
add(centrifuge)
}
qiimeRatx foreach { qiimeRatx =>
qiimeRatx.outputDir = new File(outputDir, "qiime_rtax")
qiimeRatx.fastqR1 = r1
......
......@@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.pipelines.gears
import java.io.File
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.BiopetPipe
import nl.lumc.sasc.biopet.extensions.centrifuge.{ Centrifuge, CentrifugeKreport }
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
......@@ -49,6 +51,7 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
def paired: Boolean = false
def hasOutputName: Boolean = false
def kraken: Option[Boolean] = None
def centrifuge: Boolean = false
def qiimeClosed: Boolean = false
def qiimeOpen: Boolean = false
def qiimeRtax: Boolean = false
......@@ -61,6 +64,7 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
def testGears(): Unit = {
val map = ConfigUtils.mergeMaps(Map(
"gears_use_qiime_rtax" -> qiimeRtax,
"gears_use_centrifuge" -> centrifuge,
"gears_use_qiime_closed" -> qiimeClosed,
"gears_use_qiime_open" -> qiimeOpen,
"gears_use_seq_count" -> seqCount,
......@@ -102,12 +106,15 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
gears.outputName shouldBe (if (inputMode == Some("bam")) "bamfile" else "R1")
}
val pipesJobs = gears.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs)
gears.summarySettings("gears_use_kraken") shouldBe kraken.getOrElse(true)
gears.summarySettings("gear_use_qiime_rtax") shouldBe qiimeRtax
gears.summarySettings("gear_use_qiime_closed") shouldBe qiimeClosed
gears.summarySettings("gear_use_qiime_open") shouldBe qiimeOpen
gears.krakenScript.isDefined shouldBe kraken.getOrElse(true)
gears.centrifugeScript.isDefined shouldBe centrifuge
gears.qiimeClosed.isDefined shouldBe qiimeClosed
gears.qiimeOpen.isDefined shouldBe qiimeOpen
gears.qiimeRatx.isDefined shouldBe qiimeRtax
......@@ -119,7 +126,11 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
gears.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReport]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe
((if (kraken.getOrElse(true)) 1 else 0) + (if (centrifuge) 2 else 0))
gears.functions.count(_.isInstanceOf[Centrifuge]) shouldBe (if (centrifuge) 1 else 0)
gears.functions.count(_.isInstanceOf[CentrifugeKreport]) shouldBe (if (centrifuge) 2 else 0)
}
}
}
......@@ -132,6 +143,9 @@ class GearsSingleDefaultTest extends TestGearsSingle
/** Variant of [[TestGearsSingle]] that explicitly sets `kraken` to `Some(true)`. */
class GearsSingleKrakenTest extends TestGearsSingle {
override def kraken = Some(true)
}
/** Variant of [[TestGearsSingle]] with `centrifuge` enabled, which `testGears` passes on as `gears_use_centrifuge`. */
class GearsSingleCentrifugeTest extends TestGearsSingle {
override def centrifuge = true
}
/** Variant of [[TestGearsSingle]] with `qiimeClosed` enabled, which `testGears` passes on as `gears_use_qiime_closed`. */
class GearsSingleQiimeClosedTest extends TestGearsSingle {
override def qiimeClosed = true
}
......@@ -149,6 +163,10 @@ class GearsSingleKrakenPairedTest extends TestGearsSingle {
override def paired = true
override def kraken = Some(true)
}
/**
 * Variant of [[TestGearsSingle]] with both `paired` and `centrifuge` enabled.
 * NOTE(review): `paired` presumably makes the test supply a second read file - confirm in TestGearsSingle.
 */
class GearsSingleCentrifugePairedTest extends TestGearsSingle {
override def paired = true
override def centrifuge = true
}
class GearsSingleQiimeClosedPairedTest extends TestGearsSingle {
override def paired = true
override def qiimeClosed = true
......@@ -168,6 +186,7 @@ class GearsSingleseqCountPairedTest extends TestGearsSingle {
class GearsSingleAllTest extends TestGearsSingle {
override def kraken = Some(true)
override def centrifuge = true
override def qiimeClosed = true
override def qiimeOpen = true
override def qiimeRtax = true
......@@ -175,6 +194,7 @@ class GearsSingleAllTest extends TestGearsSingle {
}
class GearsSingleAllPairedTest extends TestGearsSingle {
override def kraken = Some(true)
override def centrifuge = true
override def qiimeClosed = true
override def qiimeOpen = true
override def qiimeRtax = true
......@@ -214,6 +234,8 @@ object TestGearsSingle {
val executables = Map(
"kraken" -> Map("exe" -> "test", "db" -> "test"),
"centrifuge" -> Map("exe" -> "test", "centrifuge_index" -> "test"),
"centrifugekreport" -> Map("exe" -> "test"),
"krakenreport" -> Map("exe" -> "test", "db" -> "test"),
"sambamba" -> Map("exe" -> "test"),
"samtools" -> Map("exe" -> "test"),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment