Skip to content
Snippets Groups Projects
Commit 2727f142 authored by bow's avatar bow
Browse files

Merge branch 'feature-centrifuge' into 'develop'

Added base version of centrifuge extension

This adds Centrifuge, which will later replace Kraken.

See merge request !452
parents 0c7e6dae 290f37f6
No related branches found
No related tags found
No related merge requests found
Showing
with 254 additions and 16 deletions
......@@ -35,11 +35,11 @@ class Cuffquant(val root: Configurable) extends BiopetCommandLineFunction with V
means we have 2 samples, each with 2 replicates
so our input is a list of lists of Files
*/
var input: List[List[File]] = List.empty[List[File]]
var input: List[List[File]] = Nil
/** input GTF file */
@Input(doc = "Input GTF file", required = true)
var transcriptsGtf: File = null
var transcriptsGtf: File = _
/** output file, computed automatically from output directory */
@Output(doc = "Output CXB file")
......
......@@ -40,6 +40,8 @@ class Gzip(val root: Configurable) extends BiopetCommandLineFunction with Versio
object Gzip {
def apply(root: Configurable): Gzip = new Gzip(root)
def apply(root: Configurable, input: File, output: File): Gzip = Gzip(root, List(input), output)
def apply(root: Configurable, input: List[File], output: File): Gzip = {
val gzip = new Gzip(root)
gzip.input = input
......
package nl.lumc.sasc.biopet.extensions.centrifuge
import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.util.matching.Regex
/**
 * Command line wrapper for the `centrifuge` executable.
 *
 * Builds a command of the form
 * `centrifuge --threads N -x index (-U r1 | -1 r1 -2 r2) [-S output] [--report-file report]`.
 *
 * Created by pjvanthof on 19/09/16.
 */
class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with Version {
  /** First read file; passed with `-U` for single input or `-1` when [[inputR2]] is set */
  @Input(doc = "Input: FastQ or FastA", required = true)
  var inputR1: File = _

  /** Optional second read file; when defined the inputs are passed as `-1`/`-2` */
  @Input(doc = "Input: FastQ or FastA", required = false)
  var inputR2: Option[File] = None

  /** Index prefix passed with `-x`, read from the config key "centrifuge_index" */
  var index: File = config("centrifuge_index")

  /** File passed with `-S`; omitted from the command when `outputAsStsout` is true */
  @Output(doc = "Output with hits per sequence")
  var output: File = _

  /** Optional file passed with `--report-file` */
  @Output(doc = "Output report file")
  var report: Option[File] = None

  override def defaultThreads = 8

  executable = config("exe", default = "centrifuge", freeVar = false)

  /** Command to get version of executable */
  def versionCommand: String = s"$executable --version"

  /** Regex to get version from version command output */
  def versionRegex: Regex = ".* version (.*)".r

  /** Registers the `<index>.1.cf`, `<index>.2.cf` and `<index>.3.cf` files as dependencies of this job. */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    // Only the index prefix is given on the command line, so the files behind it are added explicitly
    for (part <- 1 to 3) deps :+= new File(index + s".$part.cf")
  }

  /**
   * This function needs to be implemented to define the command that is executed
   *
   * @return Command to run
   */
  def cmdLine: String = executable +
    //TODO: Options
    optional("--threads", threads) +
    required("-x", index) +
    (inputR2 match {
      case Some(r2) => required("-1", inputR1) + required("-2", r2)
      case _        => required("-U", inputR1)
    }) +
    (if (outputAsStsout) "" else required("-S", output)) +
    optional("--report-file", report)
}
package nl.lumc.sasc.biopet.extensions.centrifuge
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
 * Command line wrapper for the `centrifuge-kreport` executable.
 *
 * The command's standard output is redirected into [[output]].
 *
 * Created by pjvanthof on 19/09/16.
 */
class CentrifugeKreport(val root: Configurable) extends BiopetCommandLineFunction {

  /** Files that are appended to the command as positional arguments */
  @Input(doc = "Output files centrifuge", required = true)
  var centrifugeOutputFiles: List[File] = Nil

  /** File that receives the redirected standard output */
  @Output(doc = "Output report")
  var output: File = _

  /** Index prefix passed with `-x`; looked up in the "centrifuge" config namespace */
  var index: File = config("centrifuge_index", namespace = "centrifuge")

  var onlyUnique: Boolean = config("only_unique", default = false)
  var showZeros: Boolean = config("show_zeros", default = false)
  var isCounts: Boolean = config("is_counts", default = false)

  var minScore: Option[Double] = config("min_score")
  var minLength: Option[Int] = config("min_length")

  override def defaultCoreMemory = 4.0

  executable = config("exe", default = "centrifuge-kreport", freeVar = false)

  /** Adds the three `.N.cf` files that share the index prefix to the job dependencies. */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    for (suffix <- List(".1.cf", ".2.cf", ".3.cf")) {
      deps :+= new File(index + suffix)
    }
  }

  /** Assembles the command: boolean flags, thresholds, index, inputs and finally the redirect. */
  def cmdLine = {
    val flags =
      conditional(onlyUnique, "--only-unique") +
        conditional(showZeros, "--show-zeros") +
        conditional(isCounts, "--is-counts")
    val thresholds =
      optional("--min-score=", minScore, spaceSeparated = false) +
        optional("--min-length=", minLength, spaceSeparated = false)
    val inputs = required("-x", index) + repeat(centrifugeOutputFiles)

    executable + flags + thresholds + inputs + " > " + required(output)
  }
}
......@@ -42,7 +42,7 @@ class KrakenReportToJson(val root: Configurable) extends ToolCommandFunction wit
@Output(doc = "Output JSON", shortName = "output", required = true)
var output: File = _
override def defaultCoreMemory = 2.0
override def defaultCoreMemory = 4.0
override def cmdLine =
super.cmdLine +
......
......@@ -10,11 +10,13 @@
<%@ var libId: Option[String] = None %>
<%@ var args: Map[String, Any] %>
<%@ var outputDir: File %>
<%@ var summaryStatsTag: String = "krakenreport" %>
<%@ var summaryModuleTag: String = "gearskraken" %>
<%
val summaries = if (sampleId.isEmpty && libId.isEmpty) {
summary.getSampleValues("gearskraken", "stats", "krakenreport").map(x => x._1 -> x._2.get.asInstanceOf[Map[String, Any]])
} else summary.getValue(sampleId, libId, "gearskraken", "stats", "krakenreport").map(sampleId.get -> _.asInstanceOf[Map[String, Any]]).toList.toMap
summary.getSampleValues(summaryModuleTag, "stats", summaryStatsTag).map(x => x._1 -> x._2.get.asInstanceOf[Map[String, Any]])
} else summary.getValue(sampleId, libId, summaryModuleTag, "stats", summaryStatsTag).map(sampleId.get -> _.asInstanceOf[Map[String, Any]]).toList.toMap
val tempFile = File.createTempFile("krona.", ".xml")
tempFile.deleteOnExit()
......@@ -24,10 +26,10 @@
args ++ Map("kronaXml" -> tempFile))
val file = new File(outputDir, "kraken_krona.html")
val file = new File(outputDir, s"$summaryModuleTag-$summaryStatsTag.html")
val writer = new PrintWriter(file)
writer.println(output)
writer.close()
%>
<iframe src="kraken_krona.html" style="width:100%;height:80vh;border:none;"></iframe>
<iframe src="${summaryModuleTag}-${summaryStatsTag}.html" style="width:100%;height:80vh;border:none;"></iframe>
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.Gzip
import nl.lumc.sasc.biopet.extensions.centrifuge.{ Centrifuge, CentrifugeKreport }
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
 * Sub-pipeline that runs centrifuge on one set of reads, gzips the raw output and
 * produces two kreports (all mappings and unique-only), each converted to JSON for the summary.
 *
 * Created by pjvanthof on 19/09/16.
 */
class GearsCentrifuge(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {

  var fastqR1: File = _

  var fastqR2: Option[File] = None

  var outputName: String = _

  // only_unique is set per kreport job in makeKreport, so it is pinned here
  override def fixedValues = Map("centrifugekreport" -> Map("only_unique" -> false))

  /** Checks that the mandatory fields were set by the caller. */
  def init(): Unit = {
    require(fastqR1 != null)
    require(outputName != null)
  }

  /** Gzipped copy of the raw centrifuge output */
  def centrifugeOutput = new File(outputDir, outputName + ".centrifuge.gz")

  /** Adds the centrifuge job, the gzip job and both kreport chains to the pipeline. */
  def biopetScript(): Unit = {
    val classifier = new Centrifuge(this)
    classifier.inputR1 = fastqR1
    classifier.inputR2 = fastqR2
    classifier.output = new File(outputDir, s"$outputName.centrifuge")
    classifier.report = Some(new File(outputDir, s"$outputName.centrifuge.report"))
    classifier.isIntermediate = true
    add(classifier)

    add(Gzip(this, classifier.output, centrifugeOutput))

    // One report over all mappings, one restricted to unique mappings
    List("centrifuge" -> false, "centrifuge_unique" -> true).foreach {
      case (reportName, uniqueOnly) =>
        makeKreport(List(classifier.output), reportName, unique = uniqueOnly)
    }

    addSummaryJobs()
  }

  /**
   * Adds a kreport job plus its JSON conversion, and registers the JSON job in the summary
   * under `<name>_report`.
   *
   * @param inputFiles centrifuge output files to report on
   * @param name       name used in the output file names and the summary key
   * @param unique     value assigned to the kreport job's `onlyUnique`
   */
  protected def makeKreport(inputFiles: List[File], name: String, unique: Boolean): Unit = {
    val kreport = new CentrifugeKreport(this)
    kreport.centrifugeOutputFiles = inputFiles
    kreport.output = new File(outputDir, s"$outputName.$name.kreport")
    kreport.onlyUnique = unique
    add(kreport)

    val toJson = new KrakenReportToJson(this)
    toJson.inputReport = kreport.output
    toJson.output = new File(outputDir, s"$outputName.$name.krkn.json")
    toJson.skipNames = config("skipNames", default = false)
    add(toJson)

    addSummarizable(toJson, s"${name}_report")
  }

  /** Location of summary file */
  def summaryFile = new File(outputDir, s"${sampleId.getOrElse("sampleName_unknown")}.centrifuge.summary.json")

  /** Pipeline settings shown in the summary file */
  def summarySettings: Map[String, Any] = Map.empty

  /** Files shown in the summary file; the R2 entry is only present for paired input */
  def summaryFiles: Map[String, File] = {
    val secondRead = fastqR2.fold(Map.empty[String, File])(r2 => Map("input_R2" -> r2))
    outputFiles + ("input_R1" -> fastqR1, "centrifuge_output" -> centrifugeOutput) ++ secondRead
  }
}
......@@ -41,10 +41,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
def init(): Unit = {
require(fastqR1 != null)
if (outputName == null) outputName = fastqR1.getName
.stripSuffix(".gz")
.stripSuffix(".fq")
.stripSuffix(".fastq")
require(outputName != null)
}
lazy val krakenConvertToFasta: Boolean = config("kraken_discard_quality", default = false)
......@@ -107,7 +104,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".kraken.summary.json")
/** Pipeline settings shown in the summary file */
def summarySettings: Map[String, Any] = Map.empty
def summarySettings: Map[String, Any] = Map()
/** Statistics shown in the summary file */
def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
......
......@@ -38,13 +38,20 @@ object GearsReport extends MultisampleReportBuilder {
def indexPage = {
val krakenExecuted = summary.getSampleValues("gearskraken", "stats", "krakenreport").values.forall(_.isDefined)
val centrifugeExecuted = summary.getSampleValues("gearscentrifuge", "stats", "centrifuge_report").values.forall(_.isDefined)
val qiimeClosesOtuTable = summary.getValue("gears", "files", "pipeline", "qiime_closed_otu_table", "path")
.map(x => new File(x.toString))
val qiimeOpenOtuTable = summary.getValue("gears", "files", "pipeline", "qiime_open_otu_table", "path")
.map(x => new File(x.toString))
ReportPage(
(if (krakenExecuted) List("Kraken analysis" -> ReportPage(List(), List(
(if (centrifugeExecuted) List("Centriguge analysis" -> ReportPage(List("Non-unique" -> ReportPage(List(), List("All mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_report")
)), Map())), List(
"Unique mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_unique_report")
)), Map("summaryModuleTag" -> "gearscentrifuge")))
else Nil) ::: (if (krakenExecuted) List("Kraken analysis" -> ReportPage(List(), List(
"Krona plot" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"
)), Map()))
else Nil) ::: (if (qiimeClosesOtuTable.isDefined) List("Qiime closed reference analysis" -> ReportPage(List(), List(
......@@ -74,12 +81,19 @@ object GearsReport extends MultisampleReportBuilder {
/** Single sample page */
def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = {
val krakenExecuted = summary.getValue(Some(sampleId), None, "gearskraken", "stats", "krakenreport").isDefined
val centrifugeExecuted = summary.getValue(Some(sampleId), None, "gearscentrifuge", "stats", "centrifuge_report").isDefined
val qiimeClosesOtuTable = summary.getValue(Some(sampleId), None, "gearsqiimeclosed", "files", "pipeline", "otu_table", "path")
.map(x => new File(x.toString))
val qiimeOpenOtuTable = summary.getValue(Some(sampleId), None, "gearsqiimeopen", "files", "pipeline", "otu_table", "path")
.map(x => new File(x.toString))
ReportPage((if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
ReportPage((if (centrifugeExecuted) List("Centriguge analysis" -> ReportPage(List("Non-unique" -> ReportPage(List(), List("All mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_report")
)), Map())), List(
"Unique mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_unique_report")
)), Map("summaryModuleTag" -> "gearscentrifuge")))
else Nil) ::: (if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
"Kraken analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"
)), Map()))
else Nil) ::: (if (qiimeClosesOtuTable.isDefined) List("Qiime closed reference analysis" -> ReportPage(List(), List(
......@@ -99,6 +113,7 @@ object GearsReport extends MultisampleReportBuilder {
def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = {
val flexiprepExecuted = summary.getLibraryValue(sampleId, libId, "flexiprep").isDefined
val krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gearskraken", "stats", "krakenreport").isDefined
val centrifugeExecuted = summary.getValue(Some(sampleId), Some(libId), "gearscentrifuge", "stats", "centrifuge_report").isDefined
val qiimeClosesOtuTable = summary.getValue(Some(sampleId), Some(libId), "gearsqiimeclosed", "files", "pipeline", "otu_table", "path")
.map(x => new File(x.toString))
val qiimeOpenOtuTable = summary.getValue(Some(sampleId), Some(libId), "gearsqiimeopen", "files", "pipeline", "otu_table", "path")
......@@ -106,7 +121,13 @@ object GearsReport extends MultisampleReportBuilder {
ReportPage(
(if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil
) ::: (if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
) ::: (if (centrifugeExecuted) List("Centriguge analysis" -> ReportPage(List("Non-unique" -> ReportPage(List(), List("All mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_report")
)), Map())), List(
"Unique mappings" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp",
Map("summaryStatsTag" -> "centrifuge_unique_report")
)), Map("summaryModuleTag" -> "gearscentrifuge")))
else Nil) ::: (if (krakenExecuted) List("Kraken" -> ReportPage(List(), List(
"Kraken analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/krakenKrona.ssp"
)), Map()))
else Nil) ::: (if (qiimeClosesOtuTable.isDefined) List("Qiime closed reference analysis" -> ReportPage(List(), List(
......
......@@ -41,6 +41,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
var outputName: String = _
lazy val krakenScript = if (config("gears_use_kraken", default = true)) Some(new GearsKraken(this)) else None
lazy val centrifugeScript = if (config("gears_use_centrifuge", default = false)) Some(new GearsCentrifuge(this)) else None
lazy val qiimeRatx = if (config("gears_use_qiime_rtax", default = false)) Some(new GearsQiimeRtax(this)) else None
lazy val qiimeClosed = if (config("gears_use_qiime_closed", default = false)) Some(new GearsQiimeClosed(this)) else None
lazy val qiimeOpen = if (config("gears_use_qiime_open", default = false)) Some(new GearsQiimeOpen(this)) else None
......@@ -126,6 +127,14 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
add(kraken)
}
centrifugeScript foreach { centrifuge =>
centrifuge.outputDir = new File(outputDir, "centrifuge")
centrifuge.fastqR1 = r1
centrifuge.fastqR2 = r2
centrifuge.outputName = outputName
add(centrifuge)
}
qiimeRatx foreach { qiimeRatx =>
qiimeRatx.outputDir = new File(outputDir, "qiime_rtax")
qiimeRatx.fastqR1 = r1
......
......@@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.pipelines.gears
import java.io.File
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.BiopetPipe
import nl.lumc.sasc.biopet.extensions.centrifuge.{ Centrifuge, CentrifugeKreport }
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
......@@ -49,6 +51,7 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
def paired: Boolean = false
def hasOutputName: Boolean = false
def kraken: Option[Boolean] = None
def centrifuge: Boolean = false
def qiimeClosed: Boolean = false
def qiimeOpen: Boolean = false
def qiimeRtax: Boolean = false
......@@ -61,6 +64,7 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
def testGears(): Unit = {
val map = ConfigUtils.mergeMaps(Map(
"gears_use_qiime_rtax" -> qiimeRtax,
"gears_use_centrifuge" -> centrifuge,
"gears_use_qiime_closed" -> qiimeClosed,
"gears_use_qiime_open" -> qiimeOpen,
"gears_use_seq_count" -> seqCount,
......@@ -102,12 +106,15 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
gears.outputName shouldBe (if (inputMode == Some("bam")) "bamfile" else "R1")
}
val pipesJobs = gears.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs)
gears.summarySettings("gears_use_kraken") shouldBe kraken.getOrElse(true)
gears.summarySettings("gear_use_qiime_rtax") shouldBe qiimeRtax
gears.summarySettings("gear_use_qiime_closed") shouldBe qiimeClosed
gears.summarySettings("gear_use_qiime_open") shouldBe qiimeOpen
gears.krakenScript.isDefined shouldBe kraken.getOrElse(true)
gears.centrifugeScript.isDefined shouldBe centrifuge
gears.qiimeClosed.isDefined shouldBe qiimeClosed
gears.qiimeOpen.isDefined shouldBe qiimeOpen
gears.qiimeRatx.isDefined shouldBe qiimeRtax
......@@ -119,7 +126,11 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
gears.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReport]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe (if (kraken.getOrElse(true)) 1 else 0)
gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe
((if (kraken.getOrElse(true)) 1 else 0) + (if (centrifuge) 2 else 0))
gears.functions.count(_.isInstanceOf[Centrifuge]) shouldBe (if (centrifuge) 1 else 0)
gears.functions.count(_.isInstanceOf[CentrifugeKreport]) shouldBe (if (centrifuge) 2 else 0)
}
}
}
......@@ -132,6 +143,9 @@ class GearsSingleDefaultTest extends TestGearsSingle
/** Runs the shared gears test with kraken explicitly switched on. */
class GearsSingleKrakenTest extends TestGearsSingle {
  override def kraken: Option[Boolean] = Some(true)
}
/** Runs the shared gears test with centrifuge switched on. */
class GearsSingleCentrifugeTest extends TestGearsSingle {
  override def centrifuge: Boolean = true
}
/** Runs the shared gears test with qiime closed switched on. */
class GearsSingleQiimeClosedTest extends TestGearsSingle {
  override def qiimeClosed: Boolean = true
}
......@@ -149,6 +163,10 @@ class GearsSingleKrakenPairedTest extends TestGearsSingle {
override def paired = true
override def kraken = Some(true)
}
/** Runs the shared gears test with paired input and centrifuge switched on. */
class GearsSingleCentrifugePairedTest extends TestGearsSingle {
  override def paired: Boolean = true
  override def centrifuge: Boolean = true
}
class GearsSingleQiimeClosedPairedTest extends TestGearsSingle {
override def paired = true
override def qiimeClosed = true
......@@ -168,6 +186,7 @@ class GearsSingleseqCountPairedTest extends TestGearsSingle {
class GearsSingleAllTest extends TestGearsSingle {
override def kraken = Some(true)
override def centrifuge = true
override def qiimeClosed = true
override def qiimeOpen = true
override def qiimeRtax = true
......@@ -175,6 +194,7 @@ class GearsSingleAllTest extends TestGearsSingle {
}
class GearsSingleAllPairedTest extends TestGearsSingle {
override def kraken = Some(true)
override def centrifuge = true
override def qiimeClosed = true
override def qiimeOpen = true
override def qiimeRtax = true
......@@ -214,6 +234,8 @@ object TestGearsSingle {
val executables = Map(
"kraken" -> Map("exe" -> "test", "db" -> "test"),
"centrifuge" -> Map("exe" -> "test", "centrifuge_index" -> "test"),
"centrifugekreport" -> Map("exe" -> "test"),
"krakenreport" -> Map("exe" -> "test", "db" -> "test"),
"sambamba" -> Map("exe" -> "test"),
"samtools" -> Map("exe" -> "test"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment