Commit 6a6c8673 authored by Peter van 't Hof, committed by GitHub

Merge pull request #90 from biopet/fix-BIOPET-645

Fixing small bug for coming release
parents 6f3ac600 40ddc1b7
......@@ -101,8 +101,8 @@ class BastyTest extends TestNGSuite with Matchers {
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (if (realign) numberSamples else 0)
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (if (realign) numberSamples else 0)
pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0)
pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)
......
......@@ -134,12 +134,13 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript =>
required("echo") + required("error_on_capture " + input.toString) + " > " + required(output)
}
md5sum.input = file
md5sum.output = new File(file.getParentFile, file.getName + ".md5")
md5sum.jobOutputFile = new File(file.getParentFile, s".${file.getName}.md5.md5sum.out")
// Need to not write a md5 file outside the outputDir
if (!file.getAbsolutePath.startsWith(outputDir.getAbsolutePath))
md5sum.output = new File(outputDir, ".md5" + file.getAbsolutePath + ".md5")
md5sum.output = if (file.getAbsolutePath.startsWith(outputDir.getAbsolutePath))
new File(file.getParentFile, file.getName + ".md5")
else {
// Need to not write a md5 file outside the outputDir
new File(outputDir, ".md5" + file.getAbsolutePath + ".md5")
}
md5sum.jobOutputFile = new File(md5sum.output.getParentFile, s".${file.getName}.md5.md5sum.out")
writeSummary.deps :+= md5sum.output
SummaryQScript.md5sumCache += file -> md5sum.output
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema.Library)
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema.Sample)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(nl.lumc.sasc.biopet.core.report.ReportBuilder)
#import(nl.lumc.sasc.biopet.pipelines.gears.GearsKraken)
#import(java.io.File)
#import(java.io.PrintWriter)
<%@ var summary: Summary %>
<%@ var summary: SummaryDb %>
<%@ var rootPath: String %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<%@ var sampleId: Option[Int] = None %>
<%@ var libId: Option[Int] = None %>
<%@ var args: Map[String, Any] %>
<%@ var outputDir: File %>
<%@ var centrifugeTag: Option[String] = None %>
<%@ var summaryStatsTag: String = "krakenreport" %>
<%@ var summaryModuleTag: String = "gearskraken" %>
<%@ var runId: Int %>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
<%
val summaries = if (sampleId.isEmpty) {
summary.getSampleValues(summaryModuleTag, "stats", summaryStatsTag).map(x => x._1 -> x._2.get.asInstanceOf[Map[String, Any]])
} else summary.getValue(sampleId, libId, summaryModuleTag, "stats", summaryStatsTag).map(sampleId.get -> _.asInstanceOf[Map[String, Any]]).toList.toMap
val summaries = if (libId.isDefined) summary.getStatsForLibraries(runId, summaryModuleTag, summaryStatsTag, sampleId,
Map("all" -> Nil)
).filter(_._1._2 == libId.get).map(x => x._1._1 -> x._2)
else summary.getStatsForSamples(runId, summaryModuleTag, summaryStatsTag, sampleId.map(SummaryDb.SampleId),
Map("all" -> Nil)
)
val totalReads = if (sampleId.isEmpty) {
centrifugeTag.map {tag => summary.getSampleValues(summaryModuleTag, "stats", tag, "metrics", "Read").map(x => x._1 -> x._2.getOrElse(0L).toString.toLong) }
} else centrifugeTag.flatMap(tag => summary.getValue(sampleId, libId, summaryModuleTag, "stats", tag, "metrics", "Read"))
.map(value => Map(sampleId.get -> value.toString.toLong))
val totalReads = centrifugeTag.map { tag =>
if (libId.isDefined) {
val stats = summary.getStatsForLibraries(runId, summaryModuleTag, ModuleName(tag), sampleId,
Map("total" -> List("metrics", "Read"))
).filter(_._1._2 == libId.get).head
val lib = allLibraries.filter(_.id == stats._1._2).head
val sample = allSamples.filter(_.id == stats._1._1).head
Map(s"${sample.name}" -> stats._2("total").map(_.toString.toLong).getOrElse(0L))
} else summary.getStatsForSamples(runId, summaryModuleTag, ModuleName(tag), sampleId.map(SummaryDb.SampleId),
Map("total" -> List("metrics", "Read"))
).map(x => allSamples.find(_.id == x._1).head.name -> x._2("total").map(_.toString.toLong).getOrElse(0L))
}
val tempFile = File.createTempFile("krona.", ".xml")
tempFile.deleteOnExit()
GearsKraken.convertKrakenSummariesToKronaXml(summaries, tempFile, totalReads)
GearsKraken.convertKrakenSummariesToKronaXml(summaries.map(x => allSamples.find(_.id == x._1).head.name -> x._2("all").map(_.asInstanceOf[Map[String, Any]]).getOrElse(Map())), tempFile, totalReads)
val output = ReportBuilder.renderTemplate("/nl/lumc/sasc/biopet/core/report/krona.ssp",
args ++ Map("kronaXml" -> tempFile))
......
......@@ -124,7 +124,7 @@ object GearsKraken {
val taxs: mutable.Map[String, Any] = mutable.Map()
def addTax(map: Map[String, Any], path: List[String] = Nil): Unit = {
val name = map("name").toString
val name = map.get("name").getOrElse("noName").toString
val x = path.foldLeft(taxs)((a, b) => if (a.contains(b)) a(b).asInstanceOf[mutable.Map[String, Any]] else {
a += b -> mutable.Map[String, Any]()
a(b).asInstanceOf[mutable.Map[String, Any]]
......@@ -132,13 +132,13 @@ object GearsKraken {
if (!x.contains(name)) x += name -> mutable.Map[String, Any]()
map("children").asInstanceOf[List[Any]].foreach(x => addTax(x.asInstanceOf[Map[String, Any]], path ::: name :: Nil))
map.get("children").getOrElse(List()).asInstanceOf[List[Any]].foreach(x => addTax(x.asInstanceOf[Map[String, Any]], path ::: name :: Nil))
}
summaries.foreach { x => addTax(x._2("classified").asInstanceOf[Map[String, Any]]) }
summaries.foreach { x => addTax(x._2.get("classified").getOrElse(Map()).asInstanceOf[Map[String, Any]]) }
def getValue(sample: String, path: List[String], key: String) = {
path.foldLeft(summaries(sample)("classified").asInstanceOf[Map[String, Any]]) { (b, a) =>
path.foldLeft(summaries(sample).get("classified").getOrElse(Map()).asInstanceOf[Map[String, Any]]) { (b, a) =>
b.getOrElse("children", List[Map[String, Any]]())
.asInstanceOf[List[Map[String, Any]]]
.find(_.getOrElse("name", "") == a).getOrElse(Map[String, Any]())
......
......@@ -21,8 +21,13 @@ import nl.lumc.sasc.biopet.extensions.{ Gzip, Zcat }
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
import org.broadinstitute.gatk.queue.QScript
import scala.concurrent.{ Await, Future }
import scala.concurrent.duration.Duration
import scala.concurrent.ExecutionContext.Implicits.global
/**
* Created by wyleung
*/
......@@ -55,6 +60,18 @@ class GearsSingle(val parent: Configurable) extends QScript with SummaryQScript
if (fastqR2.nonEmpty && fastqR1.size != fastqR2.size) Logging.addError("R1 and R2 has not the same number of files")
if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on GearsSingle module")
if (!skipFlexiprep) {
val db = SummaryDb.openSqliteSummary(summaryDbFile)
val future = for {
sample <- db.getSamples(runId = summaryRunId, name = sampleId).map(_.headOption)
sId <- sample.map(s => Future.successful(s.id))
.getOrElse(db.createSample(sampleId.getOrElse("noSampleName"), summaryRunId))
library <- db.getLibraries(runId = summaryRunId, name = libId, sampleId = Some(sId)).map(_.headOption)
lId <- library.map(l => Future.successful(l.id))
.getOrElse(db.createLibrary(libId.getOrElse("noLibName"), summaryRunId, sId))
} yield lId
Await.result(future, Duration.Inf)
}
if (outputName == null) {
outputName = sampleId.getOrElse("noName") + libId.map("-" + _).getOrElse("")
}
......
......@@ -78,10 +78,10 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
// TODO: hide sampleId and libId from the command line so they do not interfere with our config values
/** Readgroup Platform */
protected var platform: String = config("platform", default = "illumina")
protected var readgroupPlatform: String = config("readgroup_platform", default = "illumina")
/** Readgroup platform unit */
protected var platformUnit: Option[String] = config("platform_unit")
protected var readgroupPlatformUnit: Option[String] = config("readgroup_platform_unit")
/** Readgroup sequencing center */
protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
......@@ -98,7 +98,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
/** Readgroup predicted insert size */
protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
val keepFinalBamFile: Boolean = config("keep_final_bam_file", default = true)
val keepFinalBamFile: Boolean = config("keep_mapping_bam_file", default = true)
protected var paired: Boolean = false
val flexiprep = new Flexiprep(this)
......@@ -387,15 +387,15 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
hisat2.R1 = R1
hisat2.R2 = R2
hisat2.rgId = Some(readgroupId)
hisat2.rg +:= s"PL:$platform"
platformUnit.foreach(x => hisat2.rg +:= s"PU:$x")
libId match {
case Some(id) => hisat2.rg +:= s"LB:$id"
case otherwise => ;
hisat2.rg +:= s"PL:$readgroupPlatform"
readgroupPlatformUnit.foreach(x => hisat2.rg +:= s"PU:$x")
readgroupLibrary match {
case Some(id) => hisat2.rg +:= s"LB:$id"
case _ =>
}
sampleId match {
case Some(id) => hisat2.rg +:= s"SM:$id"
case otherwise => ;
case Some(id) => hisat2.rg +:= s"SM:$id"
case _ =>
}
val sortSam = new SortSam(this)
......@@ -459,13 +459,13 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
var RG: String = "ID:" + readgroupId + ","
RG += "SM:" + sampleId.get + ","
RG += "LB:" + libId.get + ","
readgroupLibrary.foreach(RG += "LB:" + _ + ",")
if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
platformUnit.foreach(x => RG += "PU:" + x + ",")
readgroupPlatformUnit.foreach(x => RG += "PU:" + x + ",")
if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
RG += "PL:" + platform
RG += "PL:" + readgroupPlatform
val stampyCmd = new Stampy(this)
stampyCmd.R1 = R1
......@@ -504,9 +504,9 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
def addBowtie2(R1: File, R2: Option[File], output: File): File = {
val bowtie2 = new Bowtie2(this)
bowtie2.rgId = Some(readgroupId)
bowtie2.rg +:= ("LB:" + libId.get)
bowtie2.rg +:= ("PL:" + platform)
platformUnit.foreach(x => bowtie2.rg +:= ("PU:" + x))
bowtie2.rg +:= ("LB:" + readgroupLibrary.getOrElse(libId.get))
bowtie2.rg +:= ("PL:" + readgroupPlatform)
readgroupPlatformUnit.foreach(x => bowtie2.rg +:= ("PU:" + x))
bowtie2.rg +:= ("SM:" + sampleId.get)
bowtie2.R1 = R1
bowtie2.R2 = R2
......@@ -561,9 +561,9 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
addOrReplaceReadGroups.createIndex = true
addOrReplaceReadGroups.RGID = readgroupId
addOrReplaceReadGroups.RGLB = libId.get
addOrReplaceReadGroups.RGPL = platform
addOrReplaceReadGroups.RGPU = platformUnit.getOrElse(readgroupId)
addOrReplaceReadGroups.RGLB = readgroupLibrary.getOrElse(libId.get)
addOrReplaceReadGroups.RGPL = readgroupPlatform
addOrReplaceReadGroups.RGPU = readgroupPlatformUnit.getOrElse(readgroupId)
addOrReplaceReadGroups.RGSM = sampleId.get
if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
......@@ -576,8 +576,8 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
def getReadGroupBwa: String = {
var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
readgroupLibrary.foreach(lb => RG += "LB:" + lb + "\\t")
RG += "PL:" + platform + "\\t"
platformUnit.foreach(x => RG += "PU:" + x + "\\t")
RG += "PL:" + readgroupPlatform + "\\t"
readgroupPlatformUnit.foreach(x => RG += "PU:" + x + "\\t")
RG += "SM:" + sampleId.get + "\\t"
if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
......
......@@ -74,7 +74,10 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
/** Sample specific settings */
override def summarySettings: Map[String, Any] = super.summarySettings ++
Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
Map(
"single_sample_variantcalling" -> variantcalling.isDefined,
"use_indel_realigner" -> useIndelRealigner
)
/** Class to generate jobs for a library */
class Library(libId: String) extends super.Library(libId) {
......@@ -84,7 +87,6 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
bqsrFile.map("baserecal" -> _) ++
bqsrAfterFile.map("baserecal_after" -> _)
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
lazy val useBaseRecalibration: Boolean = {
val c: Boolean = config("use_base_recalibration", default = true)
val br = new BaseRecalibrator(qscript)
......@@ -97,18 +99,15 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile("baserecal")) else None
lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile("baserecal.after")) else None
override def keepFinalBamfile: Boolean = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration
override def keepFinalBamfile: Boolean = super.keepFinalBamfile && !useBaseRecalibration && !usePrintReads
override def preProcessBam: Option[Mapping#File] = if (useIndelRealigner && usePrintReads && useBaseRecalibration)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
else if (usePrintReads && useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
override def preProcessBam: Option[Mapping#File] = if (usePrintReads && useBaseRecalibration)
bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else bamFile
/** Library specific settings */
override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
"library_variantcalling" -> variantcalling.isDefined,
"use_indel_realigner" -> useIndelRealigner,
"use_base_recalibration" -> useBaseRecalibration,
"useAnalyze_covariates" -> useAnalyzeCovariates
)
......@@ -122,13 +121,8 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
override def addJobs(): Unit = {
super.addJobs()
if (useIndelRealigner && useBaseRecalibration) {
val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
} else if (useIndelRealigner) {
addIndelRealign(bamFile.get, libDir, libraries.size > 1)
} else if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, useIndelRealigner || libraries.size > 1, usePrintReads)
}
variantcalling.foreach(vc => {
......@@ -181,11 +175,11 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
} else None
override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner || (libraries.size == 1))
override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
override def preProcessBam: Option[File] = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
override def preProcessBam: Option[File] = if (useIndelRealigner) {
bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
} else bamFile
......@@ -195,7 +189,7 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
override def addJobs(): Unit = {
super.addJobs()
if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
if (useIndelRealigner) {
addIndelRealign(bamFile.get, sampleDir, isIntermediate = false)
}
......
......@@ -104,8 +104,8 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (if (realign) numberSamples else 0)
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (if (realign) numberSamples else 0)
pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0)
pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration && usePrintReads) numberLibs else 0)
......@@ -120,7 +120,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
sample.libraries.foreach {
case (libId, lib) =>
lib.summarySettings.get("library_variantcalling") shouldBe Some(libraryCalling)
lib.summarySettings.get("use_indel_realigner") shouldBe Some(realign)
lib.summarySettings.get("use_indel_realigner") shouldBe None // Should not exist anymore
lib.summarySettings.get("use_base_recalibration") shouldBe Some(baseRecalibration && dbsnp)
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment