Commit b1e56b71 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Switch shiva to multisample mapping

parent 31a835a1
......@@ -56,19 +56,21 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
("use_indel_realigner" -> useIndelRealigner) +
("use_base_recalibration" -> useBaseRecalibration)
/** This will adds preprocess steps, gatk indel realignment and base recalibration is included here */
override def preProcess(input: File): Option[File] = {
if (!useIndelRealigner && !useBaseRecalibration) None
else {
val indelRealignFile = useIndelRealigner match {
case true => addIndelRealign(input, libDir, useBaseRecalibration || libraries.size > 1)
case false => input
}
override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else bamFile
useBaseRecalibration match {
case true => Some(addBaseRecalibrator(indelRealignFile, libDir, libraries.size > 1))
case false => Some(indelRealignFile)
}
override def addJobs(): Unit = {
super.addJobs()
if (useIndelRealigner && useBaseRecalibration) {
val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
addBaseRecalibrator(file, libDir, libraries.size > 1)
} else if (useIndelRealigner) {
addIndelRealign(bamFile.get, libDir, libraries.size > 1)
} else if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
}
}
}
......@@ -77,15 +79,15 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
/** This methods will add double preprocess steps, with GATK indel realignment */
override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
if (input.size <= 1) super.addDoublePreProcess(input)
else super.addDoublePreProcess(input, isIntermediate = useIndelRealigner).collect {
case file =>
useIndelRealigner match {
case true => addIndelRealign(file, sampleDir, isIntermediate = false)
case false => file
}
override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
} else bamFile
override def addJobs(): Unit = {
super.addJobs()
if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
addIndelRealign(bamFile.get, sampleDir, libraries.size > 1)
}
}
}
......
......@@ -8,7 +8,6 @@ import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq}
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import MultisampleMapping.MergeStrategy
......@@ -18,10 +17,10 @@ import scala.collection.JavaConversions._
/**
* Created by pjvanthof on 18/12/15.
*/
class MultisampleMapping(val root: Configurable) extends QScript
trait MultisampleMapping extends QScript
with MultiSampleQScript
with Reference { qscript =>
def this() = this(null)
//def this() = this(null)
def mergeStrategy: MergeStrategy.Value = {
val value: String = config("merge_strategy", default = "preprocessmarkduplicates")
......@@ -31,23 +30,25 @@ class MultisampleMapping(val root: Configurable) extends QScript
}
}
def init: Unit = {
def init(): Unit = {
}
def biopetScript: Unit = {
def biopetScript(): Unit = {
addSamplesJobs()
addSummaryJobs()
}
def addMultiSampleJobs: Unit = {
def addMultiSampleJobs(): Unit = {
// this code will be executed after all code of all samples is executed
}
def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json")
def summaryFiles: Map[String, File] = Map()
def summaryFiles: Map[String, File] = Map("referenceFasta" -> referenceFasta())
def summarySettings: Map[String, Any] = Map("merge_strategy" -> mergeStrategy.toString)
def summarySettings: Map[String, Any] = Map(
"reference" -> referenceSummary,
"merge_strategy" -> mergeStrategy.toString)
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
......@@ -81,7 +82,7 @@ class MultisampleMapping(val root: Configurable) extends QScript
def preProcessBam = bamFile
def addJobs: Unit = {
def addJobs(): Unit = {
if (inputR1.isDefined) {
mapping.foreach { m =>
m.input_R1 = inputR1.get
......@@ -179,7 +180,7 @@ class MultisampleMapping(val root: Configurable) extends QScript
case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
}
if (mergeStrategy != None && libraries.flatMap(_._2.bamFile).nonEmpty) {
if (mergeStrategy != MergeStrategy.None && libraries.flatMap(_._2.bamFile).nonEmpty) {
val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId)
bamMetrics.inputBam = preProcessBam.get
......
......@@ -15,35 +15,18 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
import nl.lumc.sasc.biopet.pipelines.bammetrics.{ TargetRegions, BamMetrics }
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMapping
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
/**
* This is a trait for the Shiva pipeline
*
* Created by pjvan_thof on 2/26/15.
*/
trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { qscript: QScript =>
/** Executed before running the script */
def init(): Unit = {
}
/** Method to add jobs */
def biopetScript(): Unit = {
addSamplesJobs()
addSummaryJobs()
}
trait ShivaTrait extends MultisampleMapping with Reference with TargetRegions { qscript: QScript =>
override def reportClass = {
val shiva = new ShivaReport(this)
......@@ -65,167 +48,29 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
}
/** Method to make a sample */
def makeSample(id: String) = new Sample(id)
override def makeSample(id: String) = new this.Sample(id)
/** Class that will generate jobs for a sample */
class Sample(sampleId: String) extends AbstractSample(sampleId) {
/** Sample specific files to add to summary */
def summaryFiles: Map[String, File] = {
preProcessBam match {
case Some(b) => Map("preProcessBam" -> b)
case _ => Map()
}
}
/** Sample specific stats to add to summary */
def summaryStats: Map[String, Any] = Map()
class Sample(sampleId: String) extends super.Sample(sampleId) {
/** Method to make a library */
def makeLibrary(id: String) = new Library(id)
override def makeLibrary(id: String) = new this.Library(id)
/** Sample specific settings */
override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined)
/** Class to generate jobs for a library */
class Library(libId: String) extends AbstractLibrary(libId) {
/** Library specific files to add to the summary */
def summaryFiles: Map[String, File] = {
((bamFile, preProcessBam) match {
case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
case (Some(b), _) => Map("bamFile" -> b, "preProcessBam" -> b)
case _ => Map()
}) ++ (inputR1.map("input_R1" -> _) ::
inputR2.map("input_R2" -> _) ::
inputBam.map("input_bam" -> _) :: Nil).flatten.toMap
}
/** Library specific stats to add to summary */
def summaryStats: Map[String, Any] = Map()
/** Method to execute library preprocess */
def preProcess(input: File): Option[File] = None
class Library(libId: String) extends super.Library(libId) {
/** Library specific settings */
override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined)
/** Method to make the mapping submodule */
def makeMapping = {
val mapping = new Mapping(qscript)
mapping.sampleId = Some(sampleId)
mapping.libId = Some(libId)
mapping.outputDir = libDir
mapping.outputName = sampleId + "-" + libId
(Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
}
def fileMustBeAbsulute(file: Option[File]): Option[File] = {
if (file.forall(_.isAbsolute)) file
else {
Logging.addError(s"$file for $sampleId / $libId should be a absolute file path")
file.map(_.getAbsoluteFile)
}
}
lazy val inputR1: Option[File] = fileMustBeAbsulute(config("R1"))
lazy val inputR2: Option[File] = fileMustBeAbsulute(config("R2"))
lazy val inputBam: Option[File] = fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None)
lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
(inputR1.isDefined, inputBam.isDefined) match {
case (true, _) => makeMapping // Default starting from fastq files
case (false, true) => // Starting from bam file
config("bam_to_fastq", default = false).asBoolean match {
case true => makeMapping // bam file will be converted to fastq
case false =>
val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
(None, Some(file), preProcess(file))
}
case _ => (None, None, None)
}
lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
(bamFile.isDefined || preProcessBam.isDefined)) {
Some(makeVariantcalling(multisample = false))
} else None
/** This will add jobs for this library */
def addJobs(): Unit = {
(inputR1.isDefined, inputBam.isDefined) match {
case (true, _) => mapping.foreach(mapping => {
mapping.input_R1 = inputR1.get
mapping.input_R2 = inputR2
inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5"))
mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
})
case (false, true) => {
inputFiles :+= new InputFile(inputBam.get, config("bam_md5"))
config("bam_to_fastq", default = false).asBoolean match {
case true =>
val samToFastq = SamToFastq(qscript, inputBam.get,
new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
samToFastq.isIntermediate = true
qscript.add(samToFastq)
mapping.foreach(mapping => {
mapping.input_R1 = samToFastq.fastqR1
mapping.input_R2 = Some(samToFastq.fastqR2)
})
case false =>
val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
val readGroups = inputSam.getFileHeader.getReadGroups
val readGroupOke = readGroups.forall(readGroup => {
if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
readGroup.getSample == sampleId && readGroup.getLibrary == libId
})
inputSam.close()
if (!readGroupOke) {
if (config("correct_readgroups", default = false).asBoolean) {
logger.info("Correcting readgroups, file:" + inputBam.get)
val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
aorrg.RGID = sampleId + "-" + libId
aorrg.RGLB = libId
aorrg.RGSM = sampleId
aorrg.RGPL = "unknown"
aorrg.RGPU = "na"
aorrg.isIntermediate = true
qscript.add(aorrg)
} else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
"\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
} else {
val oldBamFile: File = inputBam.get
val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
val baiLn = Ln(qscript, oldIndex, newIndex)
add(baiLn)
val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bamLn.deps :+= baiLn.output
add(bamLn)
val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId)
bamMetrics.libId = Some(libId)
bamMetrics.inputBam = bamFile.get
bamMetrics.outputDir = new File(libDir, "metrics")
bamMetrics.init()
bamMetrics.biopetScript()
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
}
}
}
case _ => logger.warn("Sample: " + sampleId + " Library: " + libId + ", no reads found")
}
mapping.foreach(mapping => {
mapping.init()
mapping.biopetScript()
addAll(mapping.functions) // Add functions of mapping to curent function pool
addSummaryQScript(mapping)
})
override def addJobs = {
super.addJobs()
variantcalling.foreach(vc => {
vc.sampleId = Some(sampleId)
......@@ -241,59 +86,16 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
}
}
/** This will add jobs for the double preprocessing */
protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
if (input == Nil) None
else if (input.tail == Nil) {
val bamFile = new File(sampleDir, s"$sampleId.bam")
val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex: File = new File(sampleDir, s"$sampleId.bai")
val baiLn = Ln(qscript, oldIndex, newIndex)
add(baiLn)
val bamLn = Ln(qscript, input.head, bamFile)
bamLn.deps :+= baiLn.output
add(bamLn)
Some(bamFile)
} else {
val md = new MarkDuplicates(qscript)
md.input = input
md.output = new File(sampleDir, sampleId + ".dedup.bam")
md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
//FIXME: making this file intermediate make the pipeline restart unnessery jobs
//md.isIntermediate = isIntermediate
add(md)
addSummarizable(md, "mark_duplicates")
Some(md.output)
}
}
lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
(lib._2.bamFile, lib._2.preProcessBam) match {
case (_, Some(file)) => Some(file)
case (Some(file), _) => Some(file)
case _ => None
}
}).toList)
lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Some(makeVariantcalling(multisample = false))
} else None
/** This will add sample jobs */
def addJobs(): Unit = {
addPerLibJobs()
override def addJobs(): Unit = {
super.addJobs()
preProcessBam.foreach { bam =>
val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId)
bamMetrics.inputBam = bam
bamMetrics.outputDir = new File(sampleDir, "metrics")
bamMetrics.init()
bamMetrics.biopetScript()
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
variantcalling.foreach(vc => {
vc.sampleId = Some(sampleId)
vc.outputDir = new File(sampleDir, "variantcalling")
......@@ -321,7 +123,9 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
} else None
/** This will add the mutisample variantcalling */
def addMultiSampleJobs(): Unit = {
override def addMultiSampleJobs = {
super.addMultiSampleJobs()
multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling")
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
......@@ -351,11 +155,10 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
}
/** Location of summary file */
def summaryFile = new File(outputDir, "Shiva.summary.json")
override def summaryFile = new File(outputDir, "Shiva.summary.json")
/** Settings of pipeline for summary */
def summarySettings = Map(
"reference" -> referenceSummary,
override def summarySettings = super.summarySettings ++ Map(
"annotation" -> annotation.isDefined,
"multisample_variantcalling" -> multisampleVariantCalling.isDefined,
"sv_calling" -> svCalling.isDefined,
......@@ -364,5 +167,5 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
)
/** Files for the summary */
def summaryFiles = Map("referenceFasta" -> referenceFasta())
override def summaryFiles = super.summaryFiles ++ Map("referenceFasta" -> referenceFasta())
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment