Commit b1e56b71 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Switch shiva to multisample mapping

parent 31a835a1
...@@ -56,19 +56,21 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { ...@@ -56,19 +56,21 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
("use_indel_realigner" -> useIndelRealigner) + ("use_indel_realigner" -> useIndelRealigner) +
("use_base_recalibration" -> useBaseRecalibration) ("use_base_recalibration" -> useBaseRecalibration)
/** This will adds preprocess steps, gatk indel realignment and base recalibration is included here */ override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
override def preProcess(input: File): Option[File] = { bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
if (!useIndelRealigner && !useBaseRecalibration) None else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
else { else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
val indelRealignFile = useIndelRealigner match { else bamFile
case true => addIndelRealign(input, libDir, useBaseRecalibration || libraries.size > 1)
case false => input override def addJobs(): Unit = {
} super.addJobs()
if (useIndelRealigner && useBaseRecalibration) {
useBaseRecalibration match { val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
case true => Some(addBaseRecalibrator(indelRealignFile, libDir, libraries.size > 1)) addBaseRecalibrator(file, libDir, libraries.size > 1)
case false => Some(indelRealignFile) } else if (useIndelRealigner) {
} addIndelRealign(bamFile.get, libDir, libraries.size > 1)
} else if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
} }
} }
} }
...@@ -77,15 +79,15 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { ...@@ -77,15 +79,15 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
/** This methods will add double preprocess steps, with GATK indel realignment */ override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
if (input.size <= 1) super.addDoublePreProcess(input) } else bamFile
else super.addDoublePreProcess(input, isIntermediate = useIndelRealigner).collect {
case file => override def addJobs(): Unit = {
useIndelRealigner match { super.addJobs()
case true => addIndelRealign(file, sampleDir, isIntermediate = false)
case false => file if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
} addIndelRealign(bamFile.get, sampleDir, libraries.size > 1)
} }
} }
} }
......
...@@ -8,7 +8,6 @@ import nl.lumc.sasc.biopet.extensions.Ln ...@@ -8,7 +8,6 @@ import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq} import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq}
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
import MultisampleMapping.MergeStrategy import MultisampleMapping.MergeStrategy
...@@ -18,10 +17,10 @@ import scala.collection.JavaConversions._ ...@@ -18,10 +17,10 @@ import scala.collection.JavaConversions._
/** /**
* Created by pjvanthof on 18/12/15. * Created by pjvanthof on 18/12/15.
*/ */
class MultisampleMapping(val root: Configurable) extends QScript trait MultisampleMapping extends QScript
with MultiSampleQScript with MultiSampleQScript
with Reference { qscript => with Reference { qscript =>
def this() = this(null) //def this() = this(null)
def mergeStrategy: MergeStrategy.Value = { def mergeStrategy: MergeStrategy.Value = {
val value: String = config("merge_strategy", default = "preprocessmarkduplicates") val value: String = config("merge_strategy", default = "preprocessmarkduplicates")
...@@ -31,23 +30,25 @@ class MultisampleMapping(val root: Configurable) extends QScript ...@@ -31,23 +30,25 @@ class MultisampleMapping(val root: Configurable) extends QScript
} }
} }
def init: Unit = { def init(): Unit = {
} }
def biopetScript: Unit = { def biopetScript(): Unit = {
addSamplesJobs() addSamplesJobs()
addSummaryJobs() addSummaryJobs()
} }
def addMultiSampleJobs: Unit = { def addMultiSampleJobs(): Unit = {
// this code will be executed after all code of all samples is executed // this code will be executed after all code of all samples is executed
} }
def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json") def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json")
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map("referenceFasta" -> referenceFasta())
def summarySettings: Map[String, Any] = Map("merge_strategy" -> mergeStrategy.toString) def summarySettings: Map[String, Any] = Map(
"reference" -> referenceSummary,
"merge_strategy" -> mergeStrategy.toString)
def makeSample(id: String) = new Sample(id) def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) { class Sample(sampleId: String) extends AbstractSample(sampleId) {
...@@ -81,7 +82,7 @@ class MultisampleMapping(val root: Configurable) extends QScript ...@@ -81,7 +82,7 @@ class MultisampleMapping(val root: Configurable) extends QScript
def preProcessBam = bamFile def preProcessBam = bamFile
def addJobs: Unit = { def addJobs(): Unit = {
if (inputR1.isDefined) { if (inputR1.isDefined) {
mapping.foreach { m => mapping.foreach { m =>
m.input_R1 = inputR1.get m.input_R1 = inputR1.get
...@@ -179,7 +180,7 @@ class MultisampleMapping(val root: Configurable) extends QScript ...@@ -179,7 +180,7 @@ class MultisampleMapping(val root: Configurable) extends QScript
case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?") case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
} }
if (mergeStrategy != None && libraries.flatMap(_._2.bamFile).nonEmpty) { if (mergeStrategy != MergeStrategy.None && libraries.flatMap(_._2.bamFile).nonEmpty) {
val bamMetrics = new BamMetrics(qscript) val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId) bamMetrics.sampleId = Some(sampleId)
bamMetrics.inputBam = preProcessBam.get bamMetrics.inputBam = preProcessBam.get
......
...@@ -15,35 +15,18 @@ ...@@ -15,35 +15,18 @@
*/ */
package nl.lumc.sasc.biopet.pipelines.shiva package nl.lumc.sasc.biopet.pipelines.shiva
import htsjdk.samtools.SamReaderFactory import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference } import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMapping
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
import nl.lumc.sasc.biopet.pipelines.bammetrics.{ TargetRegions, BamMetrics }
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
/** /**
* This is a trait for the Shiva pipeline * This is a trait for the Shiva pipeline
* *
* Created by pjvan_thof on 2/26/15. * Created by pjvan_thof on 2/26/15.
*/ */
trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { qscript: QScript => trait ShivaTrait extends MultisampleMapping with Reference with TargetRegions { qscript: QScript =>
/** Executed before running the script */
def init(): Unit = {
}
/** Method to add jobs */
def biopetScript(): Unit = {
addSamplesJobs()
addSummaryJobs()
}
override def reportClass = { override def reportClass = {
val shiva = new ShivaReport(this) val shiva = new ShivaReport(this)
...@@ -65,167 +48,29 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { ...@@ -65,167 +48,29 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
} }
/** Method to make a sample */ /** Method to make a sample */
def makeSample(id: String) = new Sample(id) override def makeSample(id: String) = new this.Sample(id)
/** Class that will generate jobs for a sample */ /** Class that will generate jobs for a sample */
class Sample(sampleId: String) extends AbstractSample(sampleId) { class Sample(sampleId: String) extends super.Sample(sampleId) {
/** Sample specific files to add to summary */
def summaryFiles: Map[String, File] = {
preProcessBam match {
case Some(b) => Map("preProcessBam" -> b)
case _ => Map()
}
}
/** Sample specific stats to add to summary */
def summaryStats: Map[String, Any] = Map()
/** Method to make a library */ /** Method to make a library */
def makeLibrary(id: String) = new Library(id) override def makeLibrary(id: String) = new this.Library(id)
/** Sample specific settings */ /** Sample specific settings */
override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined) override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined)
/** Class to generate jobs for a library */ /** Class to generate jobs for a library */
class Library(libId: String) extends AbstractLibrary(libId) { class Library(libId: String) extends super.Library(libId) {
/** Library specific files to add to the summary */
def summaryFiles: Map[String, File] = {
((bamFile, preProcessBam) match {
case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
case (Some(b), _) => Map("bamFile" -> b, "preProcessBam" -> b)
case _ => Map()
}) ++ (inputR1.map("input_R1" -> _) ::
inputR2.map("input_R2" -> _) ::
inputBam.map("input_bam" -> _) :: Nil).flatten.toMap
}
/** Library specific stats to add to summary */
def summaryStats: Map[String, Any] = Map()
/** Method to execute library preprocess */
def preProcess(input: File): Option[File] = None
/** Library specific settings */ /** Library specific settings */
override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined) override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined)
/** Method to make the mapping submodule */
def makeMapping = {
val mapping = new Mapping(qscript)
mapping.sampleId = Some(sampleId)
mapping.libId = Some(libId)
mapping.outputDir = libDir
mapping.outputName = sampleId + "-" + libId
(Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
}
def fileMustBeAbsulute(file: Option[File]): Option[File] = {
if (file.forall(_.isAbsolute)) file
else {
Logging.addError(s"$file for $sampleId / $libId should be a absolute file path")
file.map(_.getAbsoluteFile)
}
}
lazy val inputR1: Option[File] = fileMustBeAbsulute(config("R1"))
lazy val inputR2: Option[File] = fileMustBeAbsulute(config("R2"))
lazy val inputBam: Option[File] = fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None)
lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
(inputR1.isDefined, inputBam.isDefined) match {
case (true, _) => makeMapping // Default starting from fastq files
case (false, true) => // Starting from bam file
config("bam_to_fastq", default = false).asBoolean match {
case true => makeMapping // bam file will be converted to fastq
case false =>
val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
(None, Some(file), preProcess(file))
}
case _ => (None, None, None)
}
lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean && lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
(bamFile.isDefined || preProcessBam.isDefined)) { (bamFile.isDefined || preProcessBam.isDefined)) {
Some(makeVariantcalling(multisample = false)) Some(makeVariantcalling(multisample = false))
} else None } else None
/** This will add jobs for this library */ /** This will add jobs for this library */
def addJobs(): Unit = { override def addJobs = {
(inputR1.isDefined, inputBam.isDefined) match { super.addJobs()
case (true, _) => mapping.foreach(mapping => {
mapping.input_R1 = inputR1.get
mapping.input_R2 = inputR2
inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5"))
mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
})
case (false, true) => {
inputFiles :+= new InputFile(inputBam.get, config("bam_md5"))
config("bam_to_fastq", default = false).asBoolean match {
case true =>
val samToFastq = SamToFastq(qscript, inputBam.get,
new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
samToFastq.isIntermediate = true
qscript.add(samToFastq)
mapping.foreach(mapping => {
mapping.input_R1 = samToFastq.fastqR1
mapping.input_R2 = Some(samToFastq.fastqR2)
})
case false =>
val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
val readGroups = inputSam.getFileHeader.getReadGroups
val readGroupOke = readGroups.forall(readGroup => {
if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
readGroup.getSample == sampleId && readGroup.getLibrary == libId
})
inputSam.close()
if (!readGroupOke) {
if (config("correct_readgroups", default = false).asBoolean) {
logger.info("Correcting readgroups, file:" + inputBam.get)
val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
aorrg.RGID = sampleId + "-" + libId
aorrg.RGLB = libId
aorrg.RGSM = sampleId
aorrg.RGPL = "unknown"
aorrg.RGPU = "na"
aorrg.isIntermediate = true
qscript.add(aorrg)
} else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
"\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
} else {
val oldBamFile: File = inputBam.get
val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
val baiLn = Ln(qscript, oldIndex, newIndex)
add(baiLn)
val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bamLn.deps :+= baiLn.output
add(bamLn)
val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId)
bamMetrics.libId = Some(libId)
bamMetrics.inputBam = bamFile.get
bamMetrics.outputDir = new File(libDir, "metrics")
bamMetrics.init()
bamMetrics.biopetScript()
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
}
}
}
case _ => logger.warn("Sample: " + sampleId + " Library: " + libId + ", no reads found")
}
mapping.foreach(mapping => {
mapping.init()
mapping.biopetScript()
addAll(mapping.functions) // Add functions of mapping to curent function pool
addSummaryQScript(mapping)
})
variantcalling.foreach(vc => { variantcalling.foreach(vc => {
vc.sampleId = Some(sampleId) vc.sampleId = Some(sampleId)
...@@ -241,59 +86,16 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { ...@@ -241,59 +86,16 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
} }
} }
/** This will add jobs for the double preprocessing */
protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
if (input == Nil) None
else if (input.tail == Nil) {
val bamFile = new File(sampleDir, s"$sampleId.bam")
val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex: File = new File(sampleDir, s"$sampleId.bai")
val baiLn = Ln(qscript, oldIndex, newIndex)
add(baiLn)
val bamLn = Ln(qscript, input.head, bamFile)
bamLn.deps :+= baiLn.output
add(bamLn)
Some(bamFile)
} else {
val md = new MarkDuplicates(qscript)
md.input = input
md.output = new File(sampleDir, sampleId + ".dedup.bam")
md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
//FIXME: making this file intermediate make the pipeline restart unnessery jobs
//md.isIntermediate = isIntermediate
add(md)
addSummarizable(md, "mark_duplicates")
Some(md.output)
}
}
lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
(lib._2.bamFile, lib._2.preProcessBam) match {
case (_, Some(file)) => Some(file)
case (Some(file), _) => Some(file)
case _ => None
}
}).toList)
lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) { lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Some(makeVariantcalling(multisample = false)) Some(makeVariantcalling(multisample = false))
} else None } else None
/** This will add sample jobs */ /** This will add sample jobs */
def addJobs(): Unit = { override def addJobs(): Unit = {
addPerLibJobs() super.addJobs()
preProcessBam.foreach { bam => preProcessBam.foreach { bam =>
val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId)
bamMetrics.inputBam = bam
bamMetrics.outputDir = new File(sampleDir, "metrics")
bamMetrics.init()
bamMetrics.biopetScript()
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
variantcalling.foreach(vc => { variantcalling.foreach(vc => {
vc.sampleId = Some(sampleId) vc.sampleId = Some(sampleId)
vc.outputDir = new File(sampleDir, "variantcalling") vc.outputDir = new File(sampleDir, "variantcalling")
...@@ -321,7 +123,9 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { ...@@ -321,7 +123,9 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
} else None } else None
/** This will add the mutisample variantcalling */ /** This will add the mutisample variantcalling */
def addMultiSampleJobs(): Unit = { override def addMultiSampleJobs = {
super.addMultiSampleJobs()
multisampleVariantCalling.foreach(vc => { multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling") vc.outputDir = new File(outputDir, "variantcalling")
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
...@@ -351,11 +155,10 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { ...@@ -351,11 +155,10 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
} }
/** Location of summary file */ /** Location of summary file */
def summaryFile = new File(outputDir, "Shiva.summary.json") override def summaryFile = new File(outputDir, "Shiva.summary.json")
/** Settings of pipeline for summary */ /** Settings of pipeline for summary */
def summarySettings = Map( override def summarySettings = super.summarySettings ++ Map(
"reference" -> referenceSummary,
"annotation" -> annotation.isDefined, "annotation" -> annotation.isDefined,
"multisample_variantcalling" -> multisampleVariantCalling.isDefined, "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
"sv_calling" -> svCalling.isDefined, "sv_calling" -> svCalling.isDefined,
...@@ -364,5 +167,5 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { ...@@ -364,5 +167,5 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions {
) )
/** Files for the summary */ /** Files for the summary */
def summaryFiles = Map("referenceFasta" -> referenceFasta()) override def summaryFiles = super.summaryFiles ++ Map("referenceFasta" -> referenceFasta())
} }
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment