Commit a76b4665 authored by Peter van 't Hof's avatar Peter van 't Hof

Adding possible to skip PrintReads

parent 5fafdb64
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import java.util
import org.broadinstitute.gatk.engine.recalibration.BQSRGatherer
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 05/04/2017.
*/
class BqsrGather extends InProcessFunction {
@Input(required = true)
var inputBqsrFiles: List[File] = _
@Output(required = true)
var outputBqsrFile: File = _
def run(): Unit = {
val l = new util.ArrayList[File]()
inputBqsrFiles.foreach(l.add(_))
val gather = new BQSRGatherer
gather.gather(l, outputBqsrFile)
}
}
......@@ -38,8 +38,8 @@ import org.broadinstitute.gatk.queue.QScript
import scala.math._
/**
* This pipeline doing a alignment to a given reference genome
*/
* This pipeline doing a alignment to a given reference genome
*/
class Mapping(val parent: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
def this() = this(null)
......@@ -99,9 +99,9 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
protected var paired: Boolean = false
val flexiprep = new Flexiprep(this)
def finalBamFile: File = if (skipMarkduplicates) {
new File(outputDir, outputName + ".bam")
} else new File(outputDir, outputName + ".dedup.bam")
def mergedBamFile = new File(outputDir, outputName + ".bam")
def finalBamFile: File = if (skipMarkduplicates) mergedBamFile
else new File(outputDir, outputName + ".dedup.bam")
override def defaults: Map[String, Any] = Map(
"gsnap" -> Map("batch" -> 4),
......@@ -260,21 +260,25 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
}
var bamFile = bamFiles.head
if (!chunking) require(bamFile == mergedBamFile)
else {
val mergeSamFile = MergeSamFiles(this, bamFiles, mergedBamFile)
mergeSamFile.isIntermediate = !keepFinalBamFile || !skipMarkduplicates
add(mergeSamFile)
bamFile = mergeSamFile.output
}
if (!skipMarkduplicates) {
bamFile = new File(outputDir, outputName + ".dedup.bam")
val md = MarkDuplicates(this, bamFiles, finalBamFile)
md.isIntermediate = !keepFinalBamFile
add(md)
addSummarizable(md, "mark_duplicates")
} else if (skipMarkduplicates && chunking) {
val mergeSamFile = MergeSamFiles(this, bamFiles, finalBamFile)
mergeSamFile.isIntermediate = !keepFinalBamFile
add(mergeSamFile)
bamFile = mergeSamFile.output
}
if (!skipMetrics) {
val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
val bamMetrics = BamMetrics(this, finalBamFile, new File(outputDir, "metrics"), sampleId, libId)
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
}
......
......@@ -51,7 +51,9 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
)
/** Method to make the variantcalling namespace of shiva */
def makeVariantcalling(multisample: Boolean, sample: Option[String] = None, library: Option[String] = None): ShivaVariantcalling with QScript = {
def makeVariantcalling(multisample: Boolean,
sample: Option[String] = None,
library: Option[String] = None): ShivaVariantcalling with QScript = {
if (multisample) new ShivaVariantcalling(qscript) {
override def namePrefix = "multisample"
override def configNamespace: String = "shivavariantcalling"
......@@ -79,7 +81,10 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
/** Class to generate jobs for a library */
class Library(libId: String) extends super.Library(libId) {
override def summaryFiles = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)
override def summaryFiles = super.summaryFiles ++
variantcalling.map("final" -> _.finalFile) ++
bqsrFile.map("baserecal" -> _) ++
bqsrAfterFile.map("baserecal_after" -> _)
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
lazy val useBaseRecalibration: Boolean = {
......@@ -89,20 +94,30 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
c && br.knownSites.nonEmpty
}
lazy val usePrintReads: Boolean = if (useBaseRecalibration) config("use_printreads", default = true) else false
lazy val useAnalyzeCovariates: Boolean = if (useBaseRecalibration) config("use_analyze_covariates", default = true) else false
lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile(".baserecal")) else None
lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile(".baserecal.after")) else None
override def keepFinalBamfile = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration
override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
override def bamFile = mapping.map(_.mergedBamFile)
override def preProcessBam = if (useIndelRealigner && usePrintReads)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else if (usePrintReads) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else bamFile
/** Library specific settings */
override def summarySettings = Map(
override def summarySettings = super.summarySettings ++ Map(
"library_variantcalling" -> variantcalling.isDefined,
"use_indel_realigner" -> useIndelRealigner,
"use_base_recalibration" -> useBaseRecalibration)
"use_base_recalibration" -> useBaseRecalibration,
"use_print_reads" -> usePrintReads,
"useAnalyze_covariates" -> useAnalyzeCovariates
)
lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
(bamFile.isDefined || preProcessBam.isDefined)) {
......@@ -115,11 +130,11 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
if (useIndelRealigner && useBaseRecalibration) {
val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
addBaseRecalibrator(file, libDir, libraries.size > 1)
addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
} else if (useIndelRealigner) {
addIndelRealign(bamFile.get, libDir, libraries.size > 1)
} else if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
}
variantcalling.foreach(vc => {
......@@ -131,6 +146,41 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
add(vc)
})
}
/** Adds base recalibration jobs */
def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean, usePrintreads: Boolean): File = {
val baseRecalibrator = BaseRecalibrator(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))
if (baseRecalibrator.knownSites.isEmpty) return inputBam
add(baseRecalibrator)
val baseRecalibratorAfter = BaseRecalibrator(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out)
if (useAnalyzeCovariates) {
add(baseRecalibratorAfter)
add(AnalyzeCovariates(qscript, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
}
val printReads = PrintReads(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
printReads.BQSR = Some(baseRecalibrator.out)
printReads.isIntermediate = isIntermediate
if (usePrintreads) {
add(printReads)
printReads.out
} else inputBam
}
} // end of library
lazy val bqsrFile: Option[File] = {
val files = libraries.flatMap(_._2.bqsrFile).toList
if (files.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = files
gather.outputBqsrFile = createFile("baserecal")
add(gather)
Some(gather.outputBqsrFile)
}
}
lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
......@@ -164,7 +214,7 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
})
}
}
}
} // End of sample
lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Some(makeVariantcalling(multisample = true))
......@@ -192,6 +242,7 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling")
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
vc.inputBqsrFiles = samples.flatMap { case (sampleId, sample) => sample.bqsrFile.map(sampleId -> _) }
add(vc)
annotation.foreach { toucan =>
......@@ -237,28 +288,6 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
indelRealigner.out
}
/** Adds base recalibration jobs */
def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = {
val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))
if (baseRecalibrator.knownSites.isEmpty) return inputBam
add(baseRecalibrator)
if (config("use_analyze_covariates", default = true).asBoolean) {
val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out)
add(baseRecalibratorAfter)
add(AnalyzeCovariates(this, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
}
val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
printReads.BQSR = Some(baseRecalibrator.out)
printReads.isIntermediate = isIntermediate
add(printReads)
printReads.out
}
}
/** This object give a default main method to the pipelines */
......
......@@ -46,6 +46,8 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript
var inputBams: Map[String, File] = Map()
var inputBqsrFiles: Map[String, File] = Map()
/** Executed before script */
def init(): Unit = {
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
......
......@@ -20,6 +20,7 @@
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.extensions.gatk.BqsrGather
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Default mode for the haplotypecaller */
......@@ -29,6 +30,13 @@ class HaplotypeCaller(val parent: Configurable) extends Variantcaller {
def biopetScript() {
val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile)
hc.BQSR = if (inputBqsrFiles.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = inputBqsrFiles.values.toList
gather.outputBqsrFile = new File(outputDir, "bqsr.merge")
add(gather)
Some(gather.outputBqsrFile)
}
add(hc)
}
}
......
......@@ -44,6 +44,7 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
def biopetScript() {
gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
hc.BQSR = inputBqsrFiles.get(sample)
add(hc)
sample -> hc.out
}
......
......@@ -20,6 +20,7 @@
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.extensions.gatk.BqsrGather
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Default mode for UnifiedGenotyper */
......@@ -29,6 +30,13 @@ class UnifiedGenotyper(val parent: Configurable) extends Variantcaller {
def biopetScript() {
val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile)
ug.BQSR = if (inputBqsrFiles.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = inputBqsrFiles.values.toList
gather.outputBqsrFile = new File(outputDir, "bqsr.merge")
add(gather)
Some(gather.outputBqsrFile)
}
add(ug)
}
}
......@@ -33,6 +33,7 @@ trait Variantcaller extends QScript with BiopetQScript with Reference {
* Map of samplename -> (preprocessed) bam file
*/
var inputBams: Map[String, File] = _
var inputBqsrFiles: Map[String, File] = _
def init() = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment