Commit 19f463b7 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge pull request #58 from biopet/fix-BIOPET-635

Option to skip printreads and give bqsr files to the variantcalller
parents af88febc c37b0388
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import java.util
import org.broadinstitute.gatk.engine.recalibration.BQSRGatherer
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 05/04/2017.
*/
class BqsrGather extends InProcessFunction {
@Input(required = true)
var inputBqsrFiles: List[File] = _
@Output(required = true)
var outputBqsrFile: File = _
def run(): Unit = {
val l = new util.ArrayList[File]()
inputBqsrFiles.foreach(l.add(_))
val gather = new BQSRGatherer
gather.gather(l, outputBqsrFile)
}
}
......@@ -37,7 +37,9 @@ import org.broadinstitute.gatk.queue.QScript
import scala.math._
// TODO: documentation
/**
* This pipeline doing a alignment to a given reference genome
*/
class Mapping(val parent: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
def this() = this(null)
......@@ -79,7 +81,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
protected var platform: String = config("platform", default = "illumina")
/** Readgroup platform unit */
protected var platformUnit: String = config("platform_unit", default = "na")
protected var platformUnit: Option[String] = config("platform_unit")
/** Readgroup sequencing center */
protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
......@@ -97,9 +99,9 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
protected var paired: Boolean = false
val flexiprep = new Flexiprep(this)
def finalBamFile: File = if (skipMarkduplicates) {
new File(outputDir, outputName + ".bam")
} else new File(outputDir, outputName + ".dedup.bam")
def mergedBamFile = new File(outputDir, outputName + ".bam")
def finalBamFile: File = if (skipMarkduplicates) mergedBamFile
else new File(outputDir, outputName + ".dedup.bam")
override def defaults: Map[String, Any] = Map(
"gsnap" -> Map("batch" -> 4),
......@@ -258,21 +260,25 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
}
var bamFile = bamFiles.head
if (!chunking) require(bamFile == mergedBamFile)
else {
val mergeSamFile = MergeSamFiles(this, bamFiles, mergedBamFile)
mergeSamFile.isIntermediate = !keepFinalBamFile || !skipMarkduplicates
add(mergeSamFile)
bamFile = mergeSamFile.output
}
if (!skipMarkduplicates) {
bamFile = new File(outputDir, outputName + ".dedup.bam")
val md = MarkDuplicates(this, bamFiles, finalBamFile)
val md = MarkDuplicates(this, mergedBamFile :: Nil, finalBamFile)
md.isIntermediate = !keepFinalBamFile
add(md)
addSummarizable(md, "mark_duplicates")
} else if (skipMarkduplicates && chunking) {
val mergeSamFile = MergeSamFiles(this, bamFiles, finalBamFile)
mergeSamFile.isIntermediate = !keepFinalBamFile
add(mergeSamFile)
bamFile = mergeSamFile.output
}
if (!skipMetrics) {
val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
val bamMetrics = BamMetrics(this, finalBamFile, new File(outputDir, "metrics"), sampleId, libId)
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
}
......@@ -379,7 +385,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
hisat2.R2 = R2
hisat2.rgId = Some(readgroupId)
hisat2.rg +:= s"PL:$platform"
hisat2.rg +:= s"PU:$platformUnit"
platformUnit.foreach(x => hisat2.rg +:= s"PU:$x")
libId match {
case Some(id) => hisat2.rg +:= s"LB:$id"
case otherwise => ;
......@@ -452,7 +458,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
RG += "SM:" + sampleId.get + ","
RG += "LB:" + libId.get + ","
if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
RG += "PU:" + platformUnit + ","
platformUnit.foreach(x => RG += "PU:" + x + ",")
if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
......@@ -497,7 +503,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
bowtie2.rgId = Some(readgroupId)
bowtie2.rg +:= ("LB:" + libId.get)
bowtie2.rg +:= ("PL:" + platform)
bowtie2.rg +:= ("PU:" + platformUnit)
platformUnit.foreach(x => bowtie2.rg +:= ("PU:" + x))
bowtie2.rg +:= ("SM:" + sampleId.get)
bowtie2.R1 = R1
bowtie2.R2 = R2
......@@ -554,7 +560,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
addOrReplaceReadGroups.RGID = readgroupId
addOrReplaceReadGroups.RGLB = libId.get
addOrReplaceReadGroups.RGPL = platform
addOrReplaceReadGroups.RGPU = platformUnit
addOrReplaceReadGroups.RGPU = platformUnit.getOrElse(readgroupId)
addOrReplaceReadGroups.RGSM = sampleId.get
if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
......@@ -568,7 +574,7 @@ class Mapping(val parent: Configurable) extends QScript with SummaryQScript with
var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
RG += "LB:" + libId.get + "\\t"
RG += "PL:" + platform + "\\t"
RG += "PU:" + platformUnit + "\\t"
platformUnit.foreach(x => RG += "PU:" + x + "\\t")
RG += "SM:" + sampleId.get + "\\t"
if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
......
......@@ -50,8 +50,12 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
"unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
)
lazy val usePrintReads: Boolean = config("use_printreads", default = true)
/** Method to make the variantcalling namespace of shiva */
def makeVariantcalling(multisample: Boolean, sample: Option[String] = None, library: Option[String] = None): ShivaVariantcalling with QScript = {
def makeVariantcalling(multisample: Boolean,
sample: Option[String] = None,
library: Option[String] = None): ShivaVariantcalling with QScript = {
if (multisample) new ShivaVariantcalling(qscript) {
override def namePrefix = "multisample"
override def configNamespace: String = "shivavariantcalling"
......@@ -79,7 +83,10 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
/** Class to generate jobs for a library */
class Library(libId: String) extends super.Library(libId) {
override def summaryFiles = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)
override def summaryFiles = super.summaryFiles ++
variantcalling.map("final" -> _.finalFile) ++
bqsrFile.map("baserecal" -> _) ++
bqsrAfterFile.map("baserecal_after" -> _)
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
lazy val useBaseRecalibration: Boolean = {
......@@ -89,20 +96,28 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
c && br.knownSites.nonEmpty
}
lazy val useAnalyzeCovariates: Boolean = if (useBaseRecalibration) config("use_analyze_covariates", default = true) else false
lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile("baserecal")) else None
lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile("baserecal.after")) else None
override def keepFinalBamfile = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration
override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
override def bamFile = mapping.map(_.mergedBamFile)
override def preProcessBam = if (useIndelRealigner && usePrintReads)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else if (usePrintReads) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else bamFile
/** Library specific settings */
override def summarySettings = Map(
override def summarySettings = super.summarySettings ++ Map(
"library_variantcalling" -> variantcalling.isDefined,
"use_indel_realigner" -> useIndelRealigner,
"use_base_recalibration" -> useBaseRecalibration)
"use_base_recalibration" -> useBaseRecalibration,
"useAnalyze_covariates" -> useAnalyzeCovariates
)
lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
(bamFile.isDefined || preProcessBam.isDefined)) {
......@@ -115,11 +130,11 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
if (useIndelRealigner && useBaseRecalibration) {
val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
addBaseRecalibrator(file, libDir, libraries.size > 1)
addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
} else if (useIndelRealigner) {
addIndelRealign(bamFile.get, libDir, libraries.size > 1)
} else if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
}
variantcalling.foreach(vc => {
......@@ -131,6 +146,41 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
add(vc)
})
}
/** Adds base recalibration jobs */
def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean, usePrintreads: Boolean): File = {
require(bqsrFile.isDefined, "bqsrFile should contain something at this point")
val baseRecalibrator = BaseRecalibrator(qscript, inputBam, bqsrFile.get) // at this point bqsrFile should exist
if (baseRecalibrator.knownSites.isEmpty) return inputBam
add(baseRecalibrator)
if (useAnalyzeCovariates) {
val baseRecalibratorAfter = BaseRecalibrator(qscript, inputBam, bqsrAfterFile.get)
baseRecalibratorAfter.BQSR = bqsrFile
add(baseRecalibratorAfter)
add(AnalyzeCovariates(qscript, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
}
if (usePrintreads) {
val printReads = PrintReads(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
printReads.BQSR = Some(baseRecalibrator.out)
printReads.isIntermediate = isIntermediate
add(printReads)
printReads.out
} else inputBam
}
} // end of library
lazy val bqsrFile: Option[File] = {
val files = libraries.flatMap(_._2.bqsrFile).toList
if (files.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = files
gather.outputBqsrFile = createFile("baserecal")
add(gather)
Some(gather.outputBqsrFile)
}
}
lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
......@@ -164,7 +214,7 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
})
}
}
}
} // End of sample
lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Some(makeVariantcalling(multisample = true))
......@@ -192,7 +242,17 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling")
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
if (!usePrintReads)
vc.inputBqsrFiles = samples.flatMap { case (sampleId, sample) => sample.bqsrFile.map(sampleId -> _) }
add(vc)
if (!usePrintReads) {
import variantcallers._
if (vc.callers.exists(_ match {
case _: HaplotypeCaller | _: HaplotypeCallerAllele | _: HaplotypeCallerGvcf => false
case _: UnifiedGenotyper | _: UnifiedGenotyperAllele => false
case _ => true
})) logger.warn("Not all variantcallers chosen can read BQSR files, All non-GATK")
}
annotation.foreach { toucan =>
toucan.outputDir = new File(outputDir, "annotation")
......@@ -221,7 +281,8 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
"sv_calling" -> svCalling.isDefined,
"cnv_calling" -> cnvCalling.isDefined,
"regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
"amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
"amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")),
"use_print_reads" -> usePrintReads
)
/** Adds indel realignment jobs */
......@@ -237,28 +298,6 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
indelRealigner.out
}
/** Adds base recalibration jobs */
def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = {
val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))
if (baseRecalibrator.knownSites.isEmpty) return inputBam
add(baseRecalibrator)
if (config("use_analyze_covariates", default = true).asBoolean) {
val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out)
add(baseRecalibratorAfter)
add(AnalyzeCovariates(this, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
}
val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
printReads.BQSR = Some(baseRecalibrator.out)
printReads.isIntermediate = isIntermediate
add(printReads)
printReads.out
}
}
/** This object give a default main method to the pipelines */
......
......@@ -46,6 +46,8 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript
var inputBams: Map[String, File] = Map()
var inputBqsrFiles: Map[String, File] = Map()
/** Executed before script */
def init(): Unit = {
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
......@@ -95,6 +97,7 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript
cv.rod_priority_list = Some(callers.filter(_.mergeVcfResults).map(_.name).mkString(","))
for (caller <- callers) {
caller.inputBams = inputBams
caller.inputBqsrFiles = inputBqsrFiles
caller.namePrefix = namePrefix
caller.outputDir = new File(outputDir, caller.name)
add(caller)
......
......@@ -20,6 +20,7 @@
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.extensions.gatk.BqsrGather
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Default mode for the haplotypecaller */
......@@ -29,6 +30,13 @@ class HaplotypeCaller(val parent: Configurable) extends Variantcaller {
def biopetScript() {
val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile)
hc.BQSR = if (inputBqsrFiles.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = inputBqsrFiles.values.toList
gather.outputBqsrFile = new File(outputDir, "bqsr.merge")
add(gather)
Some(gather.outputBqsrFile)
}
add(hc)
}
}
......
......@@ -20,7 +20,7 @@
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.extensions.gatk.BqsrGather
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Allele mode for Haplotypecaller */
......@@ -33,6 +33,13 @@ class HaplotypeCallerAllele(val parent: Configurable) extends Variantcaller {
def biopetScript() {
val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile)
hc.alleles = Some(alleles)
hc.BQSR = if (inputBqsrFiles.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = inputBqsrFiles.values.toList
gather.outputBqsrFile = new File(outputDir, "bqsr.merge")
add(gather)
Some(gather.outputBqsrFile)
}
hc.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES")
add(hc)
}
......
......@@ -44,6 +44,7 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
def biopetScript() {
gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
hc.BQSR = inputBqsrFiles.get(sample)
add(hc)
sample -> hc.out
}
......
......@@ -20,6 +20,7 @@
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.extensions.gatk.BqsrGather
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Default mode for UnifiedGenotyper */
......@@ -29,6 +30,13 @@ class UnifiedGenotyper(val parent: Configurable) extends Variantcaller {
def biopetScript() {
val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile)
ug.BQSR = if (inputBqsrFiles.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = inputBqsrFiles.values.toList
gather.outputBqsrFile = new File(outputDir, "bqsr.merge")
add(gather)
Some(gather.outputBqsrFile)
}
add(ug)
}
}
......@@ -20,6 +20,7 @@
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.extensions.gatk.BqsrGather
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Allele mode for GenotyperAllele */
......@@ -33,6 +34,13 @@ class UnifiedGenotyperAllele(val parent: Configurable) extends Variantcaller {
val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile)
ug.alleles = Some(alleles)
ug.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES")
ug.BQSR = if (inputBqsrFiles.isEmpty) None else {
val gather = new BqsrGather
gather.inputBqsrFiles = inputBqsrFiles.values.toList
gather.outputBqsrFile = new File(outputDir, "bqsr.merge")
add(gather)
Some(gather.outputBqsrFile)
}
add(ug)
}
}
......@@ -33,6 +33,7 @@ trait Variantcaller extends QScript with BiopetQScript with Reference {
* Map of samplename -> (preprocessed) bam file
*/
var inputBams: Map[String, File] = _
var inputBqsrFiles: Map[String, File] = Map()
def init() = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment