Commit 89b7a259 authored by Peter van 't Hof's avatar Peter van 't Hof

Split variantcalllers in separated files

parent bd7ca140
......@@ -31,6 +31,8 @@ class BiopetPipeline(val root: Configurable) extends QScript with SummaryQScript
// Executing a biopet pipeline inside
val shiva = new Shiva(this)
add(shiva)
shiva.init()
shiva.biopetScript()
addAll(shiva.functions)
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.broad._
import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaTrait, ShivaVariantcallingTrait }
import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaVariantcallingTrait, ShivaTrait }
import org.broadinstitute.gatk.queue.QScript
/**
......
......@@ -6,9 +6,9 @@
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.broad.GenotypeGVCFs
import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
......@@ -22,99 +22,13 @@ class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVari
/** Will generate all available variantcallers */
override def callersList = {
new HaplotypeCallerGvcf ::
new HaplotypeCallerAllele ::
new UnifiedGenotyperAllele ::
new UnifiedGenotyper ::
new HaplotypeCaller ::
new HaplotypeCallerGvcf(this) ::
new HaplotypeCallerAllele(this) ::
new UnifiedGenotyperAllele(this) ::
new UnifiedGenotyper(this) ::
new HaplotypeCaller(this) ::
super.callersList
}
/** Default mode for the haplotypecaller */
class HaplotypeCaller extends Variantcaller {
val name = "haplotypecaller"
protected val defaultPrio = 1
def outputFile = new File(outputDir, namePrefix + ".haplotypecaller.vcf.gz")
def addJobs() {
val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript)
hc.input_file = inputBams
hc.out = outputFile
add(hc)
}
}
/** Default mode for UnifiedGenotyper */
class UnifiedGenotyper extends Variantcaller {
val name = "unifiedgenotyper"
protected val defaultPrio = 20
def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper.vcf.gz")
def addJobs() {
val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript)
ug.input_file = inputBams
ug.out = outputFile
add(ug)
}
}
/** Allele mode for Haplotypecaller */
class HaplotypeCallerAllele extends Variantcaller {
val name = "haplotypecaller_allele"
protected val defaultPrio = 5
def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_allele.vcf.gz")
def addJobs() {
val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript)
hc.input_file = inputBams
hc.out = outputFile
hc.alleles = config("input_alleles")
hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
add(hc)
}
}
/** Allele mode for GenotyperAllele */
class UnifiedGenotyperAllele extends Variantcaller {
val name = "unifiedgenotyper_allele"
protected val defaultPrio = 9
def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper_allele.vcf.gz")
def addJobs() {
val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript)
ug.input_file = inputBams
ug.out = outputFile
ug.alleles = config("input_alleles")
ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
add(ug)
}
}
/** Gvcf mode for haplotypecaller */
class HaplotypeCallerGvcf extends Variantcaller {
val name = "haplotypecaller_gvcf"
protected val defaultPrio = 5
def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_gvcf.vcf.gz")
def addJobs() {
val gvcfFiles = for (inputBam <- inputBams) yield {
val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript)
hc.input_file = List(inputBam)
hc.out = new File(outputDir, inputBam.getName.stripSuffix(".bam") + ".gvcf.vcf.gz")
hc.useGvcf()
add(hc)
hc.out
}
val genotypeGVCFs = GenotypeGVCFs(qscript, gvcfFiles, outputFile)
add(genotypeGVCFs)
}
}
}
/** object to add default main method to pipeline */
......
package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Default mode for the haplotypecaller */
class HaplotypeCaller(val root: Configurable) extends Variantcaller {
val name = "haplotypecaller"
protected def defaultPrio = 1
def biopetScript() {
val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(this)
hc.input_file = inputBams.values.toList
hc.out = outputFile
add(hc)
}
}
package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Allele mode for Haplotypecaller */
class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller {
val name = "haplotypecaller_allele"
protected def defaultPrio = 5
def biopetScript() {
val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(this)
hc.input_file = inputBams.values.toList
hc.out = outputFile
hc.alleles = config("input_alleles")
hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
add(hc)
}
}
package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers
import nl.lumc.sasc.biopet.extensions.gatk.broad.GenotypeGVCFs
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Gvcf mode for haplotypecaller */
class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller {
val name = "haplotypecaller_gvcf"
protected def defaultPrio = 5
def biopetScript() {
val gvcfFiles = for ((sample, inputBam) <- inputBams) yield {
val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(this)
hc.input_file = List(inputBam)
hc.out = new File(outputDir, sample + ".gvcf.vcf.gz")
hc.useGvcf()
add(hc)
hc.out
}
val genotypeGVCFs = GenotypeGVCFs(this, gvcfFiles.toList, outputFile)
add(genotypeGVCFs)
}
}
package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Default mode for UnifiedGenotyper */
class UnifiedGenotyper(val root: Configurable) extends Variantcaller {
val name = "unifiedgenotyper"
protected def defaultPrio = 20
def biopetScript() {
val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(this)
ug.input_file = inputBams.values.toList
ug.out = outputFile
add(ug)
}
}
package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Allele mode for GenotyperAllele */
class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller {
val name = "unifiedgenotyper_allele"
protected def defaultPrio = 9
def biopetScript() {
val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(this)
ug.input_file = inputBams.values.toList
ug.out = outputFile
ug.alleles = config("input_alleles")
ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
add(ug)
}
}
......@@ -76,7 +76,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
val map = Map("variantcallers" -> callers.toList)
val pipeline = initPipeline(map)
pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList
pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap
val illegalArgumentException = pipeline.inputBams.isEmpty ||
(!raw && !bcftools &&
......
......@@ -27,9 +27,10 @@ import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait }
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QScript
trait BastyTrait extends MultiSampleQScript {
qscript =>
qscript: QScript =>
case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
......
......@@ -17,17 +17,18 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.QSettings
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
import org.broadinstitute.gatk.queue.function.QFunction
import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
import org.broadinstitute.gatk.utils.commandline.Argument
/** Base for biopet pipeline */
trait BiopetQScript extends Configurable with GatkLogging {
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
@Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
val configfiles: List[File] = Nil
......@@ -125,6 +126,25 @@ trait BiopetQScript extends Configurable with GatkLogging {
function.isIntermediate = isIntermediate
add(function)
}
def add(subPipeline: QScript): Unit = {
subPipeline.qSettings = this.qSettings
subPipeline match {
case that: SummaryQScript =>
that.init()
that.biopetScript()
that.addSummaryJobs()
this match {
case s: SummaryQScript => s.addSummaryQScript(that)
case _ =>
}
case that:BiopetQScript =>
that.init()
that.biopetScript()
case _ => subPipeline.script
}
addAll(subPipeline.functions)
}
}
object BiopetQScript {
......
......@@ -19,11 +19,11 @@ import java.io.File
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.Argument
/** This trait creates a structured way of use multisample pipelines */
trait MultiSampleQScript extends SummaryQScript {
qscript =>
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
@Argument(doc = "Only Sample", shortName = "s", required = false, fullName = "sample")
private[core] val onlySamples: List[String] = Nil
......
......@@ -19,6 +19,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum }
import org.broadinstitute.gatk.queue.QScript
import scala.collection.mutable
......@@ -27,7 +28,7 @@ import scala.collection.mutable
*
* Created by pjvan_thof on 2/14/15.
*/
trait SummaryQScript extends BiopetQScript { qscript =>
trait SummaryQScript extends BiopetQScript { qscript: QScript =>
/** Key is sample/library, None is sample or library is not applicable */
private[summary] var summarizables: Map[(String, Option[String], Option[String]), List[Summarizable]] = Map()
......
......@@ -5,6 +5,8 @@ import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.language.reflectiveCalls
/**
* Created by pjvanthof on 17/11/15.
*/
......
......@@ -34,6 +34,8 @@ class Delly(val root: Configurable) extends QScript with BiopetQScript with Refe
@Output(doc = "Delly result VCF")
var outputVcf: File = _
this.inputFiles
var outputName: String = _
// select the analysis types DEL,DUP,INV,TRA
......
......@@ -15,6 +15,7 @@
*/
package nl.lumc.sasc.biopet
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling
import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand }
object BiopetExecutablePublic extends BiopetExecutable {
......@@ -33,7 +34,7 @@ object BiopetExecutablePublic extends BiopetExecutable {
def pipelines: List[MainCommand] = List(
nl.lumc.sasc.biopet.pipelines.shiva.Shiva,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling,
ShivaVariantcalling,
nl.lumc.sasc.biopet.pipelines.basty.Basty
) ::: publicPipelines
......
package nl.lumc.sasc.biopet.utils
import java.io.File
import htsjdk.samtools.SamReaderFactory
import scala.collection.JavaConversions._
/**
* Created by pjvan_thof on 11/19/15.
*/
object BamUtils {
/**
* This method will convert a list of bam files to a Map[<sampleName>, <bamFile>]
*
* Each sample may only be once in the list
*
* @throws IllegalArgumentException
* @param bamFiles input bam files
* @return
*/
def sampleBamMap(bamFiles: List[File]): Map[String, File] = {
val temp = bamFiles.map { file =>
val inputSam = SamReaderFactory.makeDefault.open(file)
val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct
if (samples.size == 1) samples.head -> file
else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file)
}
if (temp.map(_._1).distinct.size != temp.size) throw new IllegalArgumentException("Samples has been found twice")
temp.toMap
}
}
......@@ -15,18 +15,15 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer
import nl.lumc.sasc.biopet.extensions.clever.CleverCaller
import nl.lumc.sasc.biopet.extensions.delly.Delly
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
/**
* Common trait for ShivaVariantcalling
*
......@@ -40,25 +37,11 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
@Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
protected var inputBamsArg: List[File] = Nil
protected var inputBams: Map[String, File] = Map()
def addBamFile(file: File, sampleId: Option[String] = None): Unit = {
sampleId match {
case Some(sample) => inputBams += sample -> file
case _ if !file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file)
case _ => {
val inputSam = SamReaderFactory.makeDefault.open(file)
val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct
if (samples.size == 1) {
inputBams += samples.head -> file
} else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file)
}
}
}
var inputBams: Map[String, File] = Map()
/** Executed before script */
def init(): Unit = {
inputBamsArg.foreach(addBamFile(_))
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
}
/** Variantcallers requested by the config */
......
......@@ -15,8 +15,6 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.File
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
import nl.lumc.sasc.biopet.extensions.Ln
......@@ -25,6 +23,7 @@ import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
......@@ -33,8 +32,7 @@ import scala.collection.JavaConversions._
*
* Created by pjvan_thof on 2/26/15.
*/
trait ShivaTrait extends MultiSampleQScript with Reference {
qscript =>
trait ShivaTrait extends MultiSampleQScript with Reference { qscript: QScript =>
/** Executed before running the script */
def init(): Unit = {
......@@ -233,8 +231,8 @@ trait ShivaTrait extends MultiSampleQScript with Reference {
vc.sampleId = Some(sampleId)
vc.libId = Some(libId)
vc.outputDir = new File(libDir, "variantcalling")
if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
else vc.inputBams = bamFile.get :: Nil
if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
else vc.inputBams = Map(sampleId -> bamFile.get)
vc.init()
vc.biopetScript()
addAll(vc.functions)
......@@ -299,7 +297,7 @@ trait ShivaTrait extends MultiSampleQScript with Reference {
variantcalling.foreach(vc => {
vc.sampleId = Some(sampleId)
vc.outputDir = new File(sampleDir, "variantcalling")
vc.inputBams = bam :: Nil
vc.inputBams = Map(sampleId -> bam)
vc.init()
vc.biopetScript()
addAll(vc.functions)
......@@ -326,7 +324,7 @@ trait ShivaTrait extends MultiSampleQScript with Reference {
def addMultiSampleJobs(): Unit = {
multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling")
vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
vc.init()
vc.biopetScript()
addAll(vc.functions)
......@@ -344,7 +342,7 @@ trait ShivaTrait extends MultiSampleQScript with Reference {
svCalling.foreach(sv => {
sv.outputDir = new File(outputDir, "sv_calling")
samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1))))
sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
sv.init()
sv.biopetScript()
addAll(sv.functions)
......
......@@ -19,12 +19,11 @@ import java.io.File
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge }
import nl.lumc.sasc.biopet.extensions.gatk.{ GenotypeConcordance, CombineVariants }
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats }
import nl.lumc.sasc.biopet.extensions.{ Ln, Bgzip, Tabix }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance }
import nl.lumc.sasc.biopet.extensions.tools.VcfStats
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.Input
/**
......@@ -33,10 +32,17 @@ import org.broadinstitute.gatk.utils.commandline.Input
* Created by pjvan_thof on 2/26/15.
*/
trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with Reference {
qscript =>
qscript: QScript =>
@Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
var inputBams: List[File] = Nil
protected var inputBamsArg: List[File] = Nil
var inputBams: Map[String, File] = Map()
/** Executed before script */
def init(): Unit = {
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
}
var referenceVcf: Option[File] = config("reference_vcf")
......@@ -53,10 +59,6 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with
override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))
/** Executed before script */
def init(): Unit = {
}
/** Final merged output files of all variantcaller modes */
def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")
......@@ -81,7 +83,8 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with
cv.genotypeMergeOptions = Some("PRIORITIZE")
cv.rodPriorityList = callers.map(_.name).mkString(",")