Commit fe9f50f7 authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'fix-Shiva_bcftools_calling' into 'develop'

Fix shiva bcftools calling

- [x] Test bcftools
- [x] Add merge to bcftools singlesample
- [x] Test bcftools singlesample


See merge request !254
parents 52a67c4b d4afc77f
......@@ -10,7 +10,7 @@ import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral {
annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).asStringList
annotation ++= config("annotation", default = Seq(), freeVar = false).asStringList
if (config.contains("dbsnp")) dbsnp = config("dbsnp")
if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount")
......
......@@ -44,12 +44,13 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
bams <- 0 to 2;
raw <- bool;
bcftools <- bool;
bcftools_singlesample <- bool;
haplotypeCallerGvcf <- bool;
haplotypeCallerAllele <- bool;
unifiedGenotyperAllele <- bool;
unifiedGenotyper <- bool;
haplotypeCaller <- bool
) yield Array[Any](bams, raw, bcftools, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele)
) yield Array[Any](bams, raw, bcftools, bcftools_singlesample, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele)
).toArray
}
......@@ -57,6 +58,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
def testShivaVariantcalling(bams: Int,
raw: Boolean,
bcftools: Boolean,
bcftools_singlesample: Boolean,
unifiedGenotyper: Boolean,
haplotypeCaller: Boolean,
haplotypeCallerGvcf: Boolean,
......@@ -65,6 +67,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
val callers: ListBuffer[String] = ListBuffer()
if (raw) callers.append("raw")
if (bcftools) callers.append("bcftools")
if (bcftools_singlesample) callers.append("bcftools_singlesample")
if (unifiedGenotyper) callers.append("unifiedgenotyper")
if (haplotypeCallerGvcf) callers.append("haplotypecaller_gvcf")
if (haplotypeCallerAllele) callers.append("haplotypecaller_allele")
......@@ -78,7 +81,8 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
val illegalArgumentException = pipeline.inputBams.isEmpty ||
(!raw && !bcftools &&
!haplotypeCaller && !unifiedGenotyper &&
!haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele)
!haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele &&
!bcftools_singlesample)
if (illegalArgumentException) intercept[IllegalArgumentException] {
pipeline.script()
......@@ -90,7 +94,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0)
//pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0)
//FIXME: Can not check for bcftools because of piping
pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
//pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0)
pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1 else 0) +
(if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0)
......
......@@ -28,7 +28,7 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
var residentFactor: Double = config("resident_factor", default = 1.2)
private var _coreMemory: Double = 2.0
def coreMemeory = _coreMemory
def coreMemory = _coreMemory
var retry = 0
......@@ -91,7 +91,7 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
commands.foreach(_.setResources())
nCoresRequest = Some(commands.map(_.threads).sum + threadsCorrection)
_coreMemory = commands.map(cmd => cmd.coreMemeory * (cmd.threads.toDouble / threads.toDouble)).sum
_coreMemory = commands.map(cmd => cmd.coreMemory * (cmd.threads.toDouble / threads.toDouble)).sum
memoryLimit = Some(_coreMemory * threads)
residentLimit = Some((_coreMemory + (0.5 * retry)) * residentFactor)
vmem = Some((_coreMemory * (vmemFactor + (0.5 * retry))) + "G")
......
......@@ -35,10 +35,9 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi
@Output(doc = "Output (for region query)", required = false)
var outputQuery: File = null
@Output(doc = "Output (for indexing)", required = false) // NOTE: it's a def since we can't change the index name ~ it's always input_name + .tbi
lazy val outputIndex: File = {
require(input != null, "Input must be defined")
new File(input.toString + ".tbi")
def outputIndex: File = {
require(input != null, "Input should be defined")
new File(input.getAbsolutePath + ".tbi")
}
@Argument(doc = "Regions to query", required = false)
......@@ -70,7 +69,8 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi
p match {
case Some(fmt) =>
require(validFormats.contains(fmt), "-p flag must be one of " + validFormats.mkString(", "))
case None => ;
outputFiles :+= outputIndex
case None =>
}
}
......@@ -96,3 +96,19 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi
else baseCommand
}
}
object Tabix {
def apply(root: Configurable, input: File) = {
val tabix = new Tabix(root)
tabix.input = input
tabix.p = tabix.input.getName match {
case s if s.endsWith(".vcf.gz") => Some("vcf")
case s if s.endsWith(".bed.gz") => Some("bed")
case s if s.endsWith(".sam.gz") => Some("sam")
case s if s.endsWith(".gff.gz") => Some("gff")
case s if s.endsWith(".psltbl.gz") => Some("psltbl")
case _ => throw new IllegalArgumentException("Unknown file type")
}
tabix
}
}
......@@ -29,6 +29,8 @@ class WigToBigWig(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input wig file")
var inputWigFile: File = _
override def defaultCoreMemory = 3.0
@Input(doc = "Input chrom sizes file", required = true)
var inputChromSizesFile: File = _
......
......@@ -22,28 +22,66 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** This extension is based on bcftools 1.1-134 */
class BcftoolsCall(val root: Configurable) extends Bcftools {
@Input(doc = "Input File")
@Input(doc = "Input File", required = false)
var input: File = _
@Output(doc = "output File")
@Output(doc = "output File", required = false)
var output: File = _
var O: String = null
var v: Boolean = config("v", default = true)
var O: Option[String] = None
var v: Boolean = config("v", default = false)
var c: Boolean = config("c", default = false)
var m: Boolean = config("m", default = false)
var r: Option[String] = config("r")
@Input(required = false)
var R: Option[File] = config("R")
var s: Option[String] = config("s")
@Input(required = false)
var S: Option[File] = config("S")
var t: Option[String] = config("t")
@Input(required = false)
var T: Option[File] = config("T")
var A: Boolean = config("A", default = false)
var f: List[String] = config("f", default = Nil)
var g: Option[Int] = config("g")
var i: Boolean = config("i", default = false)
var M: Boolean = config("M", default = false)
var V: Option[String] = config("V")
var C: Option[String] = config("C")
var n: Option[Float] = config("n")
var p: Option[Float] = config("p")
var P: Option[Float] = config("P")
var X: Boolean = config("X", default = false)
var Y: Boolean = config("Y", default = false)
override def beforeGraph(): Unit = {
require(c != m)
}
def cmdBase = required(executable) +
def cmdLine = required(executable) +
required("call") +
optional("-O", O) +
conditional(v, "-v") +
conditional(c, "-c") +
conditional(m, "-m")
def cmdPipeInput = cmdBase + "-"
def cmdPipe = cmdBase + input
def cmdLine = cmdPipe + " > " + required(output)
conditional(m, "-m") +
optional("-r", r) +
optional("-R", R) +
optional("-s", s) +
optional("-S", S) +
optional("-t", t) +
optional("-T", T) +
conditional(A, "-A") +
repeat("-f", f) +
optional("-g", g) +
conditional(i, "-i") +
conditional(M, "-M") +
optional("-V", V) +
optional("-C", C) +
optional("-n", n) +
optional("-p", p) +
optional("-P", P) +
conditional(X, "-X") +
conditional(Y, "-Y") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (inputAsStdin) "-" else required(input))
}
package nl.lumc.sasc.biopet.extensions.bcftools
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
/**
* Created by sajvanderzeeuw on 16-10-15.
*/
class BcftoolsMerge(val root: Configurable) extends Bcftools {
@Input(doc = "Input File", required = true)
var input: List[File] = Nil
@Output(doc = "output File", required = false)
var output: File = _
@Input(required = false)
var R: Option[File] = config("R")
@Input(required = false)
var useheader: Option[File] = config("useheader")
@Input(required = false)
var l: Option[File] = config("l")
var forcesamples: Boolean = config("forcesamples", default = false)
var printheader: Boolean = config("printheader", default = false)
var f: List[String] = config("f", default = Nil)
var i: List[String] = config("i", default = Nil)
var m: Option[String] = config("m")
var O: Option[String] = config("O")
var r: List[String] = config("r", default = Nil)
def cmdLine = required(executable) +
required("merge") +
(if (outputAsStsout) "" else required("-o", output)) +
conditional(forcesamples, "--force-samples") +
conditional(printheader, "--print-header") +
optional("--use-header", useheader) +
optional("-f", f) +
optional("-i", i) +
optional("-l", l) +
optional("-m", m) +
optional("-O", O) +
optional("-r", r) +
optional("-R", R) +
repeat(input)
}
......@@ -53,6 +53,8 @@ class CombineVariants(val root: Configurable) extends Gatk {
case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None =>
case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions")
}
deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
deps = deps.distinct
}
override def cmdLine = super.cmdLine +
......
......@@ -47,7 +47,7 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools with Reference {
reference = referenceFasta()
}
def cmdBase = required(executable) +
def cmdLine = required(executable) +
required("mpileup") +
optional("-f", reference) +
optional("-l", intervalBed) +
......@@ -56,12 +56,9 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools with Reference {
optional("-d", depth) +
conditional(outputMappingQuality, "-s") +
conditional(disableBaq, "-B") +
conditional(u, "-u")
def cmdPipeInput = cmdBase + "-"
def cmdPipe = cmdBase + repeat(input)
/** Returns command to execute */
def cmdLine = cmdPipe + " > " + required(output)
conditional(u, "-u") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (inputAsStdin) "-" else repeat(input))
}
object SamtoolsMpileup {
......
......@@ -27,7 +27,7 @@ class SamtoolsSort(val root: Configurable) extends Samtools {
}
def cmdLine = required(executable) + required("sort") +
optional("-m", (coreMemeory + "G")) +
optional("-m", (coreMemory + "G")) +
optional("-@", threads) +
optional("-O", outputFormat) +
required("-T", prefix) +
......
......@@ -47,12 +47,9 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe
override def defaultCoreMemory = 3.0
override def defaults = ConfigUtils.mergeMaps(Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)),
super.defaults)
override def beforeGraph() {
super.beforeGraph()
reference = referenceFasta().getAbsolutePath
if (reference == null) reference = referenceFasta().getAbsolutePath
val samtoolsMpileup = new SamtoolsMpileup(this)
}
......@@ -66,20 +63,12 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe
}
}
override def cmdLine = {
(if (inputMpileup == null) {
val samtoolsMpileup = new SamtoolsMpileup(this)
samtoolsMpileup.reference = referenceFasta()
samtoolsMpileup.input = List(inputBam)
samtoolsMpileup.cmdPipe + " | "
} else "") +
super.cmdLine +
required("-o", output) +
optional("--minDP", minDP) +
optional("--minAP", minAP) +
optional("--homoFraction", homoFraction) +
optional("--ploidy", ploidy) +
required("--sample", sample) +
(if (inputBam == null) required("-I", inputMpileup) else "")
}
override def cmdLine = super.cmdLine +
required("-o", output) +
optional("--minDP", minDP) +
optional("--minAP", minAP) +
optional("--homoFraction", homoFraction) +
optional("--ploidy", ploidy) +
required("--sample", sample) +
(if (inputAsStdin) "" else required("-I", inputMpileup))
}
......@@ -48,6 +48,7 @@ class CustomVarScan(val root: Configurable) extends BiopetCommandLineFunction wi
disableBaq = true
depth = Option(1000000)
outputMappingQuality = true
}
private def fixMpileup = new PythonCommandLineFunction {
......@@ -96,7 +97,6 @@ class CustomVarScan(val root: Configurable) extends BiopetCommandLineFunction wi
def cmdLine: String = {
// FIXME: manual trigger of commandLine for version retrieval
mpileup.commandLine
mpileup.cmdPipe + " | " + fixMpileup.commandLine + " | " + removeEmptyPile().commandLine + " | " +
varscan.commandLine + " && " + compress.commandLine + " && " + index.commandLine
(mpileup | fixMpileup | removeEmptyPile() | varscan).commandLine + " && " + compress.commandLine + " && " + index.commandLine
}
}
......@@ -144,8 +144,8 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
config("bam_to_fastq", default = false).asBoolean match {
case true =>
val samToFastq = SamToFastq(qscript, config("bam"),
new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
samToFastq.isIntermediate = true
qscript.add(samToFastq)
mapping.foreach(mapping => {
......@@ -198,8 +198,8 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
})
variantcalling.foreach(vc => {
vc.sampleId = Some(libId)
vc.libId = Some(sampleId)
vc.sampleId = Some(sampleId)
vc.libId = Some(libId)
vc.outputDir = new File(libDir, "variantcalling")
if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
else vc.inputBams = bamFile.get :: Nil
......
......@@ -19,14 +19,13 @@ import java.io.File
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsCall
import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge }
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats }
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import org.broadinstitute.gatk.queue.function.CommandLineFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.utils.commandline.Input
/**
* Common trait for ShivaVariantcalling
......@@ -48,6 +47,8 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with
}
}
override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))
/** Executed before script */
def init(): Unit = {
}
......@@ -98,7 +99,7 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with
}
/** Will generate all available variantcallers */
protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools)
protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools, new BcftoolsSingleSample)
/** General trait for a variantcaller mode */
trait Variantcaller {
......@@ -161,22 +162,50 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with
val mp = new SamtoolsMpileup(qscript)
mp.input = inputBams
mp.u = true
mp.reference = referenceFasta()
val bt = new BcftoolsCall(qscript)
bt.O = "z"
bt.O = Some("z")
bt.v = true
bt.c = true
//TODO: add proper class with piping support, see also issue #114
add(new CommandLineFunction {
@Input
var input = inputBams
add(mp | bt > outputFile)
add(Tabix(qscript, outputFile))
}
}
@Output
var output = outputFile
/** default mode of bcftools */
class BcftoolsSingleSample extends Variantcaller {
val name = "bcftools_singlesample"
protected val defaultPrio = 8
def commandLine: String = mp.cmdPipe + " | " + bt.cmdPipeInput + " > " + outputFile + " && tabix -p vcf " + outputFile
})
/** Final output file of this mode */
def outputFile = new File(outputDir, namePrefix + ".bcftools_singlesample.vcf.gz")
def addJobs() {
val sampleVcfs = for (inputBam <- inputBams) yield {
val mp = new SamtoolsMpileup(qscript)
mp.input :+= inputBam
mp.u = true
mp.reference = referenceFasta()
val bt = new BcftoolsCall(qscript)
bt.O = Some("z")
bt.v = true
bt.c = true
bt.output = new File(outputDir, inputBam.getName + ".vcf.gz")
add(mp | bt)
add(Tabix(qscript, bt.output))
bt.output
}
val bcfmerge = new BcftoolsMerge(qscript)
bcfmerge.input = sampleVcfs
bcfmerge.output = outputFile
bcfmerge.O = Some("z")
add(bcfmerge)
add(Tabix(qscript, outputFile))
}
}
......@@ -192,10 +221,16 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with
def addJobs() {
val rawFiles = inputBams.map(bamFile => {
val mp = new SamtoolsMpileup(qscript) {
override def configName = "samtoolsmpileup"
override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1))
}
mp.input :+= bamFile
val m2v = new MpileupToVcf(qscript)
m2v.inputBam = bamFile
m2v.output = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.vcf")
add(m2v)
add(mp | m2v)
val vcfFilter = new VcfFilter(qscript) {
override def configName = "vcffilter"
......
......@@ -49,21 +49,32 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
@DataProvider(name = "shivaVariantcallingOptions")
def shivaVariantcallingOptions = {
val bool = Array(true, false)
(for (bams <- 0 to 3; raw <- bool; bcftools <- bool; freebayes <- bool) yield Array(bams, raw, bcftools, freebayes)).toArray
(for (
bams <- 0 to 3;
raw <- bool;
bcftools <- bool;
bcftools_singlesample <- bool;
freebayes <- bool
) yield Array(bams, raw, bcftools, bcftools_singlesample, freebayes)).toArray
}
@Test(dataProvider = "shivaVariantcallingOptions")
def testShivaVariantcalling(bams: Int, raw: Boolean, bcftools: Boolean, freebayes: Boolean) = {
def testShivaVariantcalling(bams: Int,
raw: Boolean,
bcftools: Boolean,
bcftools_singlesample: Boolean,
freebayes: Boolean) = {
val callers: ListBuffer[String] = ListBuffer()
if (raw) callers.append("raw")
if (bcftools) callers.append("bcftools")
if (bcftools_singlesample) callers.append("bcftools_singlesample")
if (freebayes) callers.append("freebayes")
val map = Map("variantcallers" -> callers.toList)
val pipeline = initPipeline(map)
pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList
val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !freebayes)
val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftools_singlesample && !freebayes)
if (illegalArgumentException) intercept[IllegalArgumentException] {
pipeline.script()
......@@ -76,7 +87,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
//pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0)
//FIXME: Can not check for bcftools because of piping
pipeline.functions.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
//pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0)
}
}
......
Markdown is supported
0% or .