Commit 610e9ad5 authored by bow's avatar bow
Browse files

Merge branch 'feature-genotype_filter' into 'develop'

Feature genotype filter

For #92

See merge request !62
parents 54d50322 4a4fa9be
...@@ -49,9 +49,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri ...@@ -49,9 +49,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
} }
def init() { def init() {
if (config.contains("target_bed")) {
defaults ++= Map("gatk" -> Map(("intervals" -> config("target_bed").asStringList)))
}
if (config.contains("gvcfFiles")) if (config.contains("gvcfFiles"))
for (file <- config("gvcfFiles").asList) for (file <- config("gvcfFiles").asList)
gvcfFiles :+= file.toString gvcfFiles :+= file.toString
......
...@@ -35,8 +35,7 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction ...@@ -35,8 +35,7 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction
override val defaultVmem = "6G" override val defaultVmem = "6G"
memoryLimit = Option(2.0) memoryLimit = Option(2.0)
if (config.contains("target_bed")) defaults ++= Map("samtoolsmpileup" -> Map("interval_bed" -> config("target_bed").asStringList.head, defaults ++= Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1))
"disable_baq" -> true, "min_map_quality" -> 1))
override def afterGraph { override def afterGraph {
super.afterGraph super.afterGraph
......
...@@ -2,7 +2,8 @@ package nl.lumc.sasc.biopet.tools ...@@ -2,7 +2,8 @@ package nl.lumc.sasc.biopet.tools
import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader import htsjdk.variant.vcf.{ VCFHeader, VCFFileReader }
import htsjdk.variant.variantcontext.VariantContext
import java.io.File import java.io.File
import java.util.ArrayList import java.util.ArrayList
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
...@@ -48,60 +49,137 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { ...@@ -48,60 +49,137 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
} }
object VcfFilter extends ToolCommand { object VcfFilter extends ToolCommand {
case class Args(inputVcf: File = null, outputVcf: File = null, minSampleDepth: Int = -1, minTotalDepth: Int = -1, case class Args(inputVcf: File = null,
minAlternateDepth: Int = -1, minSamplesPass: Int = 0, minBamAlternateDepth: Int = 0, filterRefCalls: Boolean = false) extends AbstractArgs outputVcf: File = null,
minQualscore: Option[Double] = None,
minSampleDepth: Int = -1,
minTotalDepth: Int = -1,
minAlternateDepth: Int = -1,
minSamplesPass: Int = 0,
minBamAlternateDepth: Int = 0,
mustHaveVariant: List[String] = Nil,
denovoInSample: String = null,
diffGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false) extends AbstractArgs
class OptParser extends AbstractOptParser { class OptParser extends AbstractOptParser {
opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(inputVcf = x) c.copy(inputVcf = x)
} } text ("Input vcf file")
opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(outputVcf = x) c.copy(outputVcf = x)
} text ("output file, default to stdout") } text ("Output vcf file")
opt[Int]("minSampleDepth") unbounded () action { (x, c) => opt[Int]("minSampleDepth") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minSampleDepth = x) c.copy(minSampleDepth = x)
} } text ("Min value for DP in genotype fields")
opt[Int]("minTotalDepth") unbounded () action { (x, c) => opt[Int]("minTotalDepth") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minTotalDepth = x) c.copy(minTotalDepth = x)
} } text ("Min value of DP field in INFO fields")
opt[Int]("minAlternateDepth") unbounded () action { (x, c) => opt[Int]("minAlternateDepth") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minAlternateDepth = x) c.copy(minAlternateDepth = x)
} } text ("Min value of AD field in genotype fields")
opt[Int]("minSamplesPass") unbounded () action { (x, c) => opt[Int]("minSamplesPass") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minSamplesPass = x) c.copy(minSamplesPass = x)
} } text ("Min number opf samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth")
opt[Int]("minBamAlternateDepth") unbounded () action { (x, c) => opt[Int]("minBamAlternateDepth") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minBamAlternateDepth = x) c.copy(minBamAlternateDepth = x)
} } // TODO: Convert this to more generic filter
opt[String]("denovoInSample") maxOccurs (1) unbounded () valueName ("<sample>") action { (x, c) =>
c.copy(denovoInSample = x)
} text ("Only show variants that contain unique alleles in compete set for given sample")
opt[String]("mustHaveVariant") unbounded () valueName ("<sample>") action { (x, c) =>
c.copy(mustHaveVariant = x :: c.mustHaveVariant)
} text ("Given sample must have 1 alternative allele")
opt[String]("diffGenotype") unbounded () valueName ("<sample:sample>") action { (x, c) =>
c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
} validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
} text ("Given samples must have a different genotype")
opt[String]("filterHetVarToHomVar") unbounded () valueName ("<sample:sample>") action { (x, c) =>
c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
} validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
} text ("If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filterd")
opt[Unit]("filterRefCalls") unbounded () action { (x, c) => opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
c.copy(filterRefCalls = true) c.copy(filterRefCalls = true)
} text ("Filter when there are only ref calls")
opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
c.copy(filterNoCalls = true)
} text ("Filter when there are only no calls")
opt[Double]("minQualscore") unbounded () action { (x, c) =>
c.copy(minQualscore = Some(x))
} text ("Min qual score")
} }
}
var commandArgs: Args = _
/** /**
* @param args the command line arguments * @param args the command line arguments
*/ */
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
val argsParser = new OptParser val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val reader = new VCFFileReader(commandArgs.inputVcf, false) val reader = new VCFFileReader(commandArgs.inputVcf, false)
val header = reader.getFileHeader val header = reader.getFileHeader
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build) val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
writer.writeHeader(header) writer.writeHeader(header)
val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList
val bamDPFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-DP-")) yield line.getID).toList
for (record <- reader) { for (record <- reader) {
val genotypes = for (genotype <- record.getGenotypes) yield { if (minQualscore(record) &&
filterRefCalls(record) &&
filterNoCalls(record) &&
minTotalDepth(record) &&
minSampleDepth(record) &&
minAlternateDepth(record) &&
minBamAlternateDepth(record, header) &&
mustHaveVariant(record) &&
notSameGenotype(record) &&
filterHetVarToHomVar(record) &&
denovoInSample(record)) {
writer.add(record)
}
}
reader.close
writer.close
}
def minQualscore(record: VariantContext): Boolean = {
if (commandArgs.minQualscore.isEmpty) return true
record.getPhredScaledQual >= commandArgs.minQualscore.get
}
def filterRefCalls(record: VariantContext): Boolean = {
if (commandArgs.filterNoCalls) record.getGenotypes.exists(g => !g.isHomRef)
else true
}
def filterNoCalls(record: VariantContext): Boolean = {
if (commandArgs.filterNoCalls) record.getGenotypes.exists(g => !g.isNoCall)
else true
}
def minTotalDepth(record: VariantContext): Boolean = {
record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth
}
def minSampleDepth(record: VariantContext): Boolean = {
record.getGenotypes.count(genotype => {
val DP = if (genotype.hasDP) genotype.getDP else -1 val DP = if (genotype.hasDP) genotype.getDP else -1
DP >= commandArgs.minSampleDepth
}) >= commandArgs.minSamplesPass
}
def minAlternateDepth(record: VariantContext): Boolean = {
record.getGenotypes.count(genotype => {
val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
DP >= commandArgs.minSampleDepth && if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true
(if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true) && }) >= commandArgs.minSamplesPass
!(commandArgs.filterRefCalls && genotype.isHomRef)
} }
def minBamAlternateDepth(record: VariantContext, header: VCFHeader): Boolean = {
val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList
val bamADvalues = (for (field <- bamADFields) yield { val bamADvalues = (for (field <- bamADFields) yield {
record.getAttribute(field, new ArrayList) match { record.getAttribute(field, new ArrayList) match {
case t: ArrayList[_] if t.length > 1 => { case t: ArrayList[_] if t.length > 1 => {
...@@ -117,12 +195,43 @@ object VcfFilter extends ToolCommand { ...@@ -117,12 +195,43 @@ object VcfFilter extends ToolCommand {
} }
}).flatten }).flatten
if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth && return commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass
genotypes.count(_ == true) >= commandArgs.minSamplesPass &&
(commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass))
writer.add(record)
} }
reader.close
writer.close def mustHaveVariant(record: VariantContext): Boolean = {
return !commandArgs.mustHaveVariant.map(record.getGenotype(_)).exists(a => a.isHomRef || a.isNoCall)
}
def notSameGenotype(record: VariantContext): Boolean = {
for ((sample1, sample2) <- commandArgs.diffGenotype) {
val genotype1 = record.getGenotype(sample1)
val genotype2 = record.getGenotype(sample2)
if (genotype1.sameGenotype(genotype2)) return false
}
return true
}
def filterHetVarToHomVar(record: VariantContext): Boolean = {
for ((sample1, sample2) <- commandArgs.filterHetVarToHomVar) {
val genotype1 = record.getGenotype(sample1)
val genotype2 = record.getGenotype(sample2)
if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
for (allele <- genotype1.getAlleles if allele.isNonReference) {
if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
}
}
}
return true
}
def denovoInSample(record: VariantContext): Boolean = {
if (commandArgs.denovoInSample == null) return true
val genotype = record.getGenotype(commandArgs.denovoInSample)
for (allele <- genotype.getAlleles if allele.isNonReference) {
for (g <- record.getGenotypes if g.getSampleName != commandArgs.denovoInSample) {
if (g.getAlleles.exists(_.basesMatch(allele))) return false
}
}
return true
} }
} }
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment