Commit 6a965b1c authored by bow's avatar bow
Browse files

Merge branch 'feature-unique_sample_filter' into 'develop'

Added more filter options

fixes #316 

See merge request !410
parents ece09a76 02936179
...@@ -32,6 +32,11 @@ object VcfFilter extends ToolCommand { ...@@ -32,6 +32,11 @@ object VcfFilter extends ToolCommand {
} }
} }
case class BooleanArgs(uniqueOnly: Boolean = false,
sharedOnly: Boolean = false,
filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false)
case class Args(inputVcf: File = null, case class Args(inputVcf: File = null,
outputVcf: File = null, outputVcf: File = null,
invertedOutputVcf: Option[File] = None, invertedOutputVcf: Option[File] = None,
...@@ -43,17 +48,17 @@ object VcfFilter extends ToolCommand { ...@@ -43,17 +48,17 @@ object VcfFilter extends ToolCommand {
mustHaveVariant: List[String] = Nil, mustHaveVariant: List[String] = Nil,
calledIn: List[String] = Nil, calledIn: List[String] = Nil,
mustHaveGenotype: List[(String, GenotypeType)] = Nil, mustHaveGenotype: List[(String, GenotypeType)] = Nil,
deNovoInSample: String = null, uniqueVariantInSample: String = null,
resToDom: List[Trio] = Nil, resToDom: List[Trio] = Nil,
trioCompound: List[Trio] = Nil, trioCompound: List[Trio] = Nil,
deNovoTrio: List[Trio] = Nil, deNovoTrio: List[Trio] = Nil,
trioLossOfHet: List[Trio] = Nil, trioLossOfHet: List[Trio] = Nil,
booleanArgs: BooleanArgs = BooleanArgs(),
diffGenotype: List[(String, String)] = Nil, diffGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil, filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false,
iDset: Set[String] = Set(), iDset: Set[String] = Set(),
minGenomeQuality: Int = 0) extends AbstractArgs minGenomeQuality: Int = 0) extends AbstractArgs {
}
class OptParser extends AbstractOptParser { class OptParser extends AbstractOptParser {
opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
...@@ -84,7 +89,7 @@ object VcfFilter extends ToolCommand { ...@@ -84,7 +89,7 @@ object VcfFilter extends ToolCommand {
c.copy(trioCompound = new Trio(x) :: c.trioCompound) c.copy(trioCompound = new Trio(x) :: c.trioCompound)
} text "Only shows variants where child is a compound variant combined from both parants" } text "Only shows variants where child is a compound variant combined from both parants"
opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) => opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
c.copy(deNovoInSample = x) c.copy(uniqueVariantInSample = x)
} text "Only show variants that contain unique alleles in complete set for given sample" } text "Only show variants that contain unique alleles in complete set for given sample"
opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) => opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio) c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
...@@ -113,11 +118,17 @@ object VcfFilter extends ToolCommand { ...@@ -113,11 +118,17 @@ object VcfFilter extends ToolCommand {
} validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample") } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
} text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered" } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
opt[Unit]("filterRefCalls") unbounded () action { (x, c) => opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
c.copy(filterRefCalls = true) c.copy(booleanArgs = c.booleanArgs.copy(filterRefCalls = true))
} text "Filter when there are only ref calls" } text "Filter when there are only ref calls"
opt[Unit]("filterNoCalls") unbounded () action { (x, c) => opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
c.copy(filterNoCalls = true) c.copy(booleanArgs = c.booleanArgs.copy(filterNoCalls = true))
} text "Filter when there are only no calls" } text "Filter when there are only no calls"
opt[Unit]("uniqueOnly") unbounded () action { (x, c) =>
c.copy(booleanArgs = c.booleanArgs.copy(uniqueOnly = true))
} text "Filter when there more then 1 sample have this variant"
opt[Unit]("sharedOnly") unbounded () action { (x, c) =>
c.copy(booleanArgs = c.booleanArgs.copy(sharedOnly = true))
} text "Filter when not all samples have this variant"
opt[Double]("minQualScore") unbounded () action { (x, c) => opt[Double]("minQualScore") unbounded () action { (x, c) =>
c.copy(minQualScore = Some(x)) c.copy(minQualScore = Some(x))
} text "Min qual score" } text "Min qual score"
...@@ -136,7 +147,7 @@ object VcfFilter extends ToolCommand { ...@@ -136,7 +147,7 @@ object VcfFilter extends ToolCommand {
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
logger.info("Start") logger.info("Start")
val argsParser = new OptParser val argsParser = new OptParser
val cmdArgs = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException) val cmdArgs = argsParser.parse(args, Args()).getOrElse { throw new IllegalArgumentException }
val reader = new VCFFileReader(cmdArgs.inputVcf, false) val reader = new VCFFileReader(cmdArgs.inputVcf, false)
val header = reader.getFileHeader val header = reader.getFileHeader
...@@ -158,8 +169,10 @@ object VcfFilter extends ToolCommand { ...@@ -158,8 +169,10 @@ object VcfFilter extends ToolCommand {
var counterLeft = 0 var counterLeft = 0
for (record <- reader) { for (record <- reader) {
if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) && if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
(!cmdArgs.filterRefCalls || hasNonRefCalls(record)) && (!cmdArgs.booleanArgs.filterRefCalls || hasNonRefCalls(record)) &&
(!cmdArgs.filterNoCalls || hasCalls(record)) && (!cmdArgs.booleanArgs.filterNoCalls || hasCalls(record)) &&
(!cmdArgs.booleanArgs.uniqueOnly || hasUniqeSample(record)) &&
(!cmdArgs.booleanArgs.sharedOnly || allSamplesVariant(record)) &&
hasMinTotalDepth(record, cmdArgs.minTotalDepth) && hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) && hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) && minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
...@@ -172,7 +185,7 @@ object VcfFilter extends ToolCommand { ...@@ -172,7 +185,7 @@ object VcfFilter extends ToolCommand {
cmdArgs.filterHetVarToHomVar.isEmpty || cmdArgs.filterHetVarToHomVar.isEmpty ||
cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2)) cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
) && ) &&
denovoInSample(record, cmdArgs.deNovoInSample) && uniqueVariantInSample(record, cmdArgs.uniqueVariantInSample) &&
denovoTrio(record, cmdArgs.deNovoTrio) && denovoTrio(record, cmdArgs.deNovoTrio) &&
denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) && denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
resToDom(record, cmdArgs.resToDom) && resToDom(record, cmdArgs.resToDom) &&
...@@ -194,6 +207,7 @@ object VcfFilter extends ToolCommand { ...@@ -194,6 +207,7 @@ object VcfFilter extends ToolCommand {
/** /**
* Checks if given samples are called * Checks if given samples are called
*
* @param record VCF record * @param record VCF record
* @param samples Samples that need this sample to be called * @param samples Samples that need this sample to be called
* @return false when filters fail * @return false when filters fail
...@@ -205,16 +219,20 @@ object VcfFilter extends ToolCommand { ...@@ -205,16 +219,20 @@ object VcfFilter extends ToolCommand {
/** /**
* Checks if given genotypes for given samples are there * Checks if given genotypes for given samples are there
*
* @param record VCF record * @param record VCF record
* @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype) * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
* @return false when filter fails * @return false when filter fails
*/ */
def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = { def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2) samplesGenotypes.forall { x =>
record.getGenotype(x._1).getType == x._2
}
} }
/** /**
* Checks if record has atleast minQualScore * Checks if record has atleast minQualScore
*
* @param record VCF record * @param record VCF record
* @param minQualScore Minimal quality score * @param minQualScore Minimal quality score
* @return false when filters fail * @return false when filters fail
...@@ -233,6 +251,16 @@ object VcfFilter extends ToolCommand { ...@@ -233,6 +251,16 @@ object VcfFilter extends ToolCommand {
record.getGenotypes.exists(g => !g.isNoCall) record.getGenotypes.exists(g => !g.isNoCall)
} }
/** Checks if there is a variant in only 1 sample */
def hasUniqeSample(record: VariantContext): Boolean = {
record.getSampleNames.exists(uniqueVariantInSample(record, _))
}
/** Checks if all samples are a variant */
def allSamplesVariant(record: VariantContext): Boolean = {
record.getGenotypes.forall(g => !g.isNonInformative && g.getAlleles.exists(a => a.isNonReference && !a.isNoCall))
}
/** returns true when DP INFO field is atleast the given value */ /** returns true when DP INFO field is atleast the given value */
def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = { def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
record.getAttributeAsInt("DP", -1) >= minTotalDepth record.getAttributeAsInt("DP", -1) >= minTotalDepth
...@@ -240,6 +268,7 @@ object VcfFilter extends ToolCommand { ...@@ -240,6 +268,7 @@ object VcfFilter extends ToolCommand {
/** /**
* Checks if DP genotype field have a minimal value * Checks if DP genotype field have a minimal value
*
* @param record VCF record * @param record VCF record
* @param minSampleDepth minimal depth * @param minSampleDepth minimal depth
* @param minSamplesPass Minimal number of samples to pass filter * @param minSamplesPass Minimal number of samples to pass filter
...@@ -254,6 +283,7 @@ object VcfFilter extends ToolCommand { ...@@ -254,6 +283,7 @@ object VcfFilter extends ToolCommand {
/** /**
* Checks if non-ref AD genotype field have a minimal value * Checks if non-ref AD genotype field have a minimal value
*
* @param record VCF record * @param record VCF record
* @param minAlternateDepth minimal depth * @param minAlternateDepth minimal depth
* @param minSamplesPass Minimal number of samples to pass filter * @param minSamplesPass Minimal number of samples to pass filter
...@@ -268,6 +298,7 @@ object VcfFilter extends ToolCommand { ...@@ -268,6 +298,7 @@ object VcfFilter extends ToolCommand {
/** /**
* Checks if genome quality field has minimum value * Checks if genome quality field has minimum value
*
* @param record VCF record * @param record VCF record
* @param minGQ smallest GQ allowed * @param minGQ smallest GQ allowed
* @param minSamplesPass number of samples to consider * @param minSamplesPass number of samples to consider
...@@ -282,6 +313,7 @@ object VcfFilter extends ToolCommand { ...@@ -282,6 +313,7 @@ object VcfFilter extends ToolCommand {
/** /**
* Checks if given samples does have a variant hin this record * Checks if given samples does have a variant hin this record
*
* @param record VCF record * @param record VCF record
* @param mustHaveVariant List of samples that should have this variant * @param mustHaveVariant List of samples that should have this variant
* @return true if filter passed * @return true if filter passed
...@@ -311,11 +343,12 @@ object VcfFilter extends ToolCommand { ...@@ -311,11 +343,12 @@ object VcfFilter extends ToolCommand {
} }
/** Checks if given sample have alternative alleles that are unique in the VCF record */ /** Checks if given sample have alternative alleles that are unique in the VCF record */
def denovoInSample(record: VariantContext, sample: String): Boolean = { def uniqueVariantInSample(record: VariantContext, sample: String): Boolean = {
if (sample == null) return true if (sample == null) return true
val genotype = record.getGenotype(sample) val genotype = record.getGenotype(sample)
if (genotype.isNoCall) return false if (genotype.isNoCall) return false
for (allele <- genotype.getAlleles) { if (genotype.getAlleles.forall(_.isReference)) return false
for (allele <- genotype.getAlleles if allele.isNonReference) {
for (g <- record.getGenotypes if g.getSampleName != sample) { for (g <- record.getGenotypes if g.getSampleName != sample) {
if (g.getAlleles.exists(_.basesMatch(allele))) return false if (g.getAlleles.exists(_.basesMatch(allele))) return false
} }
......
...@@ -70,24 +70,22 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { ...@@ -70,24 +70,22 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
/** /**
* This should simply not raise an exception * This should simply not raise an exception
*/ */
val tmp = File.createTempFile("VCfFilter", ".vcf.gz") val tmp = File.createTempFile("VCfFilter", ".vcf")
tmp.deleteOnExit() tmp.deleteOnExit()
val tmpPath = tmp.getAbsolutePath val arguments: Array[String] = Array("-I", veppedPath, "-o", tmp.getAbsolutePath,
val arguments: Array[String] = Array("-I", veppedPath, "-o", tmpPath,
"--mustHaveGenotype", "Sample_101:HET") "--mustHaveGenotype", "Sample_101:HET")
main(arguments) main(arguments)
val size = new VCFFileReader(new File(tmpPath), false).size val size = new VCFFileReader(tmp, false).size
size shouldBe 1 size shouldBe 1
val tmp2 = File.createTempFile("VcfFilter", ".vcf.gz") val tmp2 = File.createTempFile("VcfFilter", ".vcf.gz")
tmp2.deleteOnExit() tmp2.deleteOnExit()
val tmpPath2 = tmp2.getAbsolutePath val arguments2: Array[String] = Array("-I", veppedPath, "-o", tmp2.getAbsolutePath,
val arguments2: Array[String] = Array("-I", veppedPath, "-o", tmpPath2,
"--mustHaveGenotype", "Sample_101:HOM_VAR") "--mustHaveGenotype", "Sample_101:HOM_VAR")
main(arguments2) main(arguments2)
val size2 = new VCFFileReader(new File(tmpPath2), false).size val size2 = new VCFFileReader(tmp2, false).size
size2 shouldBe 0 size2 shouldBe 0
} }
...@@ -209,9 +207,9 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { ...@@ -209,9 +207,9 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
val reader = new VCFFileReader(vepped, false) val reader = new VCFFileReader(vepped, false)
val record = reader.iterator().next() val record = reader.iterator().next()
denovoInSample(record, "Sample_101") shouldBe false uniqueVariantInSample(record, "Sample_101") shouldBe false
denovoInSample(record, "Sample_102") shouldBe false uniqueVariantInSample(record, "Sample_102") shouldBe false
denovoInSample(record, "Sample_103") shouldBe false uniqueVariantInSample(record, "Sample_103") shouldBe false
} }
@Test def testResToDom() = { @Test def testResToDom() = {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment