Commit 6a965b1c authored by bow's avatar bow
Browse files

Merge branch 'feature-unique_sample_filter' into 'develop'

Added more filter options

fixes #316 

See merge request !410
parents ece09a76 02936179
......@@ -32,6 +32,11 @@ object VcfFilter extends ToolCommand {
}
}
case class BooleanArgs(uniqueOnly: Boolean = false,
sharedOnly: Boolean = false,
filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false)
case class Args(inputVcf: File = null,
outputVcf: File = null,
invertedOutputVcf: Option[File] = None,
......@@ -43,17 +48,17 @@ object VcfFilter extends ToolCommand {
mustHaveVariant: List[String] = Nil,
calledIn: List[String] = Nil,
mustHaveGenotype: List[(String, GenotypeType)] = Nil,
deNovoInSample: String = null,
uniqueVariantInSample: String = null,
resToDom: List[Trio] = Nil,
trioCompound: List[Trio] = Nil,
deNovoTrio: List[Trio] = Nil,
trioLossOfHet: List[Trio] = Nil,
booleanArgs: BooleanArgs = BooleanArgs(),
diffGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false,
iDset: Set[String] = Set(),
minGenomeQuality: Int = 0) extends AbstractArgs
minGenomeQuality: Int = 0) extends AbstractArgs {
}
class OptParser extends AbstractOptParser {
opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
......@@ -84,7 +89,7 @@ object VcfFilter extends ToolCommand {
c.copy(trioCompound = new Trio(x) :: c.trioCompound)
} text "Only shows variants where child is a compound variant combined from both parants"
opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
c.copy(deNovoInSample = x)
c.copy(uniqueVariantInSample = x)
} text "Only show variants that contain unique alleles in complete set for given sample"
opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
......@@ -113,11 +118,17 @@ object VcfFilter extends ToolCommand {
} validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
} text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
c.copy(filterRefCalls = true)
c.copy(booleanArgs = c.booleanArgs.copy(filterRefCalls = true))
} text "Filter when there are only ref calls"
opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
c.copy(filterNoCalls = true)
c.copy(booleanArgs = c.booleanArgs.copy(filterNoCalls = true))
} text "Filter when there are only no calls"
opt[Unit]("uniqueOnly") unbounded () action { (x, c) =>
c.copy(booleanArgs = c.booleanArgs.copy(uniqueOnly = true))
} text "Filter when there more then 1 sample have this variant"
opt[Unit]("sharedOnly") unbounded () action { (x, c) =>
c.copy(booleanArgs = c.booleanArgs.copy(sharedOnly = true))
} text "Filter when not all samples have this variant"
opt[Double]("minQualScore") unbounded () action { (x, c) =>
c.copy(minQualScore = Some(x))
} text "Min qual score"
......@@ -136,7 +147,7 @@ object VcfFilter extends ToolCommand {
def main(args: Array[String]): Unit = {
logger.info("Start")
val argsParser = new OptParser
val cmdArgs = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
val cmdArgs = argsParser.parse(args, Args()).getOrElse { throw new IllegalArgumentException }
val reader = new VCFFileReader(cmdArgs.inputVcf, false)
val header = reader.getFileHeader
......@@ -158,8 +169,10 @@ object VcfFilter extends ToolCommand {
var counterLeft = 0
for (record <- reader) {
if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
(!cmdArgs.filterRefCalls || hasNonRefCalls(record)) &&
(!cmdArgs.filterNoCalls || hasCalls(record)) &&
(!cmdArgs.booleanArgs.filterRefCalls || hasNonRefCalls(record)) &&
(!cmdArgs.booleanArgs.filterNoCalls || hasCalls(record)) &&
(!cmdArgs.booleanArgs.uniqueOnly || hasUniqeSample(record)) &&
(!cmdArgs.booleanArgs.sharedOnly || allSamplesVariant(record)) &&
hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
......@@ -172,7 +185,7 @@ object VcfFilter extends ToolCommand {
cmdArgs.filterHetVarToHomVar.isEmpty ||
cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
) &&
denovoInSample(record, cmdArgs.deNovoInSample) &&
uniqueVariantInSample(record, cmdArgs.uniqueVariantInSample) &&
denovoTrio(record, cmdArgs.deNovoTrio) &&
denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
resToDom(record, cmdArgs.resToDom) &&
......@@ -194,6 +207,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if given samples are called
*
* @param record VCF record
* @param samples Samples that need this sample to be called
* @return false when filters fail
......@@ -205,16 +219,20 @@ object VcfFilter extends ToolCommand {
/**
* Checks if given genotypes for given samples are there
*
* @param record VCF record
* @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
* @return false when filter fails
*/
def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2)
samplesGenotypes.forall { x =>
record.getGenotype(x._1).getType == x._2
}
}
/**
* Checks if record has atleast minQualScore
*
* @param record VCF record
* @param minQualScore Minimal quality score
* @return false when filters fail
......@@ -233,6 +251,16 @@ object VcfFilter extends ToolCommand {
record.getGenotypes.exists(g => !g.isNoCall)
}
/** Checks if there is a variant in only 1 sample */
def hasUniqeSample(record: VariantContext): Boolean = {
record.getSampleNames.exists(uniqueVariantInSample(record, _))
}
/** Checks if all samples are a variant */
def allSamplesVariant(record: VariantContext): Boolean = {
record.getGenotypes.forall(g => !g.isNonInformative && g.getAlleles.exists(a => a.isNonReference && !a.isNoCall))
}
/** returns true when DP INFO field is atleast the given value */
def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
record.getAttributeAsInt("DP", -1) >= minTotalDepth
......@@ -240,6 +268,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if DP genotype field have a minimal value
*
* @param record VCF record
* @param minSampleDepth minimal depth
* @param minSamplesPass Minimal number of samples to pass filter
......@@ -254,6 +283,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if non-ref AD genotype field have a minimal value
*
* @param record VCF record
* @param minAlternateDepth minimal depth
* @param minSamplesPass Minimal number of samples to pass filter
......@@ -268,6 +298,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if genome quality field has minimum value
*
* @param record VCF record
* @param minGQ smallest GQ allowed
* @param minSamplesPass number of samples to consider
......@@ -282,6 +313,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if given samples does have a variant hin this record
*
* @param record VCF record
* @param mustHaveVariant List of samples that should have this variant
* @return true if filter passed
......@@ -311,11 +343,12 @@ object VcfFilter extends ToolCommand {
}
/** Checks if given sample have alternative alleles that are unique in the VCF record */
def denovoInSample(record: VariantContext, sample: String): Boolean = {
def uniqueVariantInSample(record: VariantContext, sample: String): Boolean = {
if (sample == null) return true
val genotype = record.getGenotype(sample)
if (genotype.isNoCall) return false
for (allele <- genotype.getAlleles) {
if (genotype.getAlleles.forall(_.isReference)) return false
for (allele <- genotype.getAlleles if allele.isNonReference) {
for (g <- record.getGenotypes if g.getSampleName != sample) {
if (g.getAlleles.exists(_.basesMatch(allele))) return false
}
......
......@@ -70,24 +70,22 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
/**
* This should simply not raise an exception
*/
val tmp = File.createTempFile("VCfFilter", ".vcf.gz")
val tmp = File.createTempFile("VCfFilter", ".vcf")
tmp.deleteOnExit()
val tmpPath = tmp.getAbsolutePath
val arguments: Array[String] = Array("-I", veppedPath, "-o", tmpPath,
val arguments: Array[String] = Array("-I", veppedPath, "-o", tmp.getAbsolutePath,
"--mustHaveGenotype", "Sample_101:HET")
main(arguments)
val size = new VCFFileReader(new File(tmpPath), false).size
val size = new VCFFileReader(tmp, false).size
size shouldBe 1
val tmp2 = File.createTempFile("VcfFilter", ".vcf.gz")
tmp2.deleteOnExit()
val tmpPath2 = tmp2.getAbsolutePath
val arguments2: Array[String] = Array("-I", veppedPath, "-o", tmpPath2,
val arguments2: Array[String] = Array("-I", veppedPath, "-o", tmp2.getAbsolutePath,
"--mustHaveGenotype", "Sample_101:HOM_VAR")
main(arguments2)
val size2 = new VCFFileReader(new File(tmpPath2), false).size
val size2 = new VCFFileReader(tmp2, false).size
size2 shouldBe 0
}
......@@ -209,9 +207,9 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
val reader = new VCFFileReader(vepped, false)
val record = reader.iterator().next()
denovoInSample(record, "Sample_101") shouldBe false
denovoInSample(record, "Sample_102") shouldBe false
denovoInSample(record, "Sample_103") shouldBe false
uniqueVariantInSample(record, "Sample_101") shouldBe false
uniqueVariantInSample(record, "Sample_102") shouldBe false
uniqueVariantInSample(record, "Sample_103") shouldBe false
}
@Test def testResToDom() = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment