VcfFilter.scala 16.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
import scala.collection.JavaConversions._
27
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  /** Container class for a trio */
Sander Bollen's avatar
Sander Bollen committed
31
  protected[tools] case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
34
35
36
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

37
38
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
39
                  invertedOutputVcf: Option[File] = None,
40
                  minQualScore: Option[Double] = None,
41
42
43
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
44
                  minSamplesPass: Int = 1,
45
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
46
                  calledIn: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
47
                  mustHaveGenotype: List[(String, GenotypeType)] = Nil,
48
                  deNovoInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
49
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
50
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
53
                  diffGenotype: List[(String, String)] = Nil,
54
                  filterHetVarToHomVar: List[(String, String)] = Nil,
55
                  filterRefCalls: Boolean = false,
56
                  filterNoCalls: Boolean = false,
57
58
                  iDset: Set[String] = Set(),
                  minGenomeQuality: Int = 0) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
61
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
62
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
65
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
68
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
83
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
86
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
89
      c.copy(deNovoInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
98
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
101
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
    } text "Must be called in this sample"
103
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
108
      c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype)
    } validate { x =>
      if (x.split(":").length == 2 && GenotypeType.values().contains(x.split(":")(1))) success
      else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
114
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
115
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
116
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
      c.copy(filterRefCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    } text "Filter when there are only ref calls"
120
121
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
      c.copy(filterNoCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    } text "Filter when there are only no calls"
123
124
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    } text "Min qual score"
126
127
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
128
    } text "Id that may pass the filter"
129
    opt[File]("idFile") unbounded () action { (x, c) =>
130
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    } text "File that contain list of IDs to get from vcf file"
Sander Bollen's avatar
Sander Bollen committed
132
133
134
    opt[Int]("minGenomeQuality") unbounded () action { (x, c) =>
      c.copy(minGenomeQuality = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
135
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
136

Peter van 't Hof's avatar
Peter van 't Hof committed
137
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
138
  def main(args: Array[String]): Unit = {
139
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
141
    val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
142

Peter van 't Hof's avatar
Peter van 't Hof committed
143
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
144
    val header = reader.getFileHeader
145
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
146
      setOutputFile(cmdArgs.outputVcf).
147
148
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
150

Peter van 't Hof's avatar
Peter van 't Hof committed
151
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
152
153
154
155
156
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
157
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
158

159
160
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
161
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
164
165
166
167
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
        (!cmdArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.filterNoCalls || hasCalls(record)) &&
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
168
        minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
169
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
        calledIn(record, cmdArgs.calledIn) &&
        hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
174
175
176
177
178
179
180
181
182
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
          denovoInSample(record, cmdArgs.deNovoInSample) &&
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
183
        writer.add(record)
184
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
      } else
        invertedWriter.foreach(_.add(record))
187
      counterTotal += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
188
      if (counterTotal % 100000 == 0) logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
190
    logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
193
    invertedWriter.foreach(_.close())
194
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
195
  }
196

Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
199
200
201
202
203
204
  /**
   * Checks if given samples are called
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
    else true
  }

208
209
210
211
212
213
  /**
   * Checks if given genotypes for given samples are there
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
  def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
    samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2)
216
217
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
218
219
220
221
222
223
224
225
  /**
   * Checks if record has atleast minQualScore
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
228
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
231
232
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
233
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
234
235
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
236
237
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
238
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
239
240
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
241
242
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
246
247
248
249
250
  /**
   * Checks if DP genotype field have a minimal value
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
251
252
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
      DP >= minSampleDepth
    }) >= minSamplesPass
255
256
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
257
  /**
258
   * Checks if non-ref AD genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
259
260
261
262
263
264
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
265
266
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
267
      if (AD.nonEmpty) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
268
    }) >= minSamplesPass
269
270
  }

271
272
273
274
275
276
277
278
279
280
281
282
  /**
   * Checks if genome quality field has minimum value
   * @param record VCF record
   * @param minGQ smallest GQ allowed
   * @param minSamplesPass number of samples to consider
   * @return
   */
  def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = {
    record.getGenotypes.count(x => if (!x.hasGQ) false
    else if (x.getGQ >= minGQ) true else false) >= minSamplesPass
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
285
286
287
288
  /**
   * Checks if given samples does have a variant hin this record
   * @param record VCF record
   * @param mustHaveVariant List of samples that should have this variant
   * @return true if filter passed
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
289
290
  def mustHaveVariant(record: VariantContext, mustHaveVariant: List[String]): Boolean = {
    !mustHaveVariant.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall)
291
292
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
293
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
294
295
296
297
298
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
299
300
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
301
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
302
303
304
305
306
307
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
308
309
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
310
    true
311
312
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
313
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
314
315
316
317
318
319
  def denovoInSample(record: VariantContext, sample: String): Boolean = {
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
    for (allele <- genotype.getAlleles) {
      for (g <- record.getGenotypes if g.getSampleName != sample) {
320
321
322
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
323
    true
324
  }
325

Peter van 't Hof's avatar
Peter van 't Hof committed
326
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
327
328
329
330
331
332
333
334
335
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
336
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
337
338
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
339
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
340
341
342
343
344
345
346
347
348
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
349
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
350
351
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
352
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
353
354
355
356
357
358
359
360
361
362
363
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
364
365
366
367
368
369
370
371
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
372
373
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
374
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
375
376
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
377
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
378
379
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
380
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
381
}