VcfFilter.scala 16.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
import scala.collection.JavaConversions._
27
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  /** Container class for a trio */
Sander Bollen's avatar
Sander Bollen committed
31
  protected[tools] case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
34
35
36
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

37
38
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
39
                  invertedOutputVcf: Option[File] = None,
40
                  minQualScore: Option[Double] = None,
41
42
43
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
44
                  minSamplesPass: Int = 1,
45
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
46
                  calledIn: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
47
                  mustHaveGenotype: List[(String, GenotypeType)] = Nil,
48
                  deNovoInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
49
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
50
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
53
                  diffGenotype: List[(String, String)] = Nil,
54
                  filterHetVarToHomVar: List[(String, String)] = Nil,
55
                  filterRefCalls: Boolean = false,
56
                  filterNoCalls: Boolean = false,
57
58
                  iDset: Set[String] = Set(),
                  minGenomeQuality: Int = 0) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
61
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
62
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
65
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
68
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
83
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
86
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
89
      c.copy(deNovoInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
98
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
101
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
    } text "Must be called in this sample"
103
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
108
      c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype)
    } validate { x =>
      if (x.split(":").length == 2 && GenotypeType.values().contains(x.split(":")(1))) success
      else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
114
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
115
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
116
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
      c.copy(filterRefCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    } text "Filter when there are only ref calls"
120
121
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
      c.copy(filterNoCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    } text "Filter when there are only no calls"
123
124
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    } text "Min qual score"
126
127
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
128
    } text "Id that may pass the filter"
129
    opt[File]("idFile") unbounded () action { (x, c) =>
130
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    } text "File that contain list of IDs to get from vcf file"
132
133
    opt[Int]("minGenomeQuality") unbounded() action { (x, c) =>
      c.copy(minGenomeQuality = x)}
Peter van 't Hof's avatar
Peter van 't Hof committed
134
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
135

Peter van 't Hof's avatar
Peter van 't Hof committed
136
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
137
  def main(args: Array[String]): Unit = {
138
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
139
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
143
    val header = reader.getFileHeader
144
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
145
      setOutputFile(cmdArgs.outputVcf).
146
147
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
148
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
149

Peter van 't Hof's avatar
Peter van 't Hof committed
150
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
151
152
153
154
155
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
156
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
157

158
159
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163
164
165
166
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
        (!cmdArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.filterNoCalls || hasCalls(record)) &&
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
167
        minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
168
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
        calledIn(record, cmdArgs.calledIn) &&
        hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
173
174
175
176
177
178
179
180
181
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
          denovoInSample(record, cmdArgs.deNovoInSample) &&
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
182
        writer.add(record)
183
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
      } else
        invertedWriter.foreach(_.add(record))
186
      counterTotal += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      if (counterTotal % 100000 == 0) logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
188
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
192
    invertedWriter.foreach(_.close())
193
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
194
  }
195

Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199
200
201
202
203
  /**
   * Checks if given samples are called
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
206
    else true
  }

207
208
209
210
211
212
  /**
   * Checks if given genotypes for given samples are there
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
  def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
    samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2)
215
216
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
219
220
221
222
223
224
  /**
   * Checks if record has atleast minQualScore
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
227
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
228
229
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
230
231
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
232
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
235
236
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
237
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
240
241
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
242
243
244
245
246
247
248
249
  /**
   * Checks if DP genotype field have a minimal value
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
250
251
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
      DP >= minSampleDepth
    }) >= minSamplesPass
254
255
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
256
  /**
257
   * Checks if non-ref AD genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
258
259
260
261
262
263
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
264
265
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
266
      if (AD.nonEmpty) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
267
    }) >= minSamplesPass
268
269
  }

270
271
272
273
274
275
276
277
278
279
280
281
  /**
   * Checks if genome quality field has minimum value
   * @param record VCF record
   * @param minGQ smallest GQ allowed
   * @param minSamplesPass number of samples to consider
   * @return
   */
  def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = {
    record.getGenotypes.count(x => if (!x.hasGQ) false
    else if (x.getGQ >= minGQ) true else false) >= minSamplesPass
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
282
283
284
285
286
287
  /**
   * Checks if given samples does have a variant hin this record
   * @param record VCF record
   * @param mustHaveVariant List of samples that should have this variant
   * @return true if filter passed
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
288
289
  def mustHaveVariant(record: VariantContext, mustHaveVariant: List[String]): Boolean = {
    !mustHaveVariant.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall)
290
291
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
292
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
293
294
295
296
297
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
298
299
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
300
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
301
302
303
304
305
306
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
307
308
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
309
    true
310
311
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
312
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
313
314
315
316
317
318
  def denovoInSample(record: VariantContext, sample: String): Boolean = {
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
    for (allele <- genotype.getAlleles) {
      for (g <- record.getGenotypes if g.getSampleName != sample) {
319
320
321
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
322
    true
323
  }
324

Peter van 't Hof's avatar
Peter van 't Hof committed
325
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
326
327
328
329
330
331
332
333
334
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
335
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
336
337
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
338
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
339
340
341
342
343
344
345
346
347
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
348
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
349
350
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
351
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
352
353
354
355
356
357
358
359
360
361
362
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
363
364
365
366
367
368
369
370
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
371
372
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
373
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
374
375
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
376
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
377
378
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
379
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
380
}