VcfFilter.scala 17.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

Peter van 't Hof's avatar
Peter van 't Hof committed
27
import scala.collection.JavaConversions._
28
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
29

30
class VcfFilter(val root: Configurable) extends ToolCommandFuntion {
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
33
34
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
38

Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
41
42
  var minSampleDepth: Option[Int] = config("min_sample_depth")
  var minTotalDepth: Option[Int] = config("min_total_depth")
  var minAlternateDepth: Option[Int] = config("min_alternate_depth")
  var minSamplesPass: Option[Int] = config("min_samples_pass")
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var filterRefCalls: Boolean = config("filter_ref_calls", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  override def defaultCoreMemory = 3.0
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
48

  override def commandLine = super.commandLine +
    required("-I", inputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
49
    required("-o", outputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
    optional("--minSampleDepth", minSampleDepth) +
    optional("--minTotalDepth", minTotalDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
52
    optional("--minAlternateDepth", minAlternateDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
    optional("--minSamplesPass", minSamplesPass) +
    conditional(filterRefCalls, "--filterRefCalls")
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
}

Peter van 't Hof's avatar
Peter van 't Hof committed
57
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
  /** Container class for a trio */
Sander Bollen's avatar
Sander Bollen committed
59
  protected[tools] case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

65
66
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
67
                  invertedOutputVcf: Option[File] = None,
68
                  minQualScore: Option[Double] = None,
69
70
71
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
72
                  minSamplesPass: Int = 1,
73
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
74
                  calledIn: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
75
                  mustHaveGenotype: List[(String, GenotypeType)] = Nil,
76
                  deNovoInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
77
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
78
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
81
                  diffGenotype: List[(String, String)] = Nil,
82
                  filterHetVarToHomVar: List[(String, String)] = Nil,
83
                  filterRefCalls: Boolean = false,
84
85
                  filterNoCalls: Boolean = false,
                  iDset: Set[String] = Set()) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
98
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
101
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
107
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
110
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
113
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
116
      c.copy(deNovoInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
119
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
122
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
123
124
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
125
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
129
    } text "Must be called in this sample"
130
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
131
132
133
134
135
      c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype)
    } validate { x =>
      if (x.split(":").length == 2 && GenotypeType.values().contains(x.split(":")(1))) success
      else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ")
Peter van 't Hof's avatar
Peter van 't Hof committed
136
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
141
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
142
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
143
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
144
145
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
      c.copy(filterRefCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
    } text "Filter when there are only ref calls"
147
148
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
      c.copy(filterNoCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    } text "Filter when there are only no calls"
150
151
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
152
    } text "Min qual score"
153
154
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
155
    } text "Id that may pass the filter"
156
    opt[File]("idFile") unbounded () action { (x, c) =>
157
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
158
    } text "File that contain list of IDs to get from vcf file"
Peter van 't Hof's avatar
Peter van 't Hof committed
159
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
160

Peter van 't Hof's avatar
Peter van 't Hof committed
161
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
162
  def main(args: Array[String]): Unit = {
163
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
164
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
165
    val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
166

Peter van 't Hof's avatar
Peter van 't Hof committed
167
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
168
    val header = reader.getFileHeader
169
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
170
      setOutputFile(cmdArgs.outputVcf).
171
172
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
173
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
174

Peter van 't Hof's avatar
Peter van 't Hof committed
175
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
176
177
178
179
180
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
182

183
184
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
185
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
189
190
191
192
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
        (!cmdArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.filterNoCalls || hasCalls(record)) &&
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
        calledIn(record, cmdArgs.calledIn) &&
        hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
197
198
199
200
201
202
203
204
205
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
          denovoInSample(record, cmdArgs.deNovoInSample) &&
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
        writer.add(record)
207
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
      } else
        invertedWriter.foreach(_.add(record))
210
211
      counterTotal += 1
      if (counterTotal % 100000 == 0) logger.info(counterTotal + " variants processed, " + counterLeft + " left")
Peter van 't Hof's avatar
Peter van 't Hof committed
212
    }
213
    logger.info(counterTotal + " variants processed, " + counterLeft + " left")
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
216
    invertedWriter.foreach(_.close())
217
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
218
  }
219

Peter van 't Hof's avatar
Peter van 't Hof committed
220
221
222
223
224
225
226
227
  /**
   * Checks if given samples are called
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
228
229
230
    else true
  }

231
232
233
234
235
236
  /**
   * Checks if given genotypes for given samples are there
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
  def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
    samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2)
239
240
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
243
244
245
246
247
248
  /**
   * Checks if record has atleast minQualScore
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
251
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
254
255
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
256
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
259
260
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
261
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
264
265
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
269
270
271
272
273
  /**
   * Checks if DP genotype field have a minimal value
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
274
275
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
276
277
      DP >= minSampleDepth
    }) >= minSamplesPass
278
279
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
280
  /**
281
   * Checks if non-ref AD genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
282
283
284
285
286
287
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
288
289
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
290
      if (AD.nonEmpty) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
291
    }) >= minSamplesPass
292
293
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
294
295
296
297
298
299
  /**
   * Checks if given samples does have a variant hin this record
   * @param record VCF record
   * @param mustHaveVariant List of samples that should have this variant
   * @return true if filter passed
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
300
301
  def mustHaveVariant(record: VariantContext, mustHaveVariant: List[String]): Boolean = {
    !mustHaveVariant.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall)
302
303
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
304
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
305
306
307
308
309
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
310
311
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
312
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
313
314
315
316
317
318
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
319
320
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
321
    true
322
323
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
324
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
325
326
327
328
329
330
  def denovoInSample(record: VariantContext, sample: String): Boolean = {
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
    for (allele <- genotype.getAlleles) {
      for (g <- record.getGenotypes if g.getSampleName != sample) {
331
332
333
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
334
    true
335
  }
336

Peter van 't Hof's avatar
Peter van 't Hof committed
337
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
338
339
340
341
342
343
344
345
346
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
347
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
348
349
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
350
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
351
352
353
354
355
356
357
358
359
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
360
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
361
362
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
363
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
364
365
366
367
368
369
370
371
372
373
374
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
375
376
377
378
379
380
381
382
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
383
384
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
385
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
386
387
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
388
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
389
390
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
391
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
392
}