VcfFilter.scala 17.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

Peter van 't Hof's avatar
Peter van 't Hof committed
27
import scala.collection.JavaConversions._
28
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
29

30
class VcfFilter(val root: Configurable) extends ToolCommandFuntion {
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
33
34
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
38

Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
41
42
  var minSampleDepth: Option[Int] = config("min_sample_depth")
  var minTotalDepth: Option[Int] = config("min_total_depth")
  var minAlternateDepth: Option[Int] = config("min_alternate_depth")
  var minSamplesPass: Option[Int] = config("min_samples_pass")
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var filterRefCalls: Boolean = config("filter_ref_calls", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  override def defaultCoreMemory = 3.0
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
48

  override def commandLine = super.commandLine +
    required("-I", inputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
49
    required("-o", outputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
    optional("--minSampleDepth", minSampleDepth) +
    optional("--minTotalDepth", minTotalDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
52
    optional("--minAlternateDepth", minAlternateDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
    optional("--minSamplesPass", minSamplesPass) +
    conditional(filterRefCalls, "--filterRefCalls")
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
}

Peter van 't Hof's avatar
Peter van 't Hof committed
57
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
  /** Container class for a trio */
  protected case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

65
66
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
67
                  invertedOutputVcf: Option[File] = None,
68
                  minQualScore: Option[Double] = None,
69
70
71
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
72
                  minSamplesPass: Int = 1,
73
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
74
                  calledIn: List[String] = Nil,
75
                  mustHaveGenotype: List[String] = Nil,
76
                  deNovoInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
77
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
78
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
81
                  diffGenotype: List[(String, String)] = Nil,
82
                  filterHetVarToHomVar: List[(String, String)] = Nil,
83
                  filterRefCalls: Boolean = false,
84
85
                  filterNoCalls: Boolean = false,
                  iDset: Set[String] = Set()) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
98
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
101
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
107
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
110
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
113
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
116
      c.copy(deNovoInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
119
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
122
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
123
124
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
125
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
129
    } text "Must be called in this sample"
130
131
132
133
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
      c.copy(mustHaveGenotype = x :: c.mustHaveGenotype)
    } validate { x => if (x.split(":").length == 2 && Set("HET", "HOM_REF", "HOM_VAR", "MIXED", "NO_CALL", "UNAVAILABLE").contains(x.split(":")(1))) success else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be HET, HOM_REF, HOM_VAR, MIXED, NO_CALL and UNAVAILABLE"
Peter van 't Hof's avatar
Peter van 't Hof committed
134
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
135
136
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
139
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
141
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
      c.copy(filterRefCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
144
    } text "Filter when there are only ref calls"
145
146
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
      c.copy(filterNoCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
147
    } text "Filter when there are only no calls"
148
149
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    } text "Min qual score"
151
152
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
    } text "Id that may pass the filter"
154
    opt[File]("idFile") unbounded () action { (x, c) =>
155
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
156
    } text "File that contain list of IDs to get from vcf file"
Peter van 't Hof's avatar
Peter van 't Hof committed
157
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
158

Peter van 't Hof's avatar
Peter van 't Hof committed
159
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
160
  def main(args: Array[String]): Unit = {
161
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
162
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
163
    val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
166
    val header = reader.getFileHeader
167
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      setOutputFile(cmdArgs.outputVcf).
169
170
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
171
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
172

Peter van 't Hof's avatar
Peter van 't Hof committed
173
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
174
175
176
177
178
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
179
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
180

181
182
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
183
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
187
188
189
190
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
        (!cmdArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.filterNoCalls || hasCalls(record)) &&
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
191
        calledIn(record, cmdArgs.calledIn) && hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194
195
196
197
198
199
200
201
202
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
          denovoInSample(record, cmdArgs.deNovoInSample) &&
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
203
        writer.add(record)
204
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
      } else
        invertedWriter.foreach(_.add(record))
207
208
      counterTotal += 1
      if (counterTotal % 100000 == 0) logger.info(counterTotal + " variants processed, " + counterLeft + " left")
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    }
210
    logger.info(counterTotal + " variants processed, " + counterLeft + " left")
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
213
    invertedWriter.foreach(_.close())
214
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
215
  }
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
219
220
221
222
223
224
  /**
   * Checks if given samples are called
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
227
    else true
  }

228
229
230
231
232
233
234
235
236
237
238
  /**
   * Checks if given genotypes for given samples are there
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
  def hasGenotype(record: VariantContext, samplesGenotypes: List[String]): Boolean = {
    if (!samplesGenotypes.forall(x => record.getGenotype(x.split(":")(0)).getType == GenotypeType.valueOf(x.split(":")(1)))) false
    else true
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
239
240
241
242
243
244
245
246
  /**
   * Checks if record has atleast minQualScore
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
249
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
250
251
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
252
253
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
254
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
257
258
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
259
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
262
263
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
264
265
266
267
268
269
270
271
  /**
   * Checks if DP genotype field have a minimal value
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
272
273
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
      DP >= minSampleDepth
    }) >= minSamplesPass
276
277
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
278
279
280
281
282
283
284
285
  /**
   * Checks if AD genotype field have a minimal value
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
286
287
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
288
      if (AD.nonEmpty) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
289
    }) >= minSamplesPass
290
291
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
292
293
294
295
296
297
  /**
   * Checks if given samples does have a variant hin this record
   * @param record VCF record
   * @param mustHaveVariant List of samples that should have this variant
   * @return true if filter passed
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
298
299
  def mustHaveVariant(record: VariantContext, mustHaveVariant: List[String]): Boolean = {
    !mustHaveVariant.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall)
300
301
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
302
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
303
304
305
306
307
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
308
309
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
310
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
311
312
313
314
315
316
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
317
318
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
319
    true
320
321
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
322
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
323
324
325
326
327
328
  def denovoInSample(record: VariantContext, sample: String): Boolean = {
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
    for (allele <- genotype.getAlleles) {
      for (g <- record.getGenotypes if g.getSampleName != sample) {
329
330
331
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
332
    true
333
  }
334

Peter van 't Hof's avatar
Peter van 't Hof committed
335
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
336
337
338
339
340
341
342
343
344
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
345
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
346
347
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
348
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
349
350
351
352
353
354
355
356
357
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
358
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
359
360
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
361
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
362
363
364
365
366
367
368
369
370
371
372
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
373
374
375
376
377
378
379
380
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
381
382
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
383
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
384
385
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
386
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
387
388
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
389
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
390
}