VcfFilter.scala 18.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
24

Peter van 't Hof's avatar
Peter van 't Hof committed
25
import scala.collection.JavaConversions._
26
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
29
  /** Container class for a trio */
Sander Bollen's avatar
Sander Bollen committed
30
  protected[tools] case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
33
34
35
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
39
40
  case class BooleanArgs(uniqueOnly: Boolean = false,
                         sharedOnly: Boolean = false,
                         filterRefCalls: Boolean = false,
                         filterNoCalls: Boolean = false)

41
42
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
43
                  invertedOutputVcf: Option[File] = None,
44
                  minQualScore: Option[Double] = None,
45
46
47
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
48
                  minSamplesPass: Int = 1,
49
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
50
                  calledIn: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
51
                  mustHaveGenotype: List[(String, GenotypeType)] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
52
                  uniqueVariantInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
53
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
54
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
57
                  booleanArgs: BooleanArgs = BooleanArgs(),
Peter van 't Hof's avatar
Peter van 't Hof committed
58
                  diffGenotype: List[(String, String)] = Nil,
59
                  filterHetVarToHomVar: List[(String, String)] = Nil,
60
                  iDset: Set[String] = Set(),
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
                  minGenomeQuality: Int = 0) extends AbstractArgs {
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
63
64

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
65
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
66
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
69
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
72
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
78
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
81
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
84
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
87
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
90
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
93
      c.copy(uniqueVariantInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
99
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
102
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
105
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
106
    } text "Must be called in this sample"
107
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
      c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype)
    } validate { x =>
Sander Bollen's avatar
Sander Bollen committed
110
      if (x.split(":").length == 2 && GenotypeType.values().map(_.toString).contains(x.split(":")(1))) success
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
      else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ")
Peter van 't Hof's avatar
Peter van 't Hof committed
113
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
118
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
121
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
122
      c.copy(booleanArgs = c.booleanArgs.copy(filterRefCalls = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
123
    } text "Filter when there are only ref calls"
124
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
125
      c.copy(booleanArgs = c.booleanArgs.copy(filterNoCalls = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
126
    } text "Filter when there are only no calls"
Peter van 't Hof's avatar
Peter van 't Hof committed
127
    opt[Unit]("uniqueOnly") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      c.copy(booleanArgs = c.booleanArgs.copy(uniqueOnly = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
    } text "Filter when there more then 1 sample have this variant"
    opt[Unit]("sharedOnly") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      c.copy(booleanArgs = c.booleanArgs.copy(sharedOnly = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
132
    } text "Filter when not all samples have this variant"
133
134
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
135
    } text "Min qual score"
136
137
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
138
    } text "Id that may pass the filter"
139
    opt[File]("idFile") unbounded () action { (x, c) =>
140
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
141
    } text "File that contain list of IDs to get from vcf file"
Sander Bollen's avatar
Sander Bollen committed
142
143
144
    opt[Int]("minGenomeQuality") unbounded () action { (x, c) =>
      c.copy(minGenomeQuality = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
145
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
148
  def main(args: Array[String]): Unit = {
149
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
151
    val cmdArgs = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
Peter van 't Hof's avatar
Peter van 't Hof committed
152

Peter van 't Hof's avatar
Peter van 't Hof committed
153
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
154
    val header = reader.getFileHeader
155
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
156
      setOutputFile(cmdArgs.outputVcf).
157
158
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
159
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
160

Peter van 't Hof's avatar
Peter van 't Hof committed
161
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
162
163
164
165
166
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
167
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
168

169
170
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
171
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
172
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
175
176
        (!cmdArgs.booleanArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.booleanArgs.filterNoCalls || hasCalls(record)) &&
        (!cmdArgs.booleanArgs.uniqueOnly || hasUniqeSample(record)) &&
        (!cmdArgs.booleanArgs.sharedOnly || allSamplesVariant(record)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
177
178
179
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
180
        minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
181
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
        calledIn(record, cmdArgs.calledIn) &&
        hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
187
188
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
189
          uniqueVariantInSample(record, cmdArgs.uniqueVariantInSample) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
193
194
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
195
        writer.add(record)
196
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
      } else
        invertedWriter.foreach(_.add(record))
199
      counterTotal += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
200
      if (counterTotal % 100000 == 0) logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
201
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
202
    logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
205
    invertedWriter.foreach(_.close())
206
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
207
  }
208

Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
  /**
   * Checks if given samples are called
Peter van 't Hof's avatar
Peter van 't Hof committed
211
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
214
215
216
217
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
218
219
220
    else true
  }

221
222
  /**
   * Checks if given genotypes for given samples are there
Peter van 't Hof's avatar
Peter van 't Hof committed
223
   *
224
225
226
227
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
228
229
  def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
    samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2)
230
231
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
232
233
  /**
   * Checks if record has atleast minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
234
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
235
236
237
238
239
240
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
243
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
244
245
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
246
247
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
248
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
251
252
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
  /** Checks if there is a variant in only 1 sample */
  def hasUniqeSample(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
255
    record.getSampleNames.exists(uniqueVariantInSample(record, _))
Peter van 't Hof's avatar
Peter van 't Hof committed
256
257
258
259
  }

  /** Checks if all samples are a variant */
  def allSamplesVariant(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
260
    record.getGenotypes.forall(g=> !g.isNonInformative && g.getAlleles.exists(a => a.isNonReference && !a.isNoCall))
Peter van 't Hof's avatar
Peter van 't Hof committed
261
262
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
263
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
264
265
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
266
267
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
268
269
  /**
   * Checks if DP genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
270
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
271
272
273
274
275
276
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
277
278
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
279
280
      DP >= minSampleDepth
    }) >= minSamplesPass
281
282
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
283
  /**
284
   * Checks if non-ref AD genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
285
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
286
287
288
289
290
291
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
292
293
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
294
      if (AD.nonEmpty && minAlternateDepth >= 0) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
295
    }) >= minSamplesPass
296
297
  }

298
299
  /**
   * Checks if genome quality field has minimum value
Peter van 't Hof's avatar
Peter van 't Hof committed
300
   *
301
302
303
304
305
306
   * @param record VCF record
   * @param minGQ smallest GQ allowed
   * @param minSamplesPass number of samples to consider
   * @return
   */
  def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
307
308
309
310
    record.getGenotypes.count(x =>
      if (minGQ == 0) true
      else if (!x.hasGQ) false
      else if (x.getGQ >= minGQ) true else false) >= minSamplesPass
311
312
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
313
314
  /**
   * Checks if given samples does have a variant hin this record
Peter van 't Hof's avatar
Peter van 't Hof committed
315
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
316
317
318
319
   * @param record VCF record
   * @param mustHaveVariant List of samples that should have this variant
   * @return true if filter passed
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
320
321
  def mustHaveVariant(record: VariantContext, mustHaveVariant: List[String]): Boolean = {
    !mustHaveVariant.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall)
322
323
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
324
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
325
326
327
328
329
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
330
331
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
332
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
333
334
335
336
337
338
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
339
340
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
341
    true
342
343
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
344
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
345
  def uniqueVariantInSample(record: VariantContext, sample: String): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
346
347
348
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
Peter van 't Hof's avatar
Peter van 't Hof committed
349
    for (allele <- genotype.getAlleles if allele.isNonReference) {
Peter van 't Hof's avatar
Peter van 't Hof committed
350
      for (g <- record.getGenotypes if g.getSampleName != sample) {
351
352
353
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
354
    true
355
  }
356

Peter van 't Hof's avatar
Peter van 't Hof committed
357
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
358
359
360
361
362
363
364
365
366
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
367
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
368
369
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
370
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
371
372
373
374
375
376
377
378
379
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
380
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
381
382
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
383
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
384
385
386
387
388
389
390
391
392
393
394
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
395
396
397
398
399
400
401
402
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
403
404
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
405
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
406
407
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
408
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
409
410
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
411
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
412
}