VcfFilter.scala 18.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

19
20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import htsjdk.variant.vcf.VCFFileReader
22
import nl.lumc.sasc.biopet.utils.{ ToolCommand, VcfUtils }
Peter van 't Hof's avatar
Peter van 't Hof committed
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
import scala.collection.JavaConversions._
25
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  /** Container class for a trio */
Sander Bollen's avatar
Sander Bollen committed
29
  protected[tools] case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
32
33
34
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
38
39
  case class BooleanArgs(uniqueOnly: Boolean = false,
                         sharedOnly: Boolean = false,
                         filterRefCalls: Boolean = false,
                         filterNoCalls: Boolean = false)

40
41
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
42
                  invertedOutputVcf: Option[File] = None,
43
                  minQualScore: Option[Double] = None,
44
45
46
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
47
                  minSamplesPass: Int = 1,
48
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
49
                  calledIn: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
50
                  mustHaveGenotype: List[(String, GenotypeType)] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
51
                  uniqueVariantInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
52
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
53
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
56
                  booleanArgs: BooleanArgs = BooleanArgs(),
Peter van 't Hof's avatar
Peter van 't Hof committed
57
                  diffGenotype: List[(String, String)] = Nil,
58
                  filterHetVarToHomVar: List[(String, String)] = Nil,
59
                  iDset: Set[String] = Set(),
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
                  minGenomeQuality: Int = 0) extends AbstractArgs {
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
62
63

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
64
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
65
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
68
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
83
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
86
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      c.copy(uniqueVariantInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
98
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
101
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
    } text "Must be called in this sample"
106
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
      c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype)
    } validate { x =>
Sander Bollen's avatar
Sander Bollen committed
109
      if (x.split(":").length == 2 && GenotypeType.values().map(_.toString).contains(x.split(":")(1))) success
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
      else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
115
116
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
117
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
118
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
121
      c.copy(booleanArgs = c.booleanArgs.copy(filterRefCalls = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    } text "Filter when there are only ref calls"
123
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
124
      c.copy(booleanArgs = c.booleanArgs.copy(filterNoCalls = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    } text "Filter when there are only no calls"
Peter van 't Hof's avatar
Peter van 't Hof committed
126
    opt[Unit]("uniqueOnly") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
127
      c.copy(booleanArgs = c.booleanArgs.copy(uniqueOnly = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
    } text "Filter when there more then 1 sample have this variant"
    opt[Unit]("sharedOnly") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
130
      c.copy(booleanArgs = c.booleanArgs.copy(sharedOnly = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    } text "Filter when not all samples have this variant"
132
133
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
134
    } text "Min qual score"
135
136
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
137
    } text "Id that may pass the filter"
138
    opt[File]("idFile") unbounded () action { (x, c) =>
139
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    } text "File that contain list of IDs to get from vcf file"
Sander Bollen's avatar
Sander Bollen committed
141
142
143
    opt[Int]("minGenomeQuality") unbounded () action { (x, c) =>
      c.copy(minGenomeQuality = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
144
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
145

Peter van 't Hof's avatar
Peter van 't Hof committed
146
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
147
  def main(args: Array[String]): Unit = {
148
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    val cmdArgs = argsParser.parse(args, Args()).getOrElse { throw new IllegalArgumentException }
Peter van 't Hof's avatar
Peter van 't Hof committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
    val header = reader.getFileHeader
154
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      setOutputFile(cmdArgs.outputVcf).
156
157
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
158
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
161
162
163
164
165
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
166
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
167

168
169
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
170
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
174
175
        (!cmdArgs.booleanArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.booleanArgs.filterNoCalls || hasCalls(record)) &&
        (!cmdArgs.booleanArgs.uniqueOnly || hasUniqeSample(record)) &&
        (!cmdArgs.booleanArgs.sharedOnly || allSamplesVariant(record)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
178
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
179
        minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
180
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
        calledIn(record, cmdArgs.calledIn) &&
        hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
185
186
187
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
188
          uniqueVariantInSample(record, cmdArgs.uniqueVariantInSample) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191
192
193
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
194
        writer.add(record)
195
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
      } else
        invertedWriter.foreach(_.add(record))
198
      counterTotal += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
199
      if (counterTotal % 100000 == 0) logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
200
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
201
    logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
204
    invertedWriter.foreach(_.close())
205
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
206
  }
207

Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
  /**
   * Checks if given samples are called
Peter van 't Hof's avatar
Peter van 't Hof committed
210
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
214
215
216
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
219
    else true
  }

220
221
  /**
   * Checks if given genotypes for given samples are there
Peter van 't Hof's avatar
Peter van 't Hof committed
222
   *
223
224
225
226
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
227
  def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
228
229
230
    samplesGenotypes.forall { x =>
      record.getGenotype(x._1).getType == x._2
    }
231
232
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
  /**
   * Checks if record has atleast minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
235
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
240
241
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
242
243
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
244
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
245
246
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
247
248
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
249
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
250
251
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
252
253
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
  /** Checks if there is a variant in only 1 sample */
  def hasUniqeSample(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
256
    record.getSampleNames.exists(uniqueVariantInSample(record, _))
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
260
  }

  /** Checks if all samples are a variant */
  def allSamplesVariant(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
261
    record.getGenotypes.forall(g => !g.isNonInformative && g.getAlleles.exists(a => a.isNonReference && !a.isNoCall))
Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
264
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
265
266
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
267
268
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
269
270
  /**
   * Checks if DP genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
271
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
274
275
276
277
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
278
279
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
280
281
      DP >= minSampleDepth
    }) >= minSamplesPass
282
283
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
284
  /**
285
   * Checks if non-ref AD genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
286
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
287
288
289
290
291
292
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
293
294
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      if (AD.nonEmpty && minAlternateDepth >= 0) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
296
    }) >= minSamplesPass
297
298
  }

299
300
  /**
   * Checks if genome quality field has minimum value
Peter van 't Hof's avatar
Peter van 't Hof committed
301
   *
302
303
304
305
306
307
   * @param record VCF record
   * @param minGQ smallest GQ allowed
   * @param minSamplesPass number of samples to consider
   * @return
   */
  def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
308
309
310
311
    record.getGenotypes.count(x =>
      if (minGQ == 0) true
      else if (!x.hasGQ) false
      else if (x.getGQ >= minGQ) true else false) >= minSamplesPass
312
313
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
314
315
  /**
   * Checks if given samples does have a variant hin this record
Peter van 't Hof's avatar
Peter van 't Hof committed
316
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
317
   * @param record VCF record
Sander Bollen's avatar
Sander Bollen committed
318
   * @param samples List of samples that should have this variant
Peter van 't Hof's avatar
Peter van 't Hof committed
319
320
   * @return true if filter passed
   */
Sander Bollen's avatar
Sander Bollen committed
321
322
  def mustHaveVariant(record: VariantContext, samples: List[String]): Boolean = {
    !samples.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall || VcfUtils.isCompoundNoCall(a))
323
324
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
325
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
326
327
328
329
330
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
331
332
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
333
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
334
335
336
337
338
339
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
340
341
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
342
    true
343
344
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
345
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
346
  def uniqueVariantInSample(record: VariantContext, sample: String): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
347
348
349
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
Peter van 't Hof's avatar
Peter van 't Hof committed
350
    if (genotype.getAlleles.forall(_.isReference)) return false
Peter van 't Hof's avatar
Peter van 't Hof committed
351
    for (allele <- genotype.getAlleles if allele.isNonReference) {
Peter van 't Hof's avatar
Peter van 't Hof committed
352
      for (g <- record.getGenotypes if g.getSampleName != sample) {
353
354
355
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
356
    true
357
  }
358

Peter van 't Hof's avatar
Peter van 't Hof committed
359
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
360
361
362
363
364
365
366
367
368
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
369
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
370
371
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
372
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
373
374
375
376
377
378
379
380
381
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
382
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
383
384
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
385
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
386
387
388
389
390
391
392
393
394
395
396
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
397
398
399
400
401
402
403
404
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
405
406
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
407
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
408
409
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
410
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
411
412
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
413
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
414
}