VcfFilter.scala 18.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
16 17

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

19 20
import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import htsjdk.variant.vcf.VCFFileReader
22
import nl.lumc.sasc.biopet.utils.{ ToolCommand, VcfUtils }
Peter van 't Hof's avatar
Peter van 't Hof committed
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
import scala.collection.JavaConversions._
25
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  /** Container class for a trio */
Sander Bollen's avatar
Sander Bollen committed
29
  protected[tools] case class Trio(child: String, father: String, mother: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
30 31 32 33 34
    def this(arg: String) = {
      this(arg.split(":")(0), arg.split(":")(1), arg.split(":")(2))
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
35 36 37 38 39
  case class BooleanArgs(uniqueOnly: Boolean = false,
                         sharedOnly: Boolean = false,
                         filterRefCalls: Boolean = false,
                         filterNoCalls: Boolean = false)

40 41
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
42
                  invertedOutputVcf: Option[File] = None,
43
                  minQualScore: Option[Double] = None,
44 45 46
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
47
                  minSamplesPass: Int = 1,
48
                  mustHaveVariant: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
49
                  calledIn: List[String] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
50
                  mustHaveGenotype: List[(String, GenotypeType)] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
51
                  uniqueVariantInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
52
                  resToDom: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
53
                  trioCompound: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
54 55
                  deNovoTrio: List[Trio] = Nil,
                  trioLossOfHet: List[Trio] = Nil,
Peter van 't Hof's avatar
Peter van 't Hof committed
56
                  booleanArgs: BooleanArgs = BooleanArgs(),
Peter van 't Hof's avatar
Peter van 't Hof committed
57
                  diffGenotype: List[(String, String)] = Nil,
58
                  filterHetVarToHomVar: List[(String, String)] = Nil,
59
                  iDset: Set[String] = Set(),
Peter van 't Hof's avatar
Peter van 't Hof committed
60 61
                  minGenomeQuality: Int = 0) extends AbstractArgs {
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
62 63

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
64
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
65
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
66 67
    } text "Input vcf file"
    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
68
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
    } text "Output vcf file"
    opt[File]("invertedOutputVcf") maxOccurs 1 valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      c.copy(invertedOutputVcf = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73
    } text "inverted output vcf file"
    opt[Int]("minSampleDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
75 76
    } text "Min value for DP in genotype fields"
    opt[Int]("minTotalDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
78 79
    } text "Min value of DP field in INFO fields"
    opt[Int]("minAlternateDepth") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
81 82
    } text "Min value of AD field in genotype fields"
    opt[Int]("minSamplesPass") unbounded () valueName "<int>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
83
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
84 85
    } text "Min number off samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
    opt[String]("resToDom") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
86
      c.copy(resToDom = new Trio(x) :: c.resToDom)
Peter van 't Hof's avatar
Peter van 't Hof committed
87 88
    } text "Only shows variants where child is homozygous and both parants hetrozygous"
    opt[String]("trioCompound") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      c.copy(trioCompound = new Trio(x) :: c.trioCompound)
Peter van 't Hof's avatar
Peter van 't Hof committed
90 91
    } text "Only shows variants where child is a compound variant combined from both parants"
    opt[String]("deNovoInSample") maxOccurs 1 unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      c.copy(uniqueVariantInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
93 94
    } text "Only show variants that contain unique alleles in complete set for given sample"
    opt[String]("deNovoTrio") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      c.copy(deNovoTrio = new Trio(x) :: c.deNovoTrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
96 97
    } text "Only show variants that are denovo in the trio"
    opt[String]("trioLossOfHet") unbounded () valueName "<child:father:mother>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
98
      c.copy(trioLossOfHet = new Trio(x) :: c.trioLossOfHet)
Peter van 't Hof's avatar
Peter van 't Hof committed
99 100
    } text "Only show variants where a loss of hetrozygosity is detected"
    opt[String]("mustHaveVariant") unbounded () valueName "<sample>" action { (x, c) =>
101
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
102 103
    } text "Given sample must have 1 alternative allele"
    opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
      c.copy(calledIn = x :: c.calledIn)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
    } text "Must be called in this sample"
106
    opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
107 108
      c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype)
    } validate { x =>
Sander Bollen's avatar
Sander Bollen committed
109
      if (x.split(":").length == 2 && GenotypeType.values().map(_.toString).contains(x.split(":")(1))) success
Peter van 't Hof's avatar
Peter van 't Hof committed
110 111
      else failure("--mustHaveGenotype should be in this format: sample:genotype")
    } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
    opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
113 114
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
115 116
    } text "Given samples must have a different genotype"
    opt[String]("filterHetVarToHomVar") unbounded () valueName "<sample:sample>" action { (x, c) =>
117
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
118
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    } text "If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
121
      c.copy(booleanArgs = c.booleanArgs.copy(filterRefCalls = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    } text "Filter when there are only ref calls"
123
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
124
      c.copy(booleanArgs = c.booleanArgs.copy(filterNoCalls = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    } text "Filter when there are only no calls"
Peter van 't Hof's avatar
Peter van 't Hof committed
126
    opt[Unit]("uniqueOnly") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
127
      c.copy(booleanArgs = c.booleanArgs.copy(uniqueOnly = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
128 129
    } text "Filter when there more then 1 sample have this variant"
    opt[Unit]("sharedOnly") unbounded () action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
130
      c.copy(booleanArgs = c.booleanArgs.copy(sharedOnly = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    } text "Filter when not all samples have this variant"
132 133
    opt[Double]("minQualScore") unbounded () action { (x, c) =>
      c.copy(minQualScore = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
134
    } text "Min qual score"
135 136
    opt[String]("id") unbounded () action { (x, c) =>
      c.copy(iDset = c.iDset + x)
Peter van 't Hof's avatar
Peter van 't Hof committed
137
    } text "Id that may pass the filter"
138
    opt[File]("idFile") unbounded () action { (x, c) =>
139
      c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    } text "File that contain list of IDs to get from vcf file"
Sander Bollen's avatar
Sander Bollen committed
141 142 143
    opt[Int]("minGenomeQuality") unbounded () action { (x, c) =>
      c.copy(minGenomeQuality = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
144
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
145

Peter van 't Hof's avatar
Peter van 't Hof committed
146
  /** @param args the command line arguments */
Peter van 't Hof's avatar
Peter van 't Hof committed
147
  def main(args: Array[String]): Unit = {
148
    logger.info("Start")
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    val cmdArgs = argsParser.parse(args, Args()).getOrElse { throw new IllegalArgumentException }
Peter van 't Hof's avatar
Peter van 't Hof committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
    val header = reader.getFileHeader
154
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      setOutputFile(cmdArgs.outputVcf).
156 157
      setReferenceDictionary(header.getSequenceDictionary).
      build)
Peter van 't Hof's avatar
Peter van 't Hof committed
158
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
    val invertedWriter = cmdArgs.invertedOutputVcf.collect {
Sander Bollen's avatar
Sander Bollen committed
161 162 163 164 165
      case x => new VariantContextWriterBuilder().
        setOutputFile(x).
        setReferenceDictionary(header.getSequenceDictionary).
        build
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
166
    invertedWriter.foreach(_.writeHeader(header))
Peter van 't Hof's avatar
Peter van 't Hof committed
167

168 169
    var counterTotal = 0
    var counterLeft = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
170
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
      if (cmdArgs.minQualScore.map(minQualscore(record, _)).getOrElse(true) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
172 173 174 175
        (!cmdArgs.booleanArgs.filterRefCalls || hasNonRefCalls(record)) &&
        (!cmdArgs.booleanArgs.filterNoCalls || hasCalls(record)) &&
        (!cmdArgs.booleanArgs.uniqueOnly || hasUniqeSample(record)) &&
        (!cmdArgs.booleanArgs.sharedOnly || allSamplesVariant(record)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
176 177 178
        hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
        hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
        minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
179
        minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
180
        (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
181 182
        calledIn(record, cmdArgs.calledIn) &&
        hasGenotype(record, cmdArgs.mustHaveGenotype) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
183 184 185 186 187
        (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) &&
        (
          cmdArgs.filterHetVarToHomVar.isEmpty ||
          cmdArgs.filterHetVarToHomVar.forall(x => filterHetVarToHomVar(record, x._1, x._2))
        ) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
188
          uniqueVariantInSample(record, cmdArgs.uniqueVariantInSample) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
189 190 191 192 193
          denovoTrio(record, cmdArgs.deNovoTrio) &&
          denovoTrio(record, cmdArgs.trioLossOfHet, onlyLossHet = true) &&
          resToDom(record, cmdArgs.resToDom) &&
          trioCompound(record, cmdArgs.trioCompound) &&
          (cmdArgs.iDset.isEmpty || inIdSet(record, cmdArgs.iDset))) {
Peter van 't Hof's avatar
Peter van 't Hof committed
194
        writer.add(record)
195
        counterLeft += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
196 197
      } else
        invertedWriter.foreach(_.add(record))
198
      counterTotal += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
199
      if (counterTotal % 100000 == 0) logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
200
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
201
    logger.info(s"$counterTotal variants processed, $counterLeft passed filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
202 203
    reader.close()
    writer.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
204
    invertedWriter.foreach(_.close())
205
    logger.info("Done")
Peter van 't Hof's avatar
Peter van 't Hof committed
206
  }
207

Peter van 't Hof's avatar
Peter van 't Hof committed
208 209
  /**
   * Checks if given samples are called
Peter van 't Hof's avatar
Peter van 't Hof committed
210
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
211 212 213 214 215 216
   * @param record VCF record
   * @param samples Samples that need this sample to be called
   * @return false when filters fail
   */
  def calledIn(record: VariantContext, samples: List[String]): Boolean = {
    if (!samples.forall(record.getGenotype(_).isCalled)) false
Peter van 't Hof's avatar
Peter van 't Hof committed
217 218 219
    else true
  }

220 221
  /**
   * Checks if given genotypes for given samples are there
Peter van 't Hof's avatar
Peter van 't Hof committed
222
   *
223 224 225 226
   * @param record VCF record
   * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
   * @return false when filter fails
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
227
  def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
228 229 230
    samplesGenotypes.forall { x =>
      record.getGenotype(x._1).getType == x._2
    }
231 232
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
233 234
  /**
   * Checks if record has atleast minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
235
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
236 237 238 239 240 241
   * @param record VCF record
   * @param minQualScore Minimal quality score
   * @return false when filters fail
   */
  def minQualscore(record: VariantContext, minQualScore: Double): Boolean = {
    record.getPhredScaledQual >= minQualScore
Peter van 't Hof's avatar
Peter van 't Hof committed
242 243
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
244
  /** returns true record contains Non reference genotypes */
Peter van 't Hof's avatar
Peter van 't Hof committed
245 246
  def hasNonRefCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isHomRef)
247 248
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
249
  /** returns true when record has calls */
Peter van 't Hof's avatar
Peter van 't Hof committed
250 251
  def hasCalls(record: VariantContext): Boolean = {
    record.getGenotypes.exists(g => !g.isNoCall)
252 253
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
254 255
  /** Checks if there is a variant in only 1 sample */
  def hasUniqeSample(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
256
    record.getSampleNames.exists(uniqueVariantInSample(record, _))
Peter van 't Hof's avatar
Peter van 't Hof committed
257 258 259 260
  }

  /** Checks if all samples are a variant */
  def allSamplesVariant(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
261
    record.getGenotypes.forall(g => !g.isNonInformative && g.getAlleles.exists(a => a.isNonReference && !a.isNoCall))
Peter van 't Hof's avatar
Peter van 't Hof committed
262 263
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
264
  /** returns true when DP INFO field is atleast the given value */
Peter van 't Hof's avatar
Peter van 't Hof committed
265 266
  def hasMinTotalDepth(record: VariantContext, minTotalDepth: Int): Boolean = {
    record.getAttributeAsInt("DP", -1) >= minTotalDepth
267 268
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
269 270
  /**
   * Checks if DP genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
271
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
272 273 274 275 276 277
   * @param record VCF record
   * @param minSampleDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def hasMinSampleDepth(record: VariantContext, minSampleDepth: Int, minSamplesPass: Int = 1): Boolean = {
278 279
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
280 281
      DP >= minSampleDepth
    }) >= minSamplesPass
282 283
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
284
  /**
285
   * Checks if non-ref AD genotype field have a minimal value
Peter van 't Hof's avatar
Peter van 't Hof committed
286
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
287 288 289 290 291 292
   * @param record VCF record
   * @param minAlternateDepth minimal depth
   * @param minSamplesPass Minimal number of samples to pass filter
   * @return true if filter passed
   */
  def minAlternateDepth(record: VariantContext, minAlternateDepth: Int, minSamplesPass: Int = 1): Boolean = {
293 294
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      if (AD.nonEmpty && minAlternateDepth >= 0) AD.tail.count(_ >= minAlternateDepth) > 0 else true
Peter van 't Hof's avatar
Peter van 't Hof committed
296
    }) >= minSamplesPass
297 298
  }

299 300
  /**
   * Checks if genome quality field has minimum value
Peter van 't Hof's avatar
Peter van 't Hof committed
301
   *
302 303 304 305 306 307
   * @param record VCF record
   * @param minGQ smallest GQ allowed
   * @param minSamplesPass number of samples to consider
   * @return
   */
  def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
308 309 310 311
    record.getGenotypes.count(x =>
      if (minGQ == 0) true
      else if (!x.hasGQ) false
      else if (x.getGQ >= minGQ) true else false) >= minSamplesPass
312 313
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
314 315
  /**
   * Checks if given samples does have a variant hin this record
Peter van 't Hof's avatar
Peter van 't Hof committed
316
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
317
   * @param record VCF record
Sander Bollen's avatar
Sander Bollen committed
318
   * @param samples List of samples that should have this variant
Peter van 't Hof's avatar
Peter van 't Hof committed
319 320
   * @return true if filter passed
   */
Sander Bollen's avatar
Sander Bollen committed
321
  def mustHaveVariant(record: VariantContext, samples: List[String]): Boolean = {
322 323 324 325 326
    samples.foreach { s =>
      if (!record.getSampleNames.toList.contains(s)) {
        throw new IllegalArgumentException(s"Sample name $s does not exist in VCF file")
      }
    }
Sander Bollen's avatar
Sander Bollen committed
327
    !samples.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall || VcfUtils.isCompoundNoCall(a))
328 329
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
330
  /** Checks if given samples have the same genotype */
Peter van 't Hof's avatar
Peter van 't Hof committed
331 332 333 334 335
  def notSameGenotype(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.sameGenotype(genotype2)) false
    else true
336 337
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
338
  /** Checks if sample1 is hetrozygous and if sample2 is homozygous for a alternative allele in sample1 */
Peter van 't Hof's avatar
Peter van 't Hof committed
339 340 341 342 343 344
  def filterHetVarToHomVar(record: VariantContext, sample1: String, sample2: String): Boolean = {
    val genotype1 = record.getGenotype(sample1)
    val genotype2 = record.getGenotype(sample2)
    if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
      for (allele <- genotype1.getAlleles if allele.isNonReference) {
        if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
345 346
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
347
    true
348 349
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
350
  /** Checks if given sample have alternative alleles that are unique in the VCF record */
Peter van 't Hof's avatar
Peter van 't Hof committed
351
  def uniqueVariantInSample(record: VariantContext, sample: String): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
352 353 354
    if (sample == null) return true
    val genotype = record.getGenotype(sample)
    if (genotype.isNoCall) return false
Peter van 't Hof's avatar
Peter van 't Hof committed
355
    if (genotype.getAlleles.forall(_.isReference)) return false
Peter van 't Hof's avatar
Peter van 't Hof committed
356
    for (allele <- genotype.getAlleles if allele.isNonReference) {
Peter van 't Hof's avatar
Peter van 't Hof committed
357
      for (g <- record.getGenotypes if g.getSampleName != sample) {
358 359 360
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
361
    true
362
  }
363

Peter van 't Hof's avatar
Peter van 't Hof committed
364
  /** Return true when variant is homozygous in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
365 366 367 368 369 370 371 372 373
  def resToDom(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHomVar && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) == 1 &&
          record.getGenotype(trio.mother).countAllele(allele) == 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
374
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
375 376
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
377
  /** Returns true when variant a compound variant in the child and hetrozygous in parants */
Peter van 't Hof's avatar
Peter van 't Hof committed
378 379 380 381 382 383 384 385 386
  def trioCompound(record: VariantContext, trios: List[Trio]): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)

      if (child.isHetNonRef && child.getAlleles.forall(allele => {
        record.getGenotype(trio.father).countAllele(allele) >= 1 &&
          record.getGenotype(trio.mother).countAllele(allele) >= 1
      })) return true
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
387
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
388 389
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
390
  /** Returns true when child got a deNovo variant */
Peter van 't Hof's avatar
Peter van 't Hof committed
391 392 393 394 395 396 397 398 399 400 401
  def denovoTrio(record: VariantContext, trios: List[Trio], onlyLossHet: Boolean = false): Boolean = {
    for (trio <- trios) {
      val child = record.getGenotype(trio.child)
      val father = record.getGenotype(trio.father)
      val mother = record.getGenotype(trio.mother)

      for (allele <- child.getAlleles) {
        val childCount = child.countAllele(allele)
        val fatherCount = father.countAllele(allele)
        val motherCount = mother.countAllele(allele)

Peter van 't Hof's avatar
Peter van 't Hof committed
402 403 404 405 406 407 408 409
        if (onlyLossHet) {
          if (childCount == 2 && (
            (fatherCount == 2 && motherCount == 0) ||
            (fatherCount == 0 && motherCount == 2))) return true
        } else {
          if (childCount == 1 && fatherCount == 0 && motherCount == 0) return true
          else if (childCount == 2 && (fatherCount == 0 || motherCount == 0)) return true
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
410 411
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
412
    trios.isEmpty
Peter van 't Hof's avatar
Peter van 't Hof committed
413 414
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
415
  /** Returns true when VCF record contains a ID from the given list */
Peter van 't Hof's avatar
Peter van 't Hof committed
416 417
  def inIdSet(record: VariantContext, idSet: Set[String]): Boolean = {
    record.getID.split(",").exists(idSet.contains)
418
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
419
}