VcfFilter.scala 9.72 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
2
3
4

import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
5
6
import htsjdk.variant.vcf.{ VCFHeader, VCFFileReader }
import htsjdk.variant.variantcontext.VariantContext
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import java.util.ArrayList
Peter van 't Hof's avatar
Peter van 't Hof committed
9
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
10
import nl.lumc.sasc.biopet.core.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
11
12
13
14
15
16
17
18
19
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._

class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
20

Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
27
  var minSampleDepth: Option[Int] = _
  var minTotalDepth: Option[Int] = _
  var minAlternateDepth: Option[Int] = _
  var minSamplesPass: Option[Int] = _
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  var filterRefCalls: Boolean = _
Peter van 't Hof's avatar
Peter van 't Hof committed
29

Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
Peter van 't Hof's avatar
Peter van 't Hof committed
32

Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
36
37
  override def afterGraph {
    minSampleDepth = config("min_sample_depth")
    minTotalDepth = config("min_total_depth")
    minAlternateDepth = config("min_alternate_depth")
    minSamplesPass = config("min_samples_pass")
Peter van 't Hof's avatar
Peter van 't Hof committed
38
    filterRefCalls = config("filter_ref_calls")
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42

  override def commandLine = super.commandLine +
    required("-I", inputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
43
    required("-o", outputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
    optional("--minSampleDepth", minSampleDepth) +
    optional("--minTotalDepth", minTotalDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
46
    optional("--minAlternateDepth", minAlternateDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
    optional("--minSamplesPass", minSamplesPass) +
    conditional(filterRefCalls, "--filterRefCalls")
Peter van 't Hof's avatar
Peter van 't Hof committed
49
50
}

Peter van 't Hof's avatar
Peter van 't Hof committed
51
object VcfFilter extends ToolCommand {
52
53
  case class Args(inputVcf: File = null,
                  outputVcf: File = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
54
                  minQualscore: Option[Double] = None,
55
56
57
58
59
60
61
                  minSampleDepth: Int = -1,
                  minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1,
                  minSamplesPass: Int = 0,
                  minBamAlternateDepth: Int = 0,
                  mustHaveVariant: List[String] = Nil,
                  denovoInSample: String = null,
Peter van 't Hof's avatar
Peter van 't Hof committed
62
                  diffGenotype: List[(String, String)] = Nil,
63
                  filterHetVarToHomVar: List[(String, String)] = Nil,
64
65
                  filterRefCalls: Boolean = false,
                  filterNoCalls: Boolean = false) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
    opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(inputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    } text ("Input vcf file")
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
    opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(outputVcf = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
    } text ("Output vcf file")
    opt[Int]("minSampleDepth") unbounded () valueName ("<int>") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      c.copy(minSampleDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
    } text ("Min value for DP in genotype fields")
    opt[Int]("minTotalDepth") unbounded () valueName ("<int>") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
78
      c.copy(minTotalDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
    } text ("Min value of DP field in INFO fields")
    opt[Int]("minAlternateDepth") unbounded () valueName ("<int>") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
81
      c.copy(minAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
    } text ("Min value of AD field in genotype fields")
    opt[Int]("minSamplesPass") unbounded () valueName ("<int>") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
84
      c.copy(minSamplesPass = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    } text ("Min number opf samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth")
    opt[Int]("minBamAlternateDepth") unbounded () valueName ("<int>") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
87
      c.copy(minBamAlternateDepth = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
    } // TODO: Convert this to more generic filter
    opt[String]("denovoInSample") maxOccurs (1) unbounded () valueName ("<sample>") action { (x, c) =>
90
      c.copy(denovoInSample = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
    } text ("Only show variants that contain unique alleles in compete set for given sample")
    opt[String]("mustHaveVariant") unbounded () valueName ("<sample>") action { (x, c) =>
93
      c.copy(mustHaveVariant = x :: c.mustHaveVariant)
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
96
97
98
99
    } text ("Given sample must have 1 alternative allele")
    opt[String]("diffGenotype") unbounded () valueName ("<sample:sample>") action { (x, c) =>
      c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype)
    } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample")
    } text ("Given samples must have a different genotype")
    opt[String]("filterHetVarToHomVar") unbounded () valueName ("<sample:sample>") action { (x, c) =>
100
      c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
    } validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample")
    } text ("If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filterd")
Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
      c.copy(filterRefCalls = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
    } text ("Filter when there are only ref calls")
106
107
108
    opt[Unit]("filterNoCalls") unbounded () action { (x, c) =>
      c.copy(filterNoCalls = true)
    } text ("Filter when there are only no calls")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
110
111
    opt[Double]("minQualscore") unbounded () action { (x, c) =>
      c.copy(minQualscore = Some(x))
    } text ("Min qual score")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
113

114
115
  var commandArgs: Args = _

Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
118
119
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    val argsParser = new OptParser
121
    commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
    val reader = new VCFFileReader(commandArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
124
    val header = reader.getFileHeader
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
    for (record <- reader) {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
      if (minQualscore(record) &&
        filterRefCalls(record) &&
131
132
133
134
        filterNoCalls(record) &&
        minTotalDepth(record) &&
        minSampleDepth(record) &&
        minAlternateDepth(record) &&
135
136
137
138
        minBamAlternateDepth(record, header) &&
        mustHaveVariant(record) &&
        notSameGenotype(record) &&
        filterHetVarToHomVar(record) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
139
        denovoInSample(record)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
        writer.add(record)
Peter van 't Hof's avatar
Peter van 't Hof committed
141
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
145
    }
    reader.close
    writer.close
  }
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
149
150
151
  def minQualscore(record: VariantContext): Boolean = {
    if (commandArgs.minQualscore.isEmpty) return true
    record.getPhredScaledQual >= commandArgs.minQualscore.get
  }

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  def filterRefCalls(record: VariantContext): Boolean = {
    if (commandArgs.filterNoCalls) record.getGenotypes.exists(g => !g.isHomRef)
    else true
  }

  def filterNoCalls(record: VariantContext): Boolean = {
    if (commandArgs.filterNoCalls) record.getGenotypes.exists(g => !g.isNoCall)
    else true
  }

  def minTotalDepth(record: VariantContext): Boolean = {
    record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth
  }

  def minSampleDepth(record: VariantContext): Boolean = {
    record.getGenotypes.count(genotype => {
      val DP = if (genotype.hasDP) genotype.getDP else -1
      DP >= commandArgs.minSampleDepth
    }) >= commandArgs.minSamplesPass
  }

  def minAlternateDepth(record: VariantContext): Boolean = {
    record.getGenotypes.count(genotype => {
      val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
      if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true
    }) >= commandArgs.minSamplesPass
  }

180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
  def minBamAlternateDepth(record: VariantContext, header: VCFHeader): Boolean = {
    val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList

    val bamADvalues = (for (field <- bamADFields) yield {
      record.getAttribute(field, new ArrayList) match {
        case t: ArrayList[_] if t.length > 1 => {
          for (i <- 1 until t.size) yield {
            t(i) match {
              case a: Int    => a > commandArgs.minBamAlternateDepth
              case a: String => a.toInt > commandArgs.minBamAlternateDepth
              case _         => false
            }
          }
        }
        case _ => List(false)
      }
    }).flatten

    return commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass
  }

  def mustHaveVariant(record: VariantContext): Boolean = {
    return !commandArgs.mustHaveVariant.map(record.getGenotype(_)).exists(a => a.isHomRef || a.isNoCall)
  }

  def notSameGenotype(record: VariantContext): Boolean = {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
    for ((sample1, sample2) <- commandArgs.diffGenotype) {
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
      val genotype1 = record.getGenotype(sample1)
      val genotype2 = record.getGenotype(sample2)
      if (genotype1.sameGenotype(genotype2)) return false
    }
    return true
  }

  def filterHetVarToHomVar(record: VariantContext): Boolean = {
    for ((sample1, sample2) <- commandArgs.filterHetVarToHomVar) {
      val genotype1 = record.getGenotype(sample1)
      val genotype2 = record.getGenotype(sample2)
      if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
        for (allele <- genotype1.getAlleles if allele.isNonReference) {
          if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
        }
      }
    }
    return true
  }

  def denovoInSample(record: VariantContext): Boolean = {
    if (commandArgs.denovoInSample == null) return true
    val genotype = record.getGenotype(commandArgs.denovoInSample)
    for (allele <- genotype.getAlleles if allele.isNonReference) {
      for (g <- record.getGenotypes if g.getSampleName != commandArgs.denovoInSample) {
        if (g.getAlleles.exists(_.basesMatch(allele))) return false
      }
    }
    return true
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
237
}