VcfFilter.scala 3.26 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
package nl.lumc.sasc.biopet.core.apps

import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._

class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
    
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
  
  var minSampleDepth: Option[Int] = _
  var minTotalDepth: Option[Int] = _
  var minAlternateDepth: Option[Int] = _
  var minSamplesPass: Option[Int] = _
Peter van 't Hof's avatar
Peter van 't Hof committed
25
  var filterRefCalls: Boolean = _
Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
29
30
31
32
33
34
  
  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
  
  override def afterGraph {
    minSampleDepth = config("min_sample_depth")
    minTotalDepth = config("min_total_depth")
    minAlternateDepth = config("min_alternate_depth")
    minSamplesPass = config("min_samples_pass")
Peter van 't Hof's avatar
Peter van 't Hof committed
35
    filterRefCalls = config("filter_ref_calls")
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
39
40
41
42
43
  }
  
  override def commandLine = super.commandLine + 
    required("-I", inputVcf) + 
    required("-o", outputVcf) +
    optional("-minSampleDepth", minSampleDepth) +
    optional("-minTotalDepth", minTotalDepth) +
    optional("-minAlternateDepth", minAlternateDepth) + 
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
    optional("-minSamplesPass", minSamplesPass) +
    conditional(filterRefCalls, "-filterRefCalls")
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
48
49
50
51
52
53
54
}

object VcfFilter {
  var inputVcf: File = _
  var outputVcf: File = _
  var minSampleDepth = -1
  var minTotalDepth = -1
  var minAlternateDepth = -1
  var minSamplesPass = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  var filterRefCalls = false
Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
58
59
60
61
62
63
64
65
66
67
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    for (t <- 0 until args.size) {
      args(t) match {
        case "-I" => inputVcf =  new File(args(t+1))
        case "-o" => outputVcf = new File(args(t+1))
        case "-minSampleDepth" => minSampleDepth = args(t+1).toInt
        case "-minTotalDepth" => minTotalDepth = args(t+1).toInt
        case "-minAlternateDepth" => minAlternateDepth = args(t+1).toInt
        case "-minSamplesPass" => minSamplesPass = args(t+1).toInt
Peter van 't Hof's avatar
Peter van 't Hof committed
68
        case "-filterRefCalls" => filterRefCalls = true
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
71
72
73
74
        case _ =>
      }
    }
    if (inputVcf == null) throw new IllegalStateException("No inputVcf, use -I")
    if (outputVcf == null) throw new IllegalStateException("No outputVcf, use -o")
    
Peter van 't Hof's avatar
Peter van 't Hof committed
75
    val reader = new VCFFileReader(inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(outputVcf).build)
    writer.writeHeader(reader.getFileHeader)
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
80
    for (record <- reader) {
      val genotypes = for (genotype <- record.getGenotypes) yield {
        genotype.getDP >= minSampleDepth && 
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
        List(genotype.getAD:_*).tail.count(_ >= minAlternateDepth) > 0 &&
        !(filterRefCalls && genotype.isHomRef)
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
      }
      
Peter van 't Hof's avatar
Peter van 't Hof committed
85
      if (record.getAttributeAsInt("DP", -1) >= minTotalDepth && genotypes.count(_ == true) >= minSamplesPass)
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
88
89
90
91
        writer.add(record)
    }
    reader.close
    writer.close
  }
}