VcfFilter.scala 3 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package nl.lumc.sasc.biopet.core.apps

import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._

class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
    
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
  
  var minSampleDepth: Option[Int] = _
  var minTotalDepth: Option[Int] = _
  var minAlternateDepth: Option[Int] = _
  var minSamplesPass: Option[Int] = _
  
  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
  
  override def afterGraph {
    minSampleDepth = config("min_sample_depth")
    minTotalDepth = config("min_total_depth")
    minAlternateDepth = config("min_alternate_depth")
    minSamplesPass = config("min_samples_pass")
  }
  
  override def commandLine = super.commandLine + 
    required("-I", inputVcf) + 
    required("-o", outputVcf) +
    optional("-minSampleDepth", minSampleDepth) +
    optional("-minTotalDepth", minTotalDepth) +
    optional("-minAlternateDepth", minAlternateDepth) + 
    optional("-minSamplesPass", minSamplesPass)
}

object VcfFilter {
  var inputVcf: File = _
  var outputVcf: File = _
  var minSampleDepth = -1
  var minTotalDepth = -1
  var minAlternateDepth = -1
  var minSamplesPass = 0
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    for (t <- 0 until args.size) {
      args(t) match {
        case "-I" => inputVcf =  new File(args(t+1))
        case "-o" => outputVcf = new File(args(t+1))
        case "-minSampleDepth" => minSampleDepth = args(t+1).toInt
        case "-minTotalDepth" => minTotalDepth = args(t+1).toInt
        case "-minAlternateDepth" => minAlternateDepth = args(t+1).toInt
        case "-minSamplesPass" => minSamplesPass = args(t+1).toInt
        case _ =>
      }
    }
    if (inputVcf == null) throw new IllegalStateException("No inputVcf, use -I")
    if (outputVcf == null) throw new IllegalStateException("No outputVcf, use -o")
    
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    val reader = new VCFFileReader(inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(outputVcf).build)
    writer.writeHeader(reader.getFileHeader)
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
75
76
77
78
    for (record <- reader) {
      val genotypes = for (genotype <- record.getGenotypes) yield {
        genotype.getDP >= minSampleDepth && 
        List(genotype.getAD:_*).tail.count(_ >= minAlternateDepth) > 0
      }
      
Peter van 't Hof's avatar
Peter van 't Hof committed
79
      if (record.getAttributeAsInt("DP", -1) >= minTotalDepth && genotypes.count(_ == true) >= minSamplesPass)
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
82
83
84
85
        writer.add(record)
    }
    reader.close
    writer.close
  }
}