VcfFilter.scala 4.89 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
2
3
4
5
6

import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import java.util.ArrayList
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
9
import nl.lumc.sasc.biopet.core.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._

class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
    
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
  
  var minSampleDepth: Option[Int] = _
  var minTotalDepth: Option[Int] = _
  var minAlternateDepth: Option[Int] = _
  var minSamplesPass: Option[Int] = _
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var filterRefCalls: Boolean = _
Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
31
32
33
34
35
36
  
  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
  
  override def afterGraph {
    minSampleDepth = config("min_sample_depth")
    minTotalDepth = config("min_total_depth")
    minAlternateDepth = config("min_alternate_depth")
    minSamplesPass = config("min_samples_pass")
Peter van 't Hof's avatar
Peter van 't Hof committed
37
    filterRefCalls = config("filter_ref_calls")
Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
40
41
42
43
44
45
  }
  
  override def commandLine = super.commandLine + 
    required("-I", inputVcf) + 
    required("-o", outputVcf) +
    optional("-minSampleDepth", minSampleDepth) +
    optional("-minTotalDepth", minTotalDepth) +
    optional("-minAlternateDepth", minAlternateDepth) + 
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
    optional("-minSamplesPass", minSamplesPass) +
    conditional(filterRefCalls, "-filterRefCalls")
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
}

Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
object VcfFilter extends ToolCommand {
  case class Args (inputVcf:File = null, outputVcf:File = null, minSampleDepth: Int = -1, minTotalDepth: Int = -1,
Peter van 't Hof's avatar
Peter van 't Hof committed
52
                   minAlternateDepth: Int = -1, minSamplesPass: Int = 0, minBamAlternateDepth: Int = 0, filterRefCalls: Boolean = false) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
56
57
58
59
60
61
62
63
64
65
66

  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputVcf") required() maxOccurs(1) valueName("<file>") action { (x, c) =>
      c.copy(inputVcf = x) }
    opt[File]('o', "outputVcf") required() maxOccurs(1) valueName("<file>") action { (x, c) =>
      c.copy(outputVcf = x) } text("output file, default to stdout")
    opt[Int]("minSampleDepth") unbounded() action { (x, c) =>
      c.copy(minSampleDepth = x ) }
    opt[Int]("minTotalDepth") unbounded() action { (x, c) =>
      c.copy(minTotalDepth = x ) }
    opt[Int]("minAlternateDepth") unbounded() action { (x, c) =>
      c.copy(minAlternateDepth = x) }
    opt[Int]("minSamplesPass") unbounded() action { (x, c) =>
      c.copy(minSamplesPass = x) }
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
    opt[Int]("minBamAlternateDepth") unbounded() action { (x, c) =>
      c.copy(minBamAlternateDepth = x) }
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
71
72
    opt[Boolean]("filterRefCalls") unbounded() action { (x, c) => 
      c.copy(filterRefCalls = x) }
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
75
76
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
79
    
Peter van 't Hof's avatar
Peter van 't Hof committed
80
    val reader = new VCFFileReader(commandArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
    val header = reader.getFileHeader
Peter van 't Hof's avatar
Peter van 't Hof committed
82
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
85
86
87
    writer.writeHeader(header)
    
    val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList
    val bamDPFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-DP-")) yield line.getID).toList
        
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
    for (record <- reader) {
      val genotypes = for (genotype <- record.getGenotypes) yield {
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
92
93
        val DP = if (genotype.hasDP) genotype.getDP else -1
        val AD = if (genotype.hasAD) List(genotype.getAD:_*) else Nil
        DP >= commandArgs.minSampleDepth && 
        (if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true) &&
Peter van 't Hof's avatar
Peter van 't Hof committed
94
        !(commandArgs.filterRefCalls && genotype.isHomRef)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
      }
      
Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
      val bamADvalues = (for (field <- bamADFields) yield {
        record.getAttribute(field, new ArrayList) match {
          case t:ArrayList[_] if t.length > 1 => {
              for (i <- 1 until t.size) yield {
                t(i) match {
                  case a:Int => a > commandArgs.minBamAlternateDepth
                  case a:String => a.toInt > commandArgs.minBamAlternateDepth
                  case _ => false
                }
              }
          }
          case _ => List(false)
        }
      }).flatten
          
      if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth && 
          genotypes.count(_ == true) >= commandArgs.minSamplesPass &&
          bamADvalues.count(_ == true) >= commandArgs.minSamplesPass)
Peter van 't Hof's avatar
Peter van 't Hof committed
115
116
117
118
119
120
        writer.add(record)
    }
    reader.close
    writer.close
  }
}