VcfFilter.scala 4.93 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
2
3
4
5
6

import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import java.util.ArrayList
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
9
import nl.lumc.sasc.biopet.core.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
10
11
12
13
14
15
16
17
18
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._

class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf", shortName = "I", required = true)
  var inputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
  @Output(doc = "Output vcf", shortName = "o", required = false)
  var outputVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
  var minSampleDepth: Option[Int] = _
  var minTotalDepth: Option[Int] = _
  var minAlternateDepth: Option[Int] = _
  var minSamplesPass: Option[Int] = _
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var filterRefCalls: Boolean = _
Peter van 't Hof's avatar
Peter van 't Hof committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
30
  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
Peter van 't Hof's avatar
Peter van 't Hof committed
31

Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
34
35
36
  override def afterGraph {
    minSampleDepth = config("min_sample_depth")
    minTotalDepth = config("min_total_depth")
    minAlternateDepth = config("min_alternate_depth")
    minSamplesPass = config("min_samples_pass")
Peter van 't Hof's avatar
Peter van 't Hof committed
37
    filterRefCalls = config("filter_ref_calls")
Peter van 't Hof's avatar
Peter van 't Hof committed
38
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
41

  override def commandLine = super.commandLine +
    required("-I", inputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
42
    required("-o", outputVcf) +
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
    optional("--minSampleDepth", minSampleDepth) +
    optional("--minTotalDepth", minTotalDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
45
    optional("--minAlternateDepth", minAlternateDepth) +
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
    optional("--minSamplesPass", minSamplesPass) +
    conditional(filterRefCalls, "--filterRefCalls")
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
}

Peter van 't Hof's avatar
Peter van 't Hof committed
50
object VcfFilter extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  case class Args(inputVcf: File = null, outputVcf: File = null, minSampleDepth: Int = -1, minTotalDepth: Int = -1,
                  minAlternateDepth: Int = -1, minSamplesPass: Int = 0, minBamAlternateDepth: Int = 0, filterRefCalls: Boolean = false) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
    opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(inputVcf = x)
    }
    opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(outputVcf = x)
    } text ("output file, default to stdout")
    opt[Int]("minSampleDepth") unbounded () action { (x, c) =>
      c.copy(minSampleDepth = x)
    }
    opt[Int]("minTotalDepth") unbounded () action { (x, c) =>
      c.copy(minTotalDepth = x)
    }
    opt[Int]("minAlternateDepth") unbounded () action { (x, c) =>
      c.copy(minAlternateDepth = x)
    }
    opt[Int]("minSamplesPass") unbounded () action { (x, c) =>
      c.copy(minSamplesPass = x)
    }
    opt[Int]("minBamAlternateDepth") unbounded () action { (x, c) =>
      c.copy(minBamAlternateDepth = x)
    }
    opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
      c.copy(filterRefCalls = true)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
79
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83
84
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
    val reader = new VCFFileReader(commandArgs.inputVcf, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    val header = reader.getFileHeader
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
Peter van 't Hof's avatar
Peter van 't Hof committed
91
    writer.writeHeader(header)
Peter van 't Hof's avatar
Peter van 't Hof committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
    val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList
    val bamDPFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-DP-")) yield line.getID).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
95

Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    for (record <- reader) {
      val genotypes = for (genotype <- record.getGenotypes) yield {
Peter van 't Hof's avatar
Peter van 't Hof committed
98
        val DP = if (genotype.hasDP) genotype.getDP else -1
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
101
102
        val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil
        DP >= commandArgs.minSampleDepth &&
          (if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true) &&
          !(commandArgs.filterRefCalls && genotype.isHomRef)
Peter van 't Hof's avatar
Peter van 't Hof committed
103
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
104

Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
      val bamADvalues = (for (field <- bamADFields) yield {
        record.getAttribute(field, new ArrayList) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
109
110
111
112
          case t: ArrayList[_] if t.length > 1 => {
            for (i <- 1 until t.size) yield {
              t(i) match {
                case a: Int    => a > commandArgs.minBamAlternateDepth
                case a: String => a.toInt > commandArgs.minBamAlternateDepth
                case _         => false
Peter van 't Hof's avatar
Peter van 't Hof committed
113
              }
Peter van 't Hof's avatar
Peter van 't Hof committed
114
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
115
116
117
118
          }
          case _ => List(false)
        }
      }).flatten
Peter van 't Hof's avatar
Peter van 't Hof committed
119
120
121
122

      if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth &&
        genotypes.count(_ == true) >= commandArgs.minSamplesPass &&
        (commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass))
Peter van 't Hof's avatar
Peter van 't Hof committed
123
124
125
126
127
128
        writer.add(record)
    }
    reader.close
    writer.close
  }
}