GvcfToBed.scala 3.23 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package nl.lumc.sasc.biopet.tools

import java.io.{ PrintWriter, File }

import htsjdk.variant.variantcontext.VariantContext
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.{ ToolCommand, VcfUtils }
import nl.lumc.sasc.biopet.utils.intervals.BedRecord
import scala.collection.JavaConversions._

/**
 * Created by ahbbollen on 13-10-15.
 * Create bed track from genome quality values in (g)VCF
 */
object GvcfToBed extends ToolCommand {

  case class Args(inputVcf: File = null,
                  outputBed: File = null,
                  sample: Option[String] = None,
                  minGenomeQuality: Int = 0,
                  inverse: Boolean = false) extends AbstractArgs

  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(inputVcf = x)
    } text "Input vcf file"
    opt[File]('O', "outputBed") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(outputBed = x)
    } text "Output bed file"
    opt[String]('S', "sample") unbounded () maxOccurs 1 valueName "<sample>" action { (x, c) =>
      c.copy(sample = Some(x))
    } text "Sample to consider. Will take first sample on alphabetical order by default"
    opt[Int]("minGenomeQuality") unbounded () maxOccurs 1 valueName "<int>" action { (x, c) =>
      c.copy(minGenomeQuality = x)
    } text "Minimum genome quality to consider"
    opt[Boolean]("invert") unbounded () maxOccurs 1 valueName "<boolean>" action { (x, c) =>
      c.copy(inverse = x)
    } text "Invert filter (i.e. emit only records NOT passing filter)"
  }

  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)

    logger.debug("Opening reader")
    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
    logger.debug("Opening writer")
    val writer = new PrintWriter(cmdArgs.outputBed)

    var counter = 0

    logger.info("Start")
    for (r <- reader) {
      if (counter % 100000 == 0) {
        logger.info(s"Processed $counter records")
      }
      counter += 1
      if (!hasMinGenomeQuality(r, cmdArgs.sample, cmdArgs.minGenomeQuality) && cmdArgs.inverse) {
        writer.println(createBedRecord(r).toString)
      } else if (hasMinGenomeQuality(r, cmdArgs.sample, cmdArgs.minGenomeQuality) && !cmdArgs.inverse) {
        writer.println(createBedRecord(r).toString)
      }
    }

    logger.debug("Closing writer")
    writer.close()
    logger.debug("Closing reader")
    reader.close()

    logger.info("Finished!")
  }

  /**
   * Check whether record has minimum genome qality
   * @param record variant context
   * @param sample Option[String] with sample name
   * @param minGQ minimum genome quality value
   * @return
   */
  def hasMinGenomeQuality(record: VariantContext, sample: Option[String], minGQ: Int): Boolean = {
    val gt = record.getGenotype(sample.getOrElse(record.getSampleNamesOrderedByName.head))
    gt.hasGQ && gt.getGQ >= minGQ
  }

  /**
   * Create bed record from variantcontext
   * @param record variant context
   * @return BedRecord
   */
  def createBedRecord(record: VariantContext): BedRecord = {
    new BedRecord(record.getContig, record.getStart, record.getEnd)
  }

}