ValidateVcf.scala 2.29 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package nl.lumc.sasc.biopet.tools

import java.io.File

import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }

import scala.collection.JavaConversions._

/**
  * Created by pjvanthof on 10/12/2016.
  */
object ValidateVcf extends ToolCommand {
  case class Args(inputVcf: File = null,
                  reference: File = null,
                  failOnError: Boolean = true) extends AbstractArgs

  class OptParser extends AbstractOptParser {
    opt[File]('i', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(inputVcf = x)
    } text "Vcf file to check"
    opt[File]('R', "reference") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(reference = x)
    } text "Reference fasta to check vcf file against"
    opt[Unit]("disableFail") maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(failOnError = false)
    } text "Disable failing, tool still stops at the first error but exitcode 0 is given"
  }

  def main(args: Array[String]): Unit = {
    logger.info("Start")

    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    val regions = BedRecordList.fromReference(cmdArgs.reference)

    val vcfReader = new VCFFileReader(cmdArgs.inputVcf, true)

    try {
      for (record <- vcfReader.iterator()) {
        val contig = record.getContig
        require(regions.chrRecords.contains(contig),
          s"contig in vcf file is not on reference: ${contig}")
        val start = record.getStart
        val end = record.getEnd
        require(regions.chrRecords(contig).exists(_.overlapWith(BedRecord("contig", start, start))),
          s"Position does not exist on reference: ${contig}:$start")
        if (end != start) require(regions.chrRecords(contig).exists(_.overlapWith(BedRecord("contig", end, end))),
          s"Position does not exist on reference: ${contig}:$end")
        require(start <= end, "End is higher then Start, this should not be possible")
      }
    } catch {
      case e: IllegalArgumentException =>
        if (cmdArgs.failOnError) throw e
        else logger.warn(e.getMessage)
    }

    vcfReader.close()

    logger.info("No error found")
  }
}