VcfWithVcf.scala 4.09 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
4
5
6
package nl.lumc.sasc.biopet.tools

import java.io.File

import scala.collection.JavaConversions._
import htsjdk.variant.variantcontext.{ VariantContextBuilder, VariantContext }
7
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
Sander Bollen's avatar
Sander Bollen committed
8
9
10
import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.core.ToolCommand

Sander Bollen's avatar
Sander Bollen committed
11
12
import scala.collection.immutable

Sander Bollen's avatar
Sander Bollen committed
13
14
15
/**
 * Created by ahbbollen on 11-2-15.
 */
16
object VcfWithVcf extends ToolCommand {
17
18
19
20
  case class Fields(inputField: String, outputField: String)

  case class Args(inputFile: File = null,
                  outputFile: File = null,
21
                  secondaryVcf: File = null,
22
23
                  fields: List[Fields] = Nil,
                  matchAllele: Boolean = true) extends AbstractArgs
Sander Bollen's avatar
Sander Bollen committed
24
25
26
27
28
29
30
31

  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(inputFile = x)
    }
    opt[File]('O', "outputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(outputFile = x)
    }
32
33
    opt[File]('S', "SecondaryVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(secondaryVcf = x)
Sander Bollen's avatar
Sander Bollen committed
34
    }
35
36
37
38
    opt[String]('f', "field") unbounded () valueName ("<input_field:output_field>") action { (x, c) =>
      val values = x.split(":")
      if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields)
      else c.copy(fields = Fields(x, x) :: c.fields)
Sander Bollen's avatar
Sander Bollen committed
39
    }
40
    opt[Boolean]("match") valueName ("<Boolean>") maxOccurs (1) action { (x, c) =>
41
      c.copy(matchAllele = x)
42
    } text ("Match alternative alleles; default true")
Sander Bollen's avatar
Sander Bollen committed
43
44
45
46
47
48
49
  }

  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

    val reader = new VCFFileReader(commandArgs.inputFile)
50
    val secondaryReader = new VCFFileReader(commandArgs.secondaryVcf)
Sander Bollen's avatar
Sander Bollen committed
51
52

    val header = reader.getFileHeader
53
    val secondHeader = secondaryReader.getFileHeader
Sander Bollen's avatar
Sander Bollen committed
54
55

    for (x <- commandArgs.fields) {
56
57
58
59
60
      if (header.hasInfoLine(x.outputField))
        throw new IllegalArgumentException("Field '" + x.outputField + "' already exist in input vcf")
      if (!secondHeader.hasInfoLine(x.inputField))
        throw new IllegalArgumentException("Field '" + x.inputField + "' does not exist in secondary vcf")

61
62
63
64
65
      val oldHeaderLine = secondHeader.getInfoHeaderLine(x.inputField)

      val newHeaderLine = new VCFInfoHeaderLine(x.outputField, VCFHeaderLineCount.UNBOUNDED,
        oldHeaderLine.getType, oldHeaderLine.getDescription)
      header.addMetaDataLine(newHeaderLine)
Sander Bollen's avatar
Sander Bollen committed
66
67
    }

Sander Bollen's avatar
Sander Bollen committed
68
69
    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
      setOutputFile(commandArgs.outputFile).build())
Sander Bollen's avatar
Sander Bollen committed
70
    writer.writeHeader(header)
Sander Bollen's avatar
Sander Bollen committed
71
72
    var idx = 0

73
    for (record <- reader) {
74
      val secondaryRecords = if (commandArgs.matchAllele) {
75
76
77
78
79
80
        secondaryReader.query(record.getChr, record.getStart, record.getEnd).toList.
          filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele(_)))
      } else {
        secondaryReader.query(record.getChr, record.getStart, record.getEnd).toList
      }

81
82
83
84
85
86
87
88
89
90
91
      val fieldMap = (for (
        f <- commandArgs.fields;
        if secondaryRecords.exists(_.hasAttribute(f.inputField))
      ) yield {
        f.outputField -> (for (
          secondRecord <- secondaryRecords;
          if (secondRecord.hasAttribute(f.inputField))
        ) yield {
          secondRecord.getAttribute(f.inputField) match {
            case l: List[_] => l
            case x          => List(x)
Sander Bollen's avatar
Sander Bollen committed
92
          }
93
94
95
96
97
98
        }).fold(Nil)(_ ::: _)
      }).toMap

      writer.add(fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => {
        builder.attribute(attribute._1, attribute._2.toArray)
      }).make())
99

Sander Bollen's avatar
Sander Bollen committed
100
      idx += 1
101
102
103
      if (idx % 100000 == 0) {
        logger.info(s"""Processed $idx records""")
      }
Sander Bollen's avatar
Sander Bollen committed
104
    }
105
    logger.info(s"""Processed $idx records""")
Sander Bollen's avatar
Sander Bollen committed
106

Sander Bollen's avatar
Sander Bollen committed
107
    logger.debug("Closing readers")
Sander Bollen's avatar
Sander Bollen committed
108
109
110
    writer.close()
    reader.close()
    secondaryReader.close()
Sander Bollen's avatar
Sander Bollen committed
111
    logger.info("Done. Goodbye")
Sander Bollen's avatar
Sander Bollen committed
112
113
114
  }

}