VcfToTsv.scala 5.85 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Copyright 2014 pjvan_thof.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nl.lumc.sasc.biopet.tools

import htsjdk.variant.vcf.VCFFileReader
import java.io.File
import java.io.PrintStream
import nl.lumc.sasc.biopet.core.ToolCommand
import scala.collection.JavaConversions._
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import scala.collection.mutable.{ Map, ListBuffer }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
26
27
28
29
30

/**
 * Placeholder class accompanying the `VcfToTsv` command-line object.
 *
 * It currently has no members; the TODO below records the intention to add a Queue wrapper here.
 */
class VcfToTsv {
  // TODO: Queue wrapper
}

/**
 * Command-line tool that flattens a VCF file into a tab-separated table.
 *
 * Every variant record becomes one output line. The columns are the selected general fields,
 * the selected INFO fields (named `INFO-<id>`) and the selected per-sample fields
 * (named `<sample>-<id>`). The first output line is a header that starts with `#`.
 */
object VcfToTsv extends ToolCommand {
  /**
   * Parsed command-line arguments.
   *
   * @param inputFile VCF file to read (required on the command line)
   * @param outputFile file to write the table to; `null` means stdout
   * @param fields extra general fields requested with `-f`
   * @param infoFields INFO ids requested with `-i`
   * @param sampleFileds per-sample field ids requested with `-s` (the spelling is kept for compatibility)
   * @param disableDefaults when true, [[defaultFields]] are not added automatically
   * @param allInfo when true, every INFO id declared in the header is used instead of `infoFields`
   * @param allFormat when true, every FORMAT id declared in the header is used instead of `sampleFileds`
   */
  case class Args(inputFile: File = null, outputFile: File = null, fields: List[String] = Nil, infoFields: List[String] = Nil,
                  sampleFileds: List[String] = Nil, disableDefaults: Boolean = false,
                  allInfo: Boolean = false, allFormat: Boolean = false) extends AbstractArgs

  /** Option parser that fills an [[Args]] instance from the command line. */
  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(inputFile = x)
    } text ("input VCF file")
    opt[File]('o', "outputFile") maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(outputFile = x)
    } text ("output file, default to stdout")
    opt[String]('f', "field") unbounded () action { (x, c) =>
      c.copy(fields = x :: c.fields)
    } text ("general field to output, can be given multiple times")
    opt[String]('i', "info_field") unbounded () action { (x, c) =>
      c.copy(infoFields = x :: c.infoFields)
    } text ("INFO field to output, can be given multiple times")
    opt[Unit]("all_info") unbounded () action { (x, c) =>
      c.copy(allInfo = true)
    } text ("output all INFO fields declared in the VCF header")
    opt[Unit]("all_format") unbounded () action { (x, c) =>
      c.copy(allFormat = true)
    } text ("output all FORMAT fields declared in the VCF header for every sample")
    opt[String]('s', "sample_field") unbounded () action { (x, c) =>
      c.copy(sampleFileds = x :: c.sampleFileds)
    } text ("per-sample field to output, can be given multiple times")
    opt[Unit]('d', "disable_defaults") unbounded () action { (x, c) =>
      c.copy(disableDefaults = true)
    } text ("do not output the default fields (chr, pos, id, ref, alt, qual)")
  }

  /** General fields that are written unless `disable_defaults` is given, in output order. */
  val defaultFields = List("chr", "pos", "id", "ref", "alt", "qual")

  /**
   * Entry point: parses the arguments, reads the VCF file and writes the table.
   *
   * Exits with status 1 when the arguments cannot be parsed.
   *
   * @param args raw command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

    val reader = new VCFFileReader(commandArgs.inputFile, false)
    try {
      val header = reader.getFileHeader
      val samples: List[String] = header.getSampleNamesInOrder.toList

      val allInfoFields = header.getInfoHeaderLines.map(_.getID).toList
      val allFormatFields = header.getFormatHeaderLines.map(_.getID).toList

      val infoFields = if (commandArgs.allInfo) allInfoFields else commandArgs.infoFields
      val formatFields = if (commandArgs.allFormat) allFormatFields else commandArgs.sampleFileds
      // One column per (format field, sample) combination.
      val sampleColumns = for (f <- formatFields; sample <- samples) yield sample + "-" + f

      // A set is used so a field requested more than once yields a single column.
      val fields: Set[String] =
        (if (commandArgs.disableDefaults) Nil else defaultFields).toSet[String] ++
          commandArgs.fields ++
          infoFields.map("INFO-" + _) ++
          sampleColumns

      val sortedFields = sortFields(fields, samples)

      // Only a stream opened here is closed afterwards; stdout is merely flushed.
      val fileStream = Option(commandArgs.outputFile).map(new PrintStream(_))
      val writer = fileStream.getOrElse(sys.process.stdout)
      try {
        writer.println(sortedFields.mkString("#", "\t", ""))
        for (vcfRecord <- reader) {
          val values = recordValues(vcfRecord, samples)
          // Fields that are absent from this record become empty cells.
          writer.println(sortedFields.map(f => values.getOrElse(f, "")).mkString("\t"))
        }
      } finally {
        writer.flush()
        fileStream.foreach(_.close())
      }
    } finally {
      reader.close()
    }
  }

  /**
   * Puts the selected columns in output order.
   *
   * General fields come first (those in [[defaultFields]] in that order, then any others
   * alphabetically), followed by `INFO-` fields and finally per-sample fields, both alphabetically.
   * The sort key is a total order, so the result does not depend on the set's iteration order.
   *
   * @param fields selected column names
   * @param samples sample names used to recognise per-sample columns by their `<sample>-` prefix
   * @return the column names in output order
   */
  private def sortFields(fields: Set[String], samples: Seq[String]): List[String] = {
    val samplePrefixes = samples.map(_ + "-")
    fields.toList.sortBy { field =>
      val defaultIndex = defaultFields.indexOf(field)
      val (group, rank) =
        if (field.startsWith("INFO-")) (1, 0)
        else if (samplePrefixes.exists(p => field.startsWith(p))) (2, 0)
        else if (defaultIndex >= 0) (0, defaultIndex)
        else (0, Int.MaxValue)
      (group, rank, field)
    }
  }

  /**
   * Collects every value that can be written for one record, keyed by column name.
   *
   * `Map` here is the mutable map imported at the top of the file.
   *
   * @param vcfRecord record to convert
   * @param samples sample names whose genotypes are extracted
   * @return column name to value for this record
   */
  private def recordValues(vcfRecord: htsjdk.variant.variantcontext.VariantContext,
                           samples: Seq[String]): Map[String, Any] = {
    val values: Map[String, Any] = Map()
    values += "chr" -> vcfRecord.getChr
    values += "pos" -> vcfRecord.getStart
    values += "id" -> vcfRecord.getID
    values += "ref" -> vcfRecord.getReference.getBaseString
    values += "alt" -> vcfRecord.getAlternateAlleles.map(_.getBaseString).mkString(",")
    // A phred-scaled quality of -10 is written as "."; other values are rounded to two decimals.
    values += "qual" -> (if (vcfRecord.getPhredScaledQual == -10) "." else scala.math.round(vcfRecord.getPhredScaledQual * 100.0) / 100.0)
    values += "filter" -> vcfRecord.getFilters

    for ((field, content) <- vcfRecord.getAttributes) {
      values += "INFO-" + field -> {
        content match {
          // Multi-valued attributes are joined with commas, whatever list type carries them.
          case a: List[_]           => a.mkString(",")
          case a: Array[_]          => a.mkString(",")
          case a: java.util.List[_] => a.mkString(",")
          case _                    => content
        }
      }
    }

    for (sample <- samples) {
      val genotype = vcfRecord.getGenotype(sample)
      values += sample + "-GT" -> {
        val alleleIndexes = for (g <- genotype.getAlleles) yield vcfRecord.getAlleleIndex(g)
        // A negative allele index is written as ".".
        alleleIndexes.map(x => if (x < 0) "." else x).mkString("/")
      }
      if (genotype.hasAD) values += sample + "-AD" -> genotype.getAD.mkString(",")
      if (genotype.hasDP) values += sample + "-DP" -> genotype.getDP
      if (genotype.hasGQ) values += sample + "-GQ" -> genotype.getGQ
      if (genotype.hasPL) values += sample + "-PL" -> genotype.getPL.mkString(",")
      for ((field, content) <- genotype.getExtendedAttributes) {
        values += sample + "-" + field -> content
      }
    }
    values
  }
}