MpileupToVcf.scala 9.64 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18
19

import java.io.File
import java.io.PrintWriter
20
import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFuntion, BiopetJavaCommandLineFunction, ToolCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
24
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
26
import scala.collection.mutable.ArrayBuffer
Peter van 't Hof's avatar
Peter van 't Hof committed
27
28
import scala.io.Source
import scala.math.round
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import scala.math.floor
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import scala.collection.JavaConversions._
Peter van 't Hof's avatar
Peter van 't Hof committed
31

Peter van 't Hof's avatar
Peter van 't Hof committed
32
class MpileupToVcf(val root: Configurable) extends ToolCommandFuntion with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  javaMainClass = getClass.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
  @Input(doc = "Input mpileup file", shortName = "mpileup", required = false)
36
  var inputMpileup: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  @Input(doc = "Input bam file", shortName = "bam", required = false)
39
  var inputBam: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
  @Output(doc = "Output tag library", shortName = "output", required = true)
  var output: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
45
46
47

  var minDP: Option[Int] = config("min_dp")
  var minAP: Option[Int] = config("min_ap")
  var homoFraction: Option[Double] = config("homoFraction")
  var ploidy: Option[Int] = config("ploidy")
Peter van 't Hof's avatar
Peter van 't Hof committed
48
  var sample: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var reference: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  override val defaultCoreMemory = 3.0
Peter van 't Hof's avatar
Peter van 't Hof committed
52

53
54
  override def defaults = ConfigUtils.mergeMaps(Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)),
    super.defaults)
Peter van 't Hof's avatar
Peter van 't Hof committed
55

56
57
  override def beforeGraph {
    super.beforeGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
58
    reference = referenceFasta().getAbsolutePath
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
    val samtoolsMpileup = new SamtoolsMpileup(this)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
61

62
63
64
65
66
67
68
69
70
71
  override def beforeCmd: Unit = {
    if (sample == null && inputBam.exists()) {
      val inputSam = SamReaderFactory.makeDefault.open(inputBam)
      val readGroups = inputSam.getFileHeader.getReadGroups
      val samples = readGroups.map(readGroup => readGroup.getSample).distinct
      sample = samples.head
      inputSam.close
    }
  }

72
73
74
  override def commandLine = {
    (if (inputMpileup == null) {
      val samtoolsMpileup = new SamtoolsMpileup(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      samtoolsMpileup.input = List(inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
76
      samtoolsMpileup.cmdPipe + " | "
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
80
    } else "") +
      super.commandLine +
      required("-o", output) +
      optional("--minDP", minDP) +
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83
      optional("--minAP", minAP) +
      optional("--homoFraction", homoFraction) +
      optional("--ploidy", ploidy) +
Peter van 't Hof's avatar
Peter van 't Hof committed
84
      required("--sample", sample) +
85
86
      (if (inputBam == null) required("-I", inputMpileup) else "")
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
}

Peter van 't Hof's avatar
Peter van 't Hof committed
89
object MpileupToVcf extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
  case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2,
                  homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
92
93

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
96
97
98
99
100
101
102
    opt[File]('I', "input") valueName ("<file>") action { (x, c) =>
      c.copy(input = x)
    } text ("input, default is stdin")
    opt[File]('o', "output") required () valueName ("<file>") action { (x, c) =>
      c.copy(output = x)
    } text ("out is a required file property")
    opt[String]('s', "sample") required () action { (x, c) =>
      c.copy(sample = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
103
    opt[Int]("minDP") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
      c.copy(minDP = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
106
    opt[Int]("minAP") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
      c.copy(minAP = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    opt[Double]("homoFraction") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
      c.copy(homoFraction = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
112
    opt[Int]("ploidy") action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
      c.copy(ploidy = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
115
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
116

Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
119
120
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
123

Peter van 't Hof's avatar
Peter van 't Hof committed
124
    import scala.collection.mutable.Map
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist")
Peter van 't Hof's avatar
Peter van 't Hof committed
126

Peter van 't Hof's avatar
Peter van 't Hof committed
127
    val writer = new PrintWriter(commandArgs.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
128
    writer.println("##fileformat=VCFv4.1")
Peter van 't Hof's avatar
Peter van 't Hof committed
129
    writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
130
    writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">")
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    writer.println("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
132
133
    writer.println("##FORMAT=<ID=AD,Number=.,Type=Integer,Description=\"Total Allele Depth\">")
    writer.println("##FORMAT=<ID=FREQ,Number=A,Type=Float,Description=\"Allele Frequency\">")
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
    writer.println("##FORMAT=<ID=RFC,Number=1,Type=Integer,Description=\"Reference Forward Reads\">")
    writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">")
136
137
    writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">")
    writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">")
Peter van 't Hof's avatar
Peter van 't Hof committed
138
    writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
Peter van 't Hof's avatar
Peter van 't Hof committed
139
    writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
140
141
142
143
144
145
    val inputStream = if (commandArgs.input != null) {
      Source.fromFile(commandArgs.input).getLines
    } else {
      logger.info("No input file as argument, waiting on stdin")
      Source.stdin.getLines
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
    class Counts(var forward: Int, var reverse: Int)
    for (
      line <- inputStream;
Peter van 't Hof's avatar
Peter van 't Hof committed
149
      values = line.split("\t");
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
      if values.size > 5
    ) {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
      val chr = values(0)
      val pos = values(1)
154
155
156
      val ref = values(2) match {
        case "A" | "T" | "G" | "C" => values(2)
        case "a" | "t" | "g" | "c" => values(2).toUpperCase
Peter van 't Hof's avatar
Peter van 't Hof committed
157
        case "U" | "u"             => "T"
158
159
        case _                     => "N"
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
      val reads = values(3).toInt
      val mpileup = values(4)
      val qual = values(5)
Peter van 't Hof's avatar
Peter van 't Hof committed
163
164
165
166

      val counts: Map[String, Counts] = Map(ref.toUpperCase -> new Counts(0, 0))

      def addCount(s: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
167
        val upper = s.toUpperCase
Peter van 't Hof's avatar
Peter van 't Hof committed
168
        if (!counts.contains(upper)) counts += upper -> new Counts(0, 0)
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
        if (s(0).isLower) counts(upper).reverse += 1
        else counts(upper).forward += 1
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
172

Peter van 't Hof's avatar
Peter van 't Hof committed
173
      var t = 0
174
      var dels = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
175
      while (t < mpileup.size) {
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
        mpileup(t) match {
          case ',' => {
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
            addCount(ref.toLowerCase)
            t += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
180
181
          }
          case '.' => {
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
            addCount(ref.toUpperCase)
            t += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
          }
          case '^' => t += 2
          case '$' => t += 1
187
          case '*' => {
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
            dels += 1
            t += 1
190
          }
Peter van 't Hof's avatar
Peter van 't Hof committed
191
          case '+' | '-' => {
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194
195
196
            t += 1
            var size = ""
            var insert = ""
            while (mpileup(t).isDigit) {
              size += mpileup(t)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
              t += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
198
199
200
            }
            for (c <- t until t + size.toInt) insert = insert + mpileup(c)
            t += size.toInt
Peter van 't Hof's avatar
Peter van 't Hof committed
201
          }
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
204
          case 'a' | 'c' | 't' | 'g' | 'A' | 'C' | 'T' | 'G' => {
            addCount(mpileup(t).toString)
            t += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
205
          }
Peter van 't Hof's avatar
Peter van 't Hof committed
206
          case _ => t += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
209

210
211
212
213
214
215
      val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads)
      val format: Map[String, String] = Map("DP" -> reads.toString)
      val alt: ArrayBuffer[String] = new ArrayBuffer
      format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
      format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
      format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
216
      if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) {
217
218
        alt += key
        format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
        format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
        format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse))
        format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") +
          round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2))
223
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
224

225
      if (alt.size > 0) {
Peter van 't Hof's avatar
Peter van 't Hof committed
226
        val ad = for (ad <- format("AD").split(",")) yield ad.toInt
227
        var left = reads - dels
Peter van 't Hof's avatar
Peter van 't Hof committed
228
        val gt = ArrayBuffer[Int]()
Peter van 't Hof's avatar
Peter van 't Hof committed
229

Peter van 't Hof's avatar
Peter van 't Hof committed
230
        for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) {
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
          var max = -1
          for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a
Peter van 't Hof's avatar
Peter van 't Hof committed
233
          val f = ad(max).toDouble / left
Peter van 't Hof's avatar
Peter van 't Hof committed
234
235
236
          for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max)
          if (f - floor(f) >= commandArgs.homoFraction) {
            for (b <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
239
          }
          left -= ad(max)
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
242
        writer.println(Array(chr, pos, ".", ref.toUpperCase, alt.mkString(","), ".", ".", info.mkString(";"),
          "GT:" + format.keys.mkString(":"), gt.sortWith(_ < _).mkString("/") + ":" + format.values.mkString(":")
        ).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
246
247
      }
    }
    writer.close
  }
}