BiopetFlagstat.scala 11.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17

Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
import java.io.{ File, PrintWriter }

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import htsjdk.samtools.{ SAMRecord, SamReaderFactory }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.{ ToolCommand, ConfigUtils }
Peter van 't Hof's avatar
Peter van 't Hof committed
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
import scala.collection.JavaConversions._
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import scala.collection.mutable
Peter van 't Hof's avatar
Peter van 't Hof committed
25

26
object BiopetFlagstat extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
27

28
29
30
31
  case class Args(inputFile: File = null,
                  outputFile: Option[File] = None,
                  summaryFile: Option[File] = None,
                  region: Option[String] = None) extends AbstractArgs
32
33

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
34
    opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
35
      c.copy(inputFile = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
36
    } text "input bam file"
37
38
39
    opt[File]('o', "outputFile") valueName "<file>" action { (x, c) =>
      c.copy(outputFile = Some(x))
    } text "output file"
Peter van 't Hof's avatar
Peter van 't Hof committed
40
    opt[File]('s', "summaryFile") valueName "<file>" action { (x, c) =>
41
      c.copy(summaryFile = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
    } text "summary output file"
    opt[String]('r', "region") valueName "<chr:start-stop>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
44
      c.copy(region = Some(x))
45
    }
46
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
51
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
52
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
53
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
Peter van 't Hof's avatar
Peter van 't Hof committed
54

Peter van 't Hof's avatar
Peter van 't Hof committed
55
    val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
56
    val iterSam = if (commandArgs.region.isEmpty) inputSam.iterator else {
57
58
59
      val regionRegex = """(.*):(.*)-(.*)""".r
      commandArgs.region.get match {
        case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
60
        case _                             => sys.error("Region wrong format")
61
62
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
63

Peter van 't Hof's avatar
Peter van 't Hof committed
64
    val flagstatCollector = new FlagstatCollector
Peter van 't Hof's avatar
Peter van 't Hof committed
65
    flagstatCollector.loadDefaultFunctions()
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
68
    val m = 10
    val max = 60
    for (t <- 0 to (max / m))
bow's avatar
bow committed
69
      flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m))
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
72
    flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => {
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
73
74
75
76
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
      else false
    })
    flagstatCollector.addFunction("First normal, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
82
83
84
85
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
88
      else false
    })
    flagstatCollector.addFunction("First inverted, second read inverted", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
91
92
93
94
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
97
      else false
    })
    flagstatCollector.addFunction("First inverted, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
100
101
102
103
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
      else false
    })
106
    flagstatCollector.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
107
      record.getReferenceIndex == record.getMateReferenceIndex)
108
    flagstatCollector.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex)
Peter van 't Hof's avatar
Peter van 't Hof committed
109

110
    logger.info("Start reading file: " + commandArgs.inputFile)
111
    for (record <- iterSam) {
Peter van 't Hof's avatar
Peter van 't Hof committed
112
      if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0)
113
        logger.info("Reads prosessed: " + flagstatCollector.readsCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
      flagstatCollector.loadRecord(record)
    }
bow's avatar
bow committed
116

117
    commandArgs.summaryFile.foreach {
Peter van 't Hof's avatar
Peter van 't Hof committed
118
      case file =>
119
120
121
122
123
        val writer = new PrintWriter(file)
        writer.println(flagstatCollector.summary)
        writer.close()
    }

124
125
126
127
128
129
130
131
    commandArgs.outputFile match {
      case Some(file) => {
        val writer = new PrintWriter(file)
        writer.println(flagstatCollector.report)
        writer.close()
      }
      case _ => println(flagstatCollector.report)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
132
  }
bow's avatar
bow committed
133
134

  class FlagstatCollector {
Peter van 't Hof's avatar
Peter van 't Hof committed
135
136
    private var functionCount = 0
    var readsCount = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
137
    private val names: mutable.Map[Int, String] = mutable.Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
138
139
    private var functions: Array[SAMRecord => Boolean] = Array()
    private var totalCounts: Array[Long] = Array()
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    private var crossCounts = Array.ofDim[Long](1, 1)
bow's avatar
bow committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    def loadDefaultFunctions() {
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
145
      addFunction("All", record => true)
      addFunction("Mapped", record => !record.getReadUnmappedFlag)
      addFunction("Duplicates", record => record.getDuplicateReadFlag)
146
147
      addFunction("FirstOfPair", record => if (record.getReadPairedFlag) record.getFirstOfPairFlag else false)
      addFunction("SecondOfPair", record => if (record.getReadPairedFlag) record.getSecondOfPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
148
149

      addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag)
bow's avatar
bow committed
150

Peter van 't Hof's avatar
Peter van 't Hof committed
151
152
153
      addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag)

      addFunction("ReadPaired", record => record.getReadPairedFlag)
154
      addFunction("ProperPair", record => if (record.getReadPairedFlag) record.getProperPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
155

156
157
      addFunction("MateNegativeStrand", record => if (record.getReadPairedFlag) record.getMateNegativeStrandFlag else false)
      addFunction("MateUnmapped", record => if (record.getReadPairedFlag) record.getMateUnmappedFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
158
159
160
161
162

      addFunction("ReadFailsVendorQualityCheck", record => record.getReadFailsVendorQualityCheckFlag)
      addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag)
      addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary)
    }
bow's avatar
bow committed
163
164

    def loadRecord(record: SAMRecord) {
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
      readsCount += 1
      val values: Array[Boolean] = new Array(names.size)
      for (t <- 0 until names.size) {
        values(t) = functions(t)(record)
        if (values(t)) {
          totalCounts(t) += 1
        }
      }
      for (t <- 0 until names.size) {
        for (t2 <- 0 until names.size) {
          if (values(t) && values(t2)) {
            crossCounts(t)(t2) += 1
          }
        }
      }
    }
bow's avatar
bow committed
181
182

    def addFunction(name: String, function: SAMRecord => Boolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
183
      functionCount += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
184
      crossCounts = Array.ofDim[Long](functionCount, functionCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
      totalCounts = new Array[Long](functionCount)
      val temp = new Array[SAMRecord => Boolean](functionCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      for (t <- 0 until (temp.length - 1)) temp(t) = functions(t)
Peter van 't Hof's avatar
Peter van 't Hof committed
188
      functions = temp
bow's avatar
bow committed
189

Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
193
194
      val index = functionCount - 1
      names += (index -> name)
      functions(index) = function
      totalCounts(index) = 0
    }
bow's avatar
bow committed
195

Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199
200
    def report: String = {
      val buffer = new StringBuilder
      buffer.append("Number\tTotal Flags\tFraction\tName\n")
      for (t <- 0 until names.size) {
        val precentage = (totalCounts(t).toFloat / readsCount) * 100
bow's avatar
bow committed
201
        buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
      }
      buffer.append("\n")
bow's avatar
bow committed
204

Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
      buffer.append(crossReport() + "\n")
      buffer.append(crossReport(fraction = true) + "\n")
bow's avatar
bow committed
207

Peter van 't Hof's avatar
Peter van 't Hof committed
208
      buffer.toString()
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    }
bow's avatar
bow committed
210

211
212
213
    def summary: String = {
      val map = (for (t <- 0 until names.size) yield {
        names(t) -> totalCounts(t)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
      }).toMap ++ Map("Singletons" -> crossCounts(names.find(_._2 == "Mapped").map(_._1).getOrElse(-1))(names.find(_._2 == "MateUnmapped").map(_._1).getOrElse(-1))
      )
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
      ConfigUtils.mapToJson(map).spaces4
218
219
    }

bow's avatar
bow committed
220
    def crossReport(fraction: Boolean = false): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
221
      val buffer = new StringBuilder
bow's avatar
bow committed
222

Peter van 't Hof's avatar
Peter van 't Hof committed
223
      for (t <- 0 until names.size) // Header for table
bow's avatar
bow committed
224
        buffer.append("\t#" + (t + 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
225
      buffer.append("\n")
bow's avatar
bow committed
226

Peter van 't Hof's avatar
Peter van 't Hof committed
227
      for (t <- 0 until names.size) {
bow's avatar
bow committed
228
        buffer.append("#" + (t + 1) + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
231
232
233
234
        for (t2 <- 0 until names.size) {
          val reads = crossCounts(t)(t2)
          if (fraction) {
            val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100
            buffer.append(f"$precentage%.4f" + "%")
          } else buffer.append(reads)
bow's avatar
bow committed
235
          if (t2 == names.size - 1) buffer.append("\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
          else buffer.append("\t")
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
239
      buffer.toString()
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
    }
  } // End of class
bow's avatar
bow committed
242

Peter van 't Hof's avatar
Peter van 't Hof committed
243
}