BiopetFlagstat.scala 11.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import htsjdk.samtools.{ SAMRecord, SamReaderFactory }
19
import java.io.{ PrintWriter, File }
20
import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, BiopetJavaCommandLineFunction, ToolCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.config.Configurable
22
23
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.ConfigUtils
bow's avatar
bow committed
24
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
26
import scala.collection.JavaConversions._

27
class BiopetFlagstat(val root: Configurable) extends ToolCommandFuntion with Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  javaMainClass = getClass.getName
bow's avatar
bow committed
29
30

  @Input(doc = "Input bam", shortName = "input", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
31
  var input: File = _
bow's avatar
bow committed
32
33

  @Output(doc = "Output flagstat", shortName = "output", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var output: File = _
bow's avatar
bow committed
35

36
37
38
  @Output(doc = "summary output file", shortName = "output", required = false)
  var summaryFile: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  override val defaultCoreMemory = 2.0
bow's avatar
bow committed
40

41
42
43
44
  override def commandLine = super.commandLine + required("-I", input) + required("-s", summaryFile) + " > " + required(output)

  def summaryFiles: Map[String, File] = Map()

45
  def summaryStats: Map[String, Any] = {
46
47
    ConfigUtils.fileToConfigMap(summaryFile)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
}

50
object BiopetFlagstat extends ToolCommand {
51
52
  import scala.collection.mutable.Map

53
  def apply(root: Configurable, input: File, outputDir: File): BiopetFlagstat = {
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
56
    val flagstat = new BiopetFlagstat(root)
    flagstat.input = input
    flagstat.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".biopetflagstat")
57
    flagstat.summaryFile = new File(outputDir, input.getName.stripSuffix(".bam") + ".biopetflagstat.json")
58
    flagstat
Peter van 't Hof's avatar
Peter van 't Hof committed
59
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
60

61
  case class Args(inputFile: File = null, summaryFile: Option[File] = None, region: Option[String] = None) extends AbstractArgs
62
63

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
    opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) =>
      c.copy(inputFile = x)
66
67
68
69
    } text ("input bam file")
    opt[File]('s', "summaryFile") valueName ("<file>") action { (x, c) =>
      c.copy(summaryFile = Some(x))
    } text ("summary output file")
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
    opt[String]('r', "region") valueName ("<chr:start-stop>") action { (x, c) =>
      c.copy(region = Some(x))
72
    }
73
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
74

Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
77
78
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
79
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

Peter van 't Hof's avatar
Peter van 't Hof committed
82
    val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile)
83
84
85
86
    val iterSam = if (commandArgs.region == None) inputSam.iterator else {
      val regionRegex = """(.*):(.*)-(.*)""".r
      commandArgs.region.get match {
        case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
87
        case _                             => sys.error("Region wrong format")
88
89
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
93
94
95
    val flagstatCollector = new FlagstatCollector
    flagstatCollector.loadDefaultFunctions
    val m = 10
    val max = 60
    for (t <- 0 to (max / m))
bow's avatar
bow committed
96
      flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m))
Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
99
    flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => {
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
100
101
102
103
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
      else false
    })
    flagstatCollector.addFunction("First normal, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
109
110
111
112
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
115
      else false
    })
    flagstatCollector.addFunction("First inverted, second read inverted", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
118
119
120
121
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
122
123
124
      else false
    })
    flagstatCollector.addFunction("First inverted, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
127
128
129
130
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
131
132
      else false
    })
133
    flagstatCollector.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
134
      record.getReferenceIndex == record.getMateReferenceIndex)
135
    flagstatCollector.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex)
Peter van 't Hof's avatar
Peter van 't Hof committed
136

137
    logger.info("Start reading file: " + commandArgs.inputFile)
138
    for (record <- iterSam) {
Peter van 't Hof's avatar
Peter van 't Hof committed
139
      if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0)
140
        logger.info("Reads prosessed: " + flagstatCollector.readsCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
      flagstatCollector.loadRecord(record)
    }
bow's avatar
bow committed
143

144
145
146
147
148
149
150
151
    commandArgs.summaryFile.foreach {
      case file => {
        val writer = new PrintWriter(file)
        writer.println(flagstatCollector.summary)
        writer.close()
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
    println(flagstatCollector.report)
  }
bow's avatar
bow committed
154
155

  class FlagstatCollector {
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
159
160
    private var functionCount = 0
    var readsCount = 0
    private val names: Map[Int, String] = Map()
    private var functions: Array[SAMRecord => Boolean] = Array()
    private var totalCounts: Array[Long] = Array()
Peter van 't Hof's avatar
Peter van 't Hof committed
161
    private var crossCounts = Array.ofDim[Long](1, 1)
bow's avatar
bow committed
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
164
165
166
    def loadDefaultFunctions {
      addFunction("All", record => true)
      addFunction("Mapped", record => !record.getReadUnmappedFlag)
      addFunction("Duplicates", record => record.getDuplicateReadFlag)
167
168
      addFunction("FirstOfPair", record => if (record.getReadPairedFlag) record.getFirstOfPairFlag else false)
      addFunction("SecondOfPair", record => if (record.getReadPairedFlag) record.getSecondOfPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170

      addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag)
bow's avatar
bow committed
171

Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
174
      addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag)

      addFunction("ReadPaired", record => record.getReadPairedFlag)
175
      addFunction("ProperPair", record => if (record.getReadPairedFlag) record.getProperPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
176

177
178
      addFunction("MateNegativeStrand", record => if (record.getReadPairedFlag) record.getMateNegativeStrandFlag else false)
      addFunction("MateUnmapped", record => if (record.getReadPairedFlag) record.getMateUnmappedFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
183

      addFunction("ReadFailsVendorQualityCheck", record => record.getReadFailsVendorQualityCheckFlag)
      addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag)
      addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary)
    }
bow's avatar
bow committed
184
185

    def loadRecord(record: SAMRecord) {
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
      readsCount += 1
      val values: Array[Boolean] = new Array(names.size)
      for (t <- 0 until names.size) {
        values(t) = functions(t)(record)
        if (values(t)) {
          totalCounts(t) += 1
        }
      }
      for (t <- 0 until names.size) {
        for (t2 <- 0 until names.size) {
          if (values(t) && values(t2)) {
            crossCounts(t)(t2) += 1
          }
        }
      }
    }
bow's avatar
bow committed
202
203

    def addFunction(name: String, function: SAMRecord => Boolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
204
      functionCount += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
205
      crossCounts = Array.ofDim[Long](functionCount, functionCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
206
207
208
209
      totalCounts = new Array[Long](functionCount)
      val temp = new Array[SAMRecord => Boolean](functionCount)
      for (t <- 0 until (temp.size - 1)) temp(t) = functions(t)
      functions = temp
bow's avatar
bow committed
210

Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
214
215
      val index = functionCount - 1
      names += (index -> name)
      functions(index) = function
      totalCounts(index) = 0
    }
bow's avatar
bow committed
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
219
220
221
    def report: String = {
      val buffer = new StringBuilder
      buffer.append("Number\tTotal Flags\tFraction\tName\n")
      for (t <- 0 until names.size) {
        val precentage = (totalCounts(t).toFloat / readsCount) * 100
bow's avatar
bow committed
222
        buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
223
224
      }
      buffer.append("\n")
bow's avatar
bow committed
225

Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
      buffer.append(crossReport() + "\n")
      buffer.append(crossReport(fraction = true) + "\n")
bow's avatar
bow committed
228

Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
      return buffer.toString
    }
bow's avatar
bow committed
231

232
233
234
235
236
237
238
239
    def summary: String = {
      val map = (for (t <- 0 until names.size) yield {
        names(t) -> totalCounts(t)
      }).toMap

      return ConfigUtils.mapToJson(map).spaces4
    }

bow's avatar
bow committed
240
    def crossReport(fraction: Boolean = false): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
241
      val buffer = new StringBuilder
bow's avatar
bow committed
242

Peter van 't Hof's avatar
Peter van 't Hof committed
243
      for (t <- 0 until names.size) // Header for table
bow's avatar
bow committed
244
        buffer.append("\t#" + (t + 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
245
      buffer.append("\n")
bow's avatar
bow committed
246

Peter van 't Hof's avatar
Peter van 't Hof committed
247
      for (t <- 0 until names.size) {
bow's avatar
bow committed
248
        buffer.append("#" + (t + 1) + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251
252
253
254
        for (t2 <- 0 until names.size) {
          val reads = crossCounts(t)(t2)
          if (fraction) {
            val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100
            buffer.append(f"$precentage%.4f" + "%")
          } else buffer.append(reads)
bow's avatar
bow committed
255
          if (t2 == names.size - 1) buffer.append("\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
256
257
258
259
260
261
          else buffer.append("\t")
        }
      }
      return buffer.toString
    }
  } // End of class
bow's avatar
bow committed
262

Peter van 't Hof's avatar
Peter van 't Hof committed
263
}