BiopetFlagstat.scala 10.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import htsjdk.samtools.{ SAMRecord, SamReaderFactory }
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
21
import nl.lumc.sasc.biopet.core.ToolCommand
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.config.Configurable
bow's avatar
bow committed
23
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import scala.collection.JavaConversions._
import scala.collection.mutable.Map

bow's avatar
bow committed
27
class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFunction {
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  javaMainClass = getClass.getName
bow's avatar
bow committed
29
30

  @Input(doc = "Input bam", shortName = "input", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
31
  var input: File = _
bow's avatar
bow committed
32
33

  @Output(doc = "Output flagstat", shortName = "output", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var output: File = _
bow's avatar
bow committed
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
bow's avatar
bow committed
38

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  override def commandLine = super.commandLine + required("-I", input) + " > " + required(output)
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
}

42
object BiopetFlagstat extends ToolCommand {
bow's avatar
bow committed
43
  def apply(root: Configurable, input: File, output: File): BiopetFlagstat = {
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
46
47
48
    val flagstat = new BiopetFlagstat(root)
    flagstat.input = input
    flagstat.output = output
    return flagstat
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
49
50

  case class Args(inputFile: File = null, region: Option[String] = None) extends AbstractArgs
51
52

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
56
57
58
    opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) =>
      c.copy(inputFile = x)
    } text ("out is a required file property")
    opt[String]('r', "region") valueName ("<chr:start-stop>") action { (x, c) =>
      c.copy(region = Some(x))
    } text ("out is a required file property")
59
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
60

Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
63
64
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
65
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

Peter van 't Hof's avatar
Peter van 't Hof committed
68
    val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile)
69
70
71
72
    val iterSam = if (commandArgs.region == None) inputSam.iterator else {
      val regionRegex = """(.*):(.*)-(.*)""".r
      commandArgs.region.get match {
        case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
73
        case _                             => sys.error("Region wrong format")
74
75
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
76

Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
80
81
    val flagstatCollector = new FlagstatCollector
    flagstatCollector.loadDefaultFunctions
    val m = 10
    val max = 60
    for (t <- 0 to (max / m))
bow's avatar
bow committed
82
      flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m))
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
85
    flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => {
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
86
87
88
89
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
92
      else false
    })
    flagstatCollector.addFunction("First normal, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
95
96
97
98
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
101
      else false
    })
    flagstatCollector.addFunction("First inverted, second read inverted", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
104
105
106
107
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
110
      else false
    })
    flagstatCollector.addFunction("First inverted, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
113
114
115
116
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
      else false
    })
119
    flagstatCollector.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
120
      record.getReferenceIndex == record.getMateReferenceIndex)
121
    flagstatCollector.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex)
Peter van 't Hof's avatar
Peter van 't Hof committed
122

123
    logger.info("Start reading file: " + commandArgs.inputFile)
124
    for (record <- iterSam) {
Peter van 't Hof's avatar
Peter van 't Hof committed
125
      if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0)
126
        logger.info("Reads prosessed: " + flagstatCollector.readsCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
127
128
      flagstatCollector.loadRecord(record)
    }
bow's avatar
bow committed
129

Peter van 't Hof's avatar
Peter van 't Hof committed
130
131
    println(flagstatCollector.report)
  }
bow's avatar
bow committed
132
133

  class FlagstatCollector {
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
136
137
138
    private var functionCount = 0
    var readsCount = 0
    private val names: Map[Int, String] = Map()
    private var functions: Array[SAMRecord => Boolean] = Array()
    private var totalCounts: Array[Long] = Array()
Peter van 't Hof's avatar
Peter van 't Hof committed
139
    private var crossCounts = Array.ofDim[Long](1, 1)
bow's avatar
bow committed
140

Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
143
144
    def loadDefaultFunctions {
      addFunction("All", record => true)
      addFunction("Mapped", record => !record.getReadUnmappedFlag)
      addFunction("Duplicates", record => record.getDuplicateReadFlag)
145
146
      addFunction("FirstOfPair", record => if (record.getReadPairedFlag) record.getFirstOfPairFlag else false)
      addFunction("SecondOfPair", record => if (record.getReadPairedFlag) record.getSecondOfPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148

      addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag)
bow's avatar
bow committed
149

Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152
      addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag)

      addFunction("ReadPaired", record => record.getReadPairedFlag)
153
      addFunction("ProperPair", record => if (record.getReadPairedFlag) record.getProperPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
154

155
156
      addFunction("MateNegativeStrand", record => if (record.getReadPairedFlag) record.getMateNegativeStrandFlag else false)
      addFunction("MateUnmapped", record => if (record.getReadPairedFlag) record.getMateUnmappedFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
159
160
161

      addFunction("ReadFailsVendorQualityCheck", record => record.getReadFailsVendorQualityCheckFlag)
      addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag)
      addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary)
    }
bow's avatar
bow committed
162
163

    def loadRecord(record: SAMRecord) {
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
      readsCount += 1
      val values: Array[Boolean] = new Array(names.size)
      for (t <- 0 until names.size) {
        values(t) = functions(t)(record)
        if (values(t)) {
          totalCounts(t) += 1
        }
      }
      for (t <- 0 until names.size) {
        for (t2 <- 0 until names.size) {
          if (values(t) && values(t2)) {
            crossCounts(t)(t2) += 1
          }
        }
      }
    }
bow's avatar
bow committed
180
181

    def addFunction(name: String, function: SAMRecord => Boolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
182
      functionCount += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
183
      crossCounts = Array.ofDim[Long](functionCount, functionCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
187
      totalCounts = new Array[Long](functionCount)
      val temp = new Array[SAMRecord => Boolean](functionCount)
      for (t <- 0 until (temp.size - 1)) temp(t) = functions(t)
      functions = temp
bow's avatar
bow committed
188

Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191
192
193
      val index = functionCount - 1
      names += (index -> name)
      functions(index) = function
      totalCounts(index) = 0
    }
bow's avatar
bow committed
194

Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
197
198
199
    def report: String = {
      val buffer = new StringBuilder
      buffer.append("Number\tTotal Flags\tFraction\tName\n")
      for (t <- 0 until names.size) {
        val precentage = (totalCounts(t).toFloat / readsCount) * 100
bow's avatar
bow committed
200
        buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
      }
      buffer.append("\n")
bow's avatar
bow committed
203

Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
      buffer.append(crossReport() + "\n")
      buffer.append(crossReport(fraction = true) + "\n")
bow's avatar
bow committed
206

Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
      return buffer.toString
    }
bow's avatar
bow committed
209
210

    def crossReport(fraction: Boolean = false): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
211
      val buffer = new StringBuilder
bow's avatar
bow committed
212

Peter van 't Hof's avatar
Peter van 't Hof committed
213
      for (t <- 0 until names.size) // Header for table
bow's avatar
bow committed
214
        buffer.append("\t#" + (t + 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      buffer.append("\n")
bow's avatar
bow committed
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
      for (t <- 0 until names.size) {
bow's avatar
bow committed
218
        buffer.append("#" + (t + 1) + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
223
224
        for (t2 <- 0 until names.size) {
          val reads = crossCounts(t)(t2)
          if (fraction) {
            val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100
            buffer.append(f"$precentage%.4f" + "%")
          } else buffer.append(reads)
bow's avatar
bow committed
225
          if (t2 == names.size - 1) buffer.append("\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
229
230
231
          else buffer.append("\t")
        }
      }
      return buffer.toString
    }
  } // End of class
bow's avatar
bow committed
232

Peter van 't Hof's avatar
Peter van 't Hof committed
233
}