BiopetFlagstat.scala 11.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
Peter van 't Hof's avatar
Peter van 't Hof committed
17

Peter van 't Hof's avatar
Peter van 't Hof committed
18 19
import java.io.{ File, PrintWriter }

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import htsjdk.samtools.{ SAMRecord, SamReaderFactory }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.config.Configurable
22
import nl.lumc.sasc.biopet.core.summary.Summarizable
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion }
24
import nl.lumc.sasc.biopet.utils.ConfigUtils
bow's avatar
bow committed
25
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
Peter van 't Hof's avatar
Peter van 't Hof committed
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
import scala.collection.JavaConversions._
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import scala.collection.mutable
Peter van 't Hof's avatar
Peter van 't Hof committed
29

30
class BiopetFlagstat(val root: Configurable) extends ToolCommandFuntion with Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
31
  javaMainClass = getClass.getName
bow's avatar
bow committed
32 33

  @Input(doc = "Input bam", shortName = "input", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var input: File = _
bow's avatar
bow committed
35 36

  @Output(doc = "Output flagstat", shortName = "output", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
37
  var output: File = _
bow's avatar
bow committed
38

39 40 41
  @Output(doc = "summary output file", shortName = "output", required = false)
  var summaryFile: File = _

bow's avatar
bow committed
42
  override val defaultCoreMemory = 6.0
bow's avatar
bow committed
43

44 45 46 47
  override def commandLine = super.commandLine + required("-I", input) + required("-s", summaryFile) + " > " + required(output)

  def summaryFiles: Map[String, File] = Map()

48
  def summaryStats: Map[String, Any] = {
49 50
    ConfigUtils.fileToConfigMap(summaryFile)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
51 52
}

53
object BiopetFlagstat extends ToolCommand {
54 55
  import scala.collection.mutable.Map

56
  def apply(root: Configurable, input: File, outputDir: File): BiopetFlagstat = {
Peter van 't Hof's avatar
Peter van 't Hof committed
57 58 59
    val flagstat = new BiopetFlagstat(root)
    flagstat.input = input
    flagstat.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".biopetflagstat")
60
    flagstat.summaryFile = new File(outputDir, input.getName.stripSuffix(".bam") + ".biopetflagstat.json")
61
    flagstat
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
63

64
  case class Args(inputFile: File = null, summaryFile: Option[File] = None, region: Option[String] = None) extends AbstractArgs
65 66

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
67
    opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
68
      c.copy(inputFile = x)
Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
    } text "input bam file"
    opt[File]('s', "summaryFile") valueName "<file>" action { (x, c) =>
71
      c.copy(summaryFile = Some(x))
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73
    } text "summary output file"
    opt[String]('r', "region") valueName "<chr:start-stop>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      c.copy(region = Some(x))
75
    }
76
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78 79 80 81
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
82
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
83 84
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

Peter van 't Hof's avatar
Peter van 't Hof committed
85
    val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
86
    val iterSam = if (commandArgs.region.isEmpty) inputSam.iterator else {
87 88 89
      val regionRegex = """(.*):(.*)-(.*)""".r
      commandArgs.region.get match {
        case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false)
Peter van 't Hof's avatar
Peter van 't Hof committed
90
        case _                             => sys.error("Region wrong format")
91 92
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
93

Peter van 't Hof's avatar
Peter van 't Hof committed
94
    val flagstatCollector = new FlagstatCollector
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    flagstatCollector.loadDefaultFunctions()
Peter van 't Hof's avatar
Peter van 't Hof committed
96 97 98
    val m = 10
    val max = 60
    for (t <- 0 to (max / m))
bow's avatar
bow committed
99
      flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m))
Peter van 't Hof's avatar
Peter van 't Hof committed
100 101 102
    flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => {
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
103 104 105 106
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
107 108 109
      else false
    })
    flagstatCollector.addFunction("First normal, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
110 111
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
112 113 114 115
        ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
116 117 118
      else false
    })
    flagstatCollector.addFunction("First inverted, second read inverted", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
119 120
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
121 122 123 124
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
125 126 127
      else false
    })
    flagstatCollector.addFunction("First inverted, second read normal", record => {
Peter van 't Hof's avatar
Peter van 't Hof committed
128 129
      if (record.getReadPairedFlag &&
        record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
130 131 132 133
        ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
          (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
Peter van 't Hof's avatar
Peter van 't Hof committed
134 135
      else false
    })
136
    flagstatCollector.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag &&
bow's avatar
bow committed
137
      record.getReferenceIndex == record.getMateReferenceIndex)
138
    flagstatCollector.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex)
Peter van 't Hof's avatar
Peter van 't Hof committed
139

140
    logger.info("Start reading file: " + commandArgs.inputFile)
141
    for (record <- iterSam) {
Peter van 't Hof's avatar
Peter van 't Hof committed
142
      if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0)
143
        logger.info("Reads prosessed: " + flagstatCollector.readsCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
144 145
      flagstatCollector.loadRecord(record)
    }
bow's avatar
bow committed
146

147
    commandArgs.summaryFile.foreach {
Peter van 't Hof's avatar
Peter van 't Hof committed
148
      case file =>
149 150 151 152 153
        val writer = new PrintWriter(file)
        writer.println(flagstatCollector.summary)
        writer.close()
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
154 155
    println(flagstatCollector.report)
  }
bow's avatar
bow committed
156 157

  class FlagstatCollector {
Peter van 't Hof's avatar
Peter van 't Hof committed
158 159
    private var functionCount = 0
    var readsCount = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    private val names: mutable.Map[Int, String] = mutable.Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
161 162
    private var functions: Array[SAMRecord => Boolean] = Array()
    private var totalCounts: Array[Long] = Array()
Peter van 't Hof's avatar
Peter van 't Hof committed
163
    private var crossCounts = Array.ofDim[Long](1, 1)
bow's avatar
bow committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
    def loadDefaultFunctions() {
Peter van 't Hof's avatar
Peter van 't Hof committed
166 167 168
      addFunction("All", record => true)
      addFunction("Mapped", record => !record.getReadUnmappedFlag)
      addFunction("Duplicates", record => record.getDuplicateReadFlag)
169 170
      addFunction("FirstOfPair", record => if (record.getReadPairedFlag) record.getFirstOfPairFlag else false)
      addFunction("SecondOfPair", record => if (record.getReadPairedFlag) record.getSecondOfPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
171 172

      addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag)
bow's avatar
bow committed
173

Peter van 't Hof's avatar
Peter van 't Hof committed
174 175 176
      addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag)

      addFunction("ReadPaired", record => record.getReadPairedFlag)
177
      addFunction("ProperPair", record => if (record.getReadPairedFlag) record.getProperPairFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
178

179 180
      addFunction("MateNegativeStrand", record => if (record.getReadPairedFlag) record.getMateNegativeStrandFlag else false)
      addFunction("MateUnmapped", record => if (record.getReadPairedFlag) record.getMateUnmappedFlag else false)
Peter van 't Hof's avatar
Peter van 't Hof committed
181 182 183 184 185

      addFunction("ReadFailsVendorQualityCheck", record => record.getReadFailsVendorQualityCheckFlag)
      addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag)
      addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary)
    }
bow's avatar
bow committed
186 187

    def loadRecord(record: SAMRecord) {
Peter van 't Hof's avatar
Peter van 't Hof committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
      readsCount += 1
      val values: Array[Boolean] = new Array(names.size)
      for (t <- 0 until names.size) {
        values(t) = functions(t)(record)
        if (values(t)) {
          totalCounts(t) += 1
        }
      }
      for (t <- 0 until names.size) {
        for (t2 <- 0 until names.size) {
          if (values(t) && values(t2)) {
            crossCounts(t)(t2) += 1
          }
        }
      }
    }
bow's avatar
bow committed
204 205

    def addFunction(name: String, function: SAMRecord => Boolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
      functionCount += 1
Peter van 't Hof's avatar
Peter van 't Hof committed
207
      crossCounts = Array.ofDim[Long](functionCount, functionCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
208 209
      totalCounts = new Array[Long](functionCount)
      val temp = new Array[SAMRecord => Boolean](functionCount)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
      for (t <- 0 until (temp.length - 1)) temp(t) = functions(t)
Peter van 't Hof's avatar
Peter van 't Hof committed
211
      functions = temp
bow's avatar
bow committed
212

Peter van 't Hof's avatar
Peter van 't Hof committed
213 214 215 216 217
      val index = functionCount - 1
      names += (index -> name)
      functions(index) = function
      totalCounts(index) = 0
    }
bow's avatar
bow committed
218

Peter van 't Hof's avatar
Peter van 't Hof committed
219 220 221 222 223
    def report: String = {
      val buffer = new StringBuilder
      buffer.append("Number\tTotal Flags\tFraction\tName\n")
      for (t <- 0 until names.size) {
        val precentage = (totalCounts(t).toFloat / readsCount) * 100
bow's avatar
bow committed
224
        buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
225 226
      }
      buffer.append("\n")
bow's avatar
bow committed
227

Peter van 't Hof's avatar
Peter van 't Hof committed
228 229
      buffer.append(crossReport() + "\n")
      buffer.append(crossReport(fraction = true) + "\n")
bow's avatar
bow committed
230

Peter van 't Hof's avatar
Peter van 't Hof committed
231
      buffer.toString()
Peter van 't Hof's avatar
Peter van 't Hof committed
232
    }
bow's avatar
bow committed
233

234 235 236 237 238
    def summary: String = {
      val map = (for (t <- 0 until names.size) yield {
        names(t) -> totalCounts(t)
      }).toMap

Peter van 't Hof's avatar
Peter van 't Hof committed
239
      ConfigUtils.mapToJson(map).spaces4
240 241
    }

bow's avatar
bow committed
242
    def crossReport(fraction: Boolean = false): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
      val buffer = new StringBuilder
bow's avatar
bow committed
244

Peter van 't Hof's avatar
Peter van 't Hof committed
245
      for (t <- 0 until names.size) // Header for table
bow's avatar
bow committed
246
        buffer.append("\t#" + (t + 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
247
      buffer.append("\n")
bow's avatar
bow committed
248

Peter van 't Hof's avatar
Peter van 't Hof committed
249
      for (t <- 0 until names.size) {
bow's avatar
bow committed
250
        buffer.append("#" + (t + 1) + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
251 252 253 254 255 256
        for (t2 <- 0 until names.size) {
          val reads = crossCounts(t)(t2)
          if (fraction) {
            val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100
            buffer.append(f"$precentage%.4f" + "%")
          } else buffer.append(reads)
bow's avatar
bow committed
257
          if (t2 == names.size - 1) buffer.append("\n")
Peter van 't Hof's avatar
Peter van 't Hof committed
258 259 260
          else buffer.append("\t")
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
261
      buffer.toString()
Peter van 't Hof's avatar
Peter van 't Hof committed
262 263
    }
  } // End of class
bow's avatar
bow committed
264

Peter van 't Hof's avatar
Peter van 't Hof committed
265
}