Skip to content
Snippets Groups Projects
Commit 99f13260 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added flagstats to bamstats

parent 498358c5
No related branches found
No related tags found
No related merge requests found
Showing
with 307 additions and 253 deletions
...@@ -18,12 +18,13 @@ import java.io.File ...@@ -18,12 +18,13 @@ import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.core.summary.Summarizable import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.tools.flagstat
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class BiopetFlagstat(val root: Configurable) extends ToolCommandFunction with Summarizable { class BiopetFlagstat(val root: Configurable) extends ToolCommandFunction with Summarizable {
def toolObject = nl.lumc.sasc.biopet.tools.BiopetFlagstat def toolObject = flagstat.BiopetFlagstat
@Input(doc = "Input bam", shortName = "input", required = true) @Input(doc = "Input bam", shortName = "input", required = true)
var input: File = _ var input: File = _
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
*/ */
package nl.lumc.sasc.biopet package nl.lumc.sasc.biopet
import nl.lumc.sasc.biopet.tools.flagstat.BiopetFlagstat
import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand }
object BiopetToolsExecutable extends BiopetExecutable { object BiopetToolsExecutable extends BiopetExecutable {
...@@ -25,7 +26,7 @@ object BiopetToolsExecutable extends BiopetExecutable { ...@@ -25,7 +26,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.BaseCounter, nl.lumc.sasc.biopet.tools.BaseCounter,
nl.lumc.sasc.biopet.tools.BastyGenerateFasta, nl.lumc.sasc.biopet.tools.BastyGenerateFasta,
nl.lumc.sasc.biopet.tools.BedtoolsCoverageToCounts, nl.lumc.sasc.biopet.tools.BedtoolsCoverageToCounts,
nl.lumc.sasc.biopet.tools.BiopetFlagstat, BiopetFlagstat,
nl.lumc.sasc.biopet.tools.CheckAllelesVcfInBam, nl.lumc.sasc.biopet.tools.CheckAllelesVcfInBam,
nl.lumc.sasc.biopet.tools.ExtractAlignedFastq, nl.lumc.sasc.biopet.tools.ExtractAlignedFastq,
nl.lumc.sasc.biopet.tools.FastqSplitter, nl.lumc.sasc.biopet.tools.FastqSplitter,
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
import java.io.{ File, PrintWriter }
import htsjdk.samtools.{ SAMRecord, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.{ ToolCommand, ConfigUtils }
import scala.collection.JavaConversions._
import scala.collection.mutable
object BiopetFlagstat extends ToolCommand {
case class Args(inputFile: File = null,
outputFile: Option[File] = None,
summaryFile: Option[File] = None,
region: Option[String] = None) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) =>
c.copy(inputFile = x)
} text "input bam file"
opt[File]('o', "outputFile") valueName "<file>" action { (x, c) =>
c.copy(outputFile = Some(x))
} text "output file"
opt[File]('s', "summaryFile") valueName "<file>" action { (x, c) =>
c.copy(summaryFile = Some(x))
} text "summary output file"
opt[String]('r', "region") valueName "<chr:start-stop>" action { (x, c) =>
c.copy(region = Some(x))
}
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile)
val iterSam = if (commandArgs.region.isEmpty) inputSam.iterator else {
val regionRegex = """(.*):(.*)-(.*)""".r
commandArgs.region.get match {
case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false)
case _ => sys.error("Region wrong format")
}
}
val flagstatCollector = new FlagstatCollector
flagstatCollector.loadDefaultFunctions()
val m = 10
val max = 60
for (t <- 0 to (max / m))
flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m))
flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
flagstatCollector.addFunction("First normal, second read normal", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
flagstatCollector.addFunction("First inverted, second read inverted", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
flagstatCollector.addFunction("First inverted, second read normal", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
flagstatCollector.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag &&
record.getReferenceIndex == record.getMateReferenceIndex)
flagstatCollector.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex)
logger.info("Start reading file: " + commandArgs.inputFile)
for (record <- iterSam) {
if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0)
logger.info("Reads prosessed: " + flagstatCollector.readsCount)
flagstatCollector.loadRecord(record)
}
commandArgs.summaryFile.foreach {
case file =>
val writer = new PrintWriter(file)
writer.println(flagstatCollector.summary)
writer.close()
}
commandArgs.outputFile match {
case Some(file) => {
val writer = new PrintWriter(file)
writer.println(flagstatCollector.report)
writer.close()
}
case _ => println(flagstatCollector.report)
}
}
class FlagstatCollector {
private var functionCount = 0
var readsCount = 0
private val names: mutable.Map[Int, String] = mutable.Map()
private var functions: Array[SAMRecord => Boolean] = Array()
private var totalCounts: Array[Long] = Array()
private var crossCounts = Array.ofDim[Long](1, 1)
def loadDefaultFunctions() {
addFunction("All", record => true)
addFunction("Mapped", record => !record.getReadUnmappedFlag)
addFunction("Duplicates", record => record.getDuplicateReadFlag)
addFunction("FirstOfPair", record => if (record.getReadPairedFlag) record.getFirstOfPairFlag else false)
addFunction("SecondOfPair", record => if (record.getReadPairedFlag) record.getSecondOfPairFlag else false)
addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag)
addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag)
addFunction("ReadPaired", record => record.getReadPairedFlag)
addFunction("ProperPair", record => if (record.getReadPairedFlag) record.getProperPairFlag else false)
addFunction("MateNegativeStrand", record => if (record.getReadPairedFlag) record.getMateNegativeStrandFlag else false)
addFunction("MateUnmapped", record => if (record.getReadPairedFlag) record.getMateUnmappedFlag else false)
addFunction("ReadFailsVendorQualityCheck", record => record.getReadFailsVendorQualityCheckFlag)
addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag)
addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary)
}
def loadRecord(record: SAMRecord) {
readsCount += 1
val values: Array[Boolean] = new Array(names.size)
for (t <- 0 until names.size) {
values(t) = functions(t)(record)
if (values(t)) {
totalCounts(t) += 1
}
}
for (t <- 0 until names.size) {
for (t2 <- 0 until names.size) {
if (values(t) && values(t2)) {
crossCounts(t)(t2) += 1
}
}
}
}
def addFunction(name: String, function: SAMRecord => Boolean) {
functionCount += 1
crossCounts = Array.ofDim[Long](functionCount, functionCount)
totalCounts = new Array[Long](functionCount)
val temp = new Array[SAMRecord => Boolean](functionCount)
for (t <- 0 until (temp.length - 1)) temp(t) = functions(t)
functions = temp
val index = functionCount - 1
names += (index -> name)
functions(index) = function
totalCounts(index) = 0
}
def report: String = {
val buffer = new StringBuilder
buffer.append("Number\tTotal Flags\tFraction\tName\n")
for (t <- 0 until names.size) {
val precentage = (totalCounts(t).toFloat / readsCount) * 100
buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n")
}
buffer.append("\n")
buffer.append(crossReport() + "\n")
buffer.append(crossReport(fraction = true) + "\n")
buffer.toString()
}
def summary: String = {
val map = (for (t <- 0 until names.size) yield {
names(t) -> totalCounts(t)
}).toMap ++ Map("Singletons" -> crossCounts(names.find(_._2 == "Mapped").map(_._1).getOrElse(-1))(names.find(_._2 == "MateUnmapped").map(_._1).getOrElse(-1))
)
ConfigUtils.mapToJson(map).spaces4
}
def crossReport(fraction: Boolean = false): String = {
val buffer = new StringBuilder
for (t <- 0 until names.size) // Header for table
buffer.append("\t#" + (t + 1))
buffer.append("\n")
for (t <- 0 until names.size) {
buffer.append("#" + (t + 1) + "\t")
for (t2 <- 0 until names.size) {
val reads = crossCounts(t)(t2)
if (fraction) {
val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100
buffer.append(f"$precentage%.4f" + "%")
} else buffer.append(reads)
if (t2 == names.size - 1) buffer.append("\n")
else buffer.append("\t")
}
}
buffer.toString()
}
} // End of class
}
...@@ -114,7 +114,7 @@ object BamStats extends ToolCommand { ...@@ -114,7 +114,7 @@ object BamStats extends ToolCommand {
.grouped((region.length.toDouble / threadBinSize).ceil.toInt) .grouped((region.length.toDouble / threadBinSize).ceil.toInt)
.map(scatters => Future { processThread(scatters, bamFile) }) .map(scatters => Future { processThread(scatters, bamFile) })
val stats = waitOnFutures(scattersFutures.toList, Some(region.chr)) val stats = waitOnFutures(scattersFutures.toList, Some(region.chr))
val contigDir = new File(outputDir, "contigs" + File.separator + region.chr) val contigDir = new File(outputDir, "contigs" + File.separator + region.chr)
contigDir.mkdirs() contigDir.mkdirs()
stats.writeStatsToFiles(contigDir) stats.writeStatsToFiles(contigDir)
stats stats
...@@ -166,6 +166,7 @@ object BamStats extends ToolCommand { ...@@ -166,6 +166,7 @@ object BamStats extends ToolCommand {
// Read based stats // Read based stats
if (samRecord.getAlignmentStart > threadStart && samRecord.getAlignmentStart <= threadEnd) { if (samRecord.getAlignmentStart > threadStart && samRecord.getAlignmentStart <= threadEnd) {
totalStats.flagstat.loadRecord(samRecord)
totalStats.totalReads += 1 totalStats.totalReads += 1
if (samRecord.isSecondaryOrSupplementary) totalStats.secondary += 1 if (samRecord.isSecondaryOrSupplementary) totalStats.secondary += 1
if (samRecord.getReadUnmappedFlag) totalStats.unmapped += 1 if (samRecord.getReadUnmappedFlag) totalStats.unmapped += 1
...@@ -202,16 +203,18 @@ object BamStats extends ToolCommand { ...@@ -202,16 +203,18 @@ object BamStats extends ToolCommand {
/** /**
* This method will only count the unmapped fragments * This method will only count the unmapped fragments
* *
* @param bamFile Input bamfile * @param bamFile Input bamfile
* @return Output stats * @return Output stats
*/ */
def processUnmappedReads(bamFile: File): Stats = { def processUnmappedReads(bamFile: File): Stats = {
val stats = Stats() val stats = Stats()
val samReader = SamReaderFactory.makeDefault().open(bamFile) val samReader = SamReaderFactory.makeDefault().open(bamFile)
var size = samReader.queryUnmapped().size for (samRecord <- samReader.queryUnmapped()) {
stats.totalReads += size stats.flagstat.loadRecord(samRecord)
stats.unmapped += size stats.totalReads += 1
stats.unmapped += 1
}
samReader.close() samReader.close()
stats stats
} }
......
...@@ -2,12 +2,15 @@ package nl.lumc.sasc.biopet.tools.bamstats ...@@ -2,12 +2,15 @@ package nl.lumc.sasc.biopet.tools.bamstats
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.tools.flagstat.FlagstatCollector
/** /**
* Created by pjvanthof on 05/07/16. * Created by pjvanthof on 05/07/16.
*/ */
case class Stats(var totalReads: Long = 0L, case class Stats(var totalReads: Long = 0L,
var unmapped: Long = 0L, var unmapped: Long = 0L,
var secondary: Long = 0L, var secondary: Long = 0L,
flagstat: FlagstatCollector = new FlagstatCollector(),
mappingQualityHistogram: Histogram[Int] = new Histogram[Int](), mappingQualityHistogram: Histogram[Int] = new Histogram[Int](),
insertSizeHistogram: Histogram[Int] = new Histogram[Int](), insertSizeHistogram: Histogram[Int] = new Histogram[Int](),
clippingHistogram: Histogram[Int] = new Histogram[Int](), clippingHistogram: Histogram[Int] = new Histogram[Int](),
...@@ -16,10 +19,14 @@ case class Stats(var totalReads: Long = 0L, ...@@ -16,10 +19,14 @@ case class Stats(var totalReads: Long = 0L,
_5_ClippingHistogram: Histogram[Int] = new Histogram[Int](), _5_ClippingHistogram: Histogram[Int] = new Histogram[Int](),
_3_ClippingHistogram: Histogram[Int] = new Histogram[Int]()) { _3_ClippingHistogram: Histogram[Int] = new Histogram[Int]()) {
flagstat.loadDefaultFunctions()
flagstat.loadAditionalFunctions(1, 0)
/** This will add an other [[Stats]] inside `this` */ /** This will add an other [[Stats]] inside `this` */
def +=(other: Stats): Stats = { def +=(other: Stats): Stats = {
this.totalReads += other.totalReads this.totalReads += other.totalReads
this.unmapped += other.unmapped this.unmapped += other.unmapped
this.flagstat += other.flagstat
this.mappingQualityHistogram += other.mappingQualityHistogram this.mappingQualityHistogram += other.mappingQualityHistogram
this.insertSizeHistogram += other.insertSizeHistogram this.insertSizeHistogram += other.insertSizeHistogram
this.clippingHistogram += other.clippingHistogram this.clippingHistogram += other.clippingHistogram
...@@ -31,6 +38,8 @@ case class Stats(var totalReads: Long = 0L, ...@@ -31,6 +38,8 @@ case class Stats(var totalReads: Long = 0L,
} }
def writeStatsToFiles(outputDir: File): Unit = { def writeStatsToFiles(outputDir: File): Unit = {
this.flagstat.writeReportToFile(new File(outputDir, "flagstats"))
this.flagstat.writeSummaryTofile(new File(outputDir, "flagstats.summary.json"))
this.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv")) this.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv"))
this.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv")) this.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv"))
this.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv")) this.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv"))
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools.flagstat
import java.io.{ File, PrintWriter }
import htsjdk.samtools.{ SAMRecord, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, ToolCommand }
import scala.collection.JavaConversions._
import scala.collection.mutable
object BiopetFlagstat extends ToolCommand {
case class Args(inputFile: File = null,
outputFile: Option[File] = None,
summaryFile: Option[File] = None,
region: Option[String] = None) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) =>
c.copy(inputFile = x)
} text "input bam file"
opt[File]('o', "outputFile") valueName "<file>" action { (x, c) =>
c.copy(outputFile = Some(x))
} text "output file"
opt[File]('s', "summaryFile") valueName "<file>" action { (x, c) =>
c.copy(summaryFile = Some(x))
} text "summary output file"
opt[String]('r', "region") valueName "<chr:start-stop>" action { (x, c) =>
c.copy(region = Some(x))
}
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile)
val iterSam = if (commandArgs.region.isEmpty) inputSam.iterator else {
val regionRegex = """(.*):(.*)-(.*)""".r
commandArgs.region.get match {
case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false)
case _ => sys.error("Region wrong format")
}
}
val flagstatCollector = new FlagstatCollector
flagstatCollector.loadDefaultFunctions()
flagstatCollector.loadAditionalFunctions()
logger.info("Start reading file: " + commandArgs.inputFile)
for (record <- iterSam) {
if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0)
logger.info("Reads prosessed: " + flagstatCollector.readsCount)
flagstatCollector.loadRecord(record)
}
commandArgs.summaryFile.foreach(file => flagstatCollector.writeSummaryTofile(file))
commandArgs.outputFile match {
case Some(file) => flagstatCollector.writeReportToFile(file)
case _ => println(flagstatCollector.report)
}
}
}
package nl.lumc.sasc.biopet.tools.flagstat
import java.io.{ File, PrintWriter }
import htsjdk.samtools.SAMRecord
import nl.lumc.sasc.biopet.utils.ConfigUtils
import scala.collection.mutable
/**
* Created by pjvan_thof on 21-7-16.
*/
class FlagstatCollector {
protected[FlagstatCollector] var functionCount = 0
var readsCount = 0
protected[FlagstatCollector] val names: mutable.Map[Int, String] = mutable.Map()
protected[FlagstatCollector] var functions: Array[SAMRecord => Boolean] = Array()
protected[FlagstatCollector] var totalCounts: Array[Long] = Array()
protected[FlagstatCollector] var crossCounts = Array.ofDim[Long](1, 1)
def loadDefaultFunctions() {
addFunction("All", record => true)
addFunction("Mapped", record => !record.getReadUnmappedFlag)
addFunction("Duplicates", record => record.getDuplicateReadFlag)
addFunction("FirstOfPair", record => if (record.getReadPairedFlag) record.getFirstOfPairFlag else false)
addFunction("SecondOfPair", record => if (record.getReadPairedFlag) record.getSecondOfPairFlag else false)
addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag)
addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag)
addFunction("ReadPaired", record => record.getReadPairedFlag)
addFunction("ProperPair", record => if (record.getReadPairedFlag) record.getProperPairFlag else false)
addFunction("MateNegativeStrand", record => if (record.getReadPairedFlag) record.getMateNegativeStrandFlag else false)
addFunction("MateUnmapped", record => if (record.getReadPairedFlag) record.getMateUnmappedFlag else false)
addFunction("ReadFailsVendorQualityCheck", record => record.getReadFailsVendorQualityCheckFlag)
addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag)
addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary)
}
def loadAditionalFunctions(m: Int = 10, max: Int = 60): Unit = {
for (t <- 0 to (max / m))
this.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m))
this.addFunction("First normal, second read inverted (paired end orientation)", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
this.addFunction("First normal, second read normal", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
this.addFunction("First inverted, second read inverted", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
this.addFunction("First inverted, second read normal", record => {
if (record.getReadPairedFlag &&
record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag &&
((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) ||
(record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true
else false
})
this.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag &&
record.getReferenceIndex == record.getMateReferenceIndex)
this.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex)
}
def loadRecord(record: SAMRecord) {
readsCount += 1
val values: Array[Boolean] = new Array(names.size)
for (t <- 0 until names.size) {
values(t) = functions(t)(record)
if (values(t)) {
totalCounts(t) += 1
}
}
for (t <- 0 until names.size) {
for (t2 <- 0 until names.size) {
if (values(t) && values(t2)) {
crossCounts(t)(t2) += 1
}
}
}
}
def addFunction(name: String, function: SAMRecord => Boolean) {
functionCount += 1
crossCounts = Array.ofDim[Long](functionCount, functionCount)
totalCounts = new Array[Long](functionCount)
val temp = new Array[SAMRecord => Boolean](functionCount)
for (t <- 0 until (temp.length - 1)) temp(t) = functions(t)
functions = temp
val index = functionCount - 1
names += (index -> name)
functions(index) = function
totalCounts(index) = 0
}
def report: String = {
val buffer = new StringBuilder
buffer.append("Number\tTotal Flags\tFraction\tName\n")
for (t <- 0 until names.size) {
val precentage = (totalCounts(t).toFloat / readsCount) * 100
buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n")
}
buffer.append("\n")
buffer.append(crossReport() + "\n")
buffer.append(crossReport(fraction = true) + "\n")
buffer.toString()
}
def writeReportToFile(outputFile: File): Unit = {
val writer = new PrintWriter(outputFile)
writer.println(report)
writer.close()
}
def summary: String = {
val map = (for (t <- 0 until names.size) yield {
names(t) -> totalCounts(t)
}).toMap ++ Map("Singletons" -> crossCounts(names.find(_._2 == "Mapped").map(_._1).getOrElse(-1))(names.find(_._2 == "MateUnmapped").map(_._1).getOrElse(-1))
)
ConfigUtils.mapToJson(map).spaces4
}
def writeSummaryTofile(outputFile: File): Unit = {
val writer = new PrintWriter(outputFile)
writer.println(summary)
writer.close()
}
def crossReport(fraction: Boolean = false): String = {
val buffer = new StringBuilder
for (t <- 0 until names.size) // Header for table
buffer.append("\t#" + (t + 1))
buffer.append("\n")
for (t <- 0 until names.size) {
buffer.append("#" + (t + 1) + "\t")
for (t2 <- 0 until names.size) {
val reads = crossCounts(t)(t2)
if (fraction) {
val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100
buffer.append(f"$precentage%.4f" + "%")
} else buffer.append(reads)
if (t2 == names.size - 1) buffer.append("\n")
else buffer.append("\t")
}
}
buffer.toString()
}
def +=(other: FlagstatCollector): FlagstatCollector = {
require(this.names == other.names)
//require(this.functions == other.functions)
this.readsCount += other.readsCount
this.totalCounts.zipWithIndex.foreach { case (v, i) => this.totalCounts(i) += other.totalCounts(i) }
this.crossCounts.zipWithIndex.foreach {
case (v1, i1) => v1.zipWithIndex.foreach {
case (v2, i2) =>
this.crossCounts(i1)(i2) += other.crossCounts(i1)(i2)
}
}
this
}
override def equals(other: Any): Boolean = {
other match {
case f: FlagstatCollector => f.totalCounts.toList == this.totalCounts.toList
case _ => false
}
}
}
...@@ -5,8 +5,8 @@ import org.scalatest.testng.TestNGSuite ...@@ -5,8 +5,8 @@ import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test import org.testng.annotations.Test
/** /**
* Created by pjvan_thof on 19-7-16. * Created by pjvan_thof on 19-7-16.
*/ */
class StatsTest extends TestNGSuite with Matchers { class StatsTest extends TestNGSuite with Matchers {
@Test @Test
def testEqual(): Unit = { def testEqual(): Unit = {
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* license; For commercial users or users who do not want to follow the AGPL * license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license. * license, please contact us to obtain a separate license.
*/ */
package nl.lumc.sasc.biopet.tools package nl.lumc.sasc.biopet.tools.flagstat
import java.io.File import java.io.File
import java.nio.file.Paths import java.nio.file.Paths
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment