From 8d49ccbf1a20f323bf4487c3d88d27da8674703b Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 6 Jul 2016 00:56:02 +0200 Subject: [PATCH] Added clipping stats --- .../sasc/biopet/tools/bamstats/BamStats.scala | 23 +++++++++++++++++-- .../sasc/biopet/tools/bamstats/Stats.scala | 10 ++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala index b35670035..f8ed110e7 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala @@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.tools.bamstats import java.io.File -import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory} +import htsjdk.samtools.{CigarOperator, SAMSequenceDictionary, SamReaderFactory} import htsjdk.samtools.reference.FastaSequenceFile import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList} @@ -82,6 +82,11 @@ object BamStats extends ToolCommand { stats.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv")) stats.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv")) + stats.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv")) + stats.leftClippingHistogram.writeToTsv(new File(outputDir, "left_clipping.tsv")) + stats.rightClippingHistogram.writeToTsv(new File(outputDir, "right_clipping.tsv")) + stats._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv")) + stats._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv")) } def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = { @@ -134,7 +139,21 @@ object BamStats extends ToolCommand { if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag) totalStats.insertSizeHistogram.add(samRecord.getInferredInsertSize.abs) - //TODO: Clipping stats + val leftClipping = samRecord.getAlignmentStart - samRecord.getUnclippedStart + val rightClipping = samRecord.getUnclippedEnd - samRecord.getAlignmentEnd + + totalStats.clippingHistogram.add(leftClipping + rightClipping) + totalStats.leftClippingHistogram.add(leftClipping) + totalStats.rightClippingHistogram.add(rightClipping) + + if (samRecord.getReadNegativeStrandFlag) { + totalStats._5_ClippingHistogram.add(leftClipping) + totalStats._3_ClippingHistogram.add(rightClipping) + } else { + totalStats._5_ClippingHistogram.add(rightClipping) + totalStats._3_ClippingHistogram.add(leftClipping) + } + //TODO: Bin Support } diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala index 4fdb3bb05..7dee85fd0 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala @@ -14,12 +14,22 @@ case class Stats() { var secondary = 0L val mappingQualityHistogram = Histogram() val insertSizeHistogram = Histogram() + val clippingHistogram = Histogram() + val leftClippingHistogram = Histogram() + val rightClippingHistogram = Histogram() + val _5_ClippingHistogram = Histogram() + val _3_ClippingHistogram = Histogram() def +(other: Stats): Stats = { this.totalReads += other.totalReads this.unmapped += other.unmapped this.mappingQualityHistogram += other.mappingQualityHistogram this.insertSizeHistogram += other.insertSizeHistogram + this.clippingHistogram += other.clippingHistogram + this.leftClippingHistogram += other.leftClippingHistogram + this.rightClippingHistogram += other.rightClippingHistogram + this._5_ClippingHistogram += other._5_ClippingHistogram + this._3_ClippingHistogram += other._3_ClippingHistogram this } } -- GitLab