From 8d49ccbf1a20f323bf4487c3d88d27da8674703b Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 6 Jul 2016 00:56:02 +0200
Subject: [PATCH] Added clipping stats

---
 .../sasc/biopet/tools/bamstats/BamStats.scala | 23 +++++++++++++++++--
 .../sasc/biopet/tools/bamstats/Stats.scala    | 10 ++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala
index b35670035..f8ed110e7 100644
--- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala
+++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala
@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.tools.bamstats
 
 import java.io.File
 
-import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory}
+import htsjdk.samtools.{CigarOperator, SAMSequenceDictionary, SamReaderFactory}
 import htsjdk.samtools.reference.FastaSequenceFile
 import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
 import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList}
@@ -82,6 +82,11 @@ object BamStats extends ToolCommand {
 
     stats.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv"))
     stats.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv"))
+    stats.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv"))
+    stats.leftClippingHistogram.writeToTsv(new File(outputDir, "left_clipping.tsv"))
+    stats.rightClippingHistogram.writeToTsv(new File(outputDir, "right_clipping.tsv"))
+    stats._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv"))
+    stats._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv"))
   }
 
   def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = {
@@ -134,7 +139,21 @@ object BamStats extends ToolCommand {
         if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag)
           totalStats.insertSizeHistogram.add(samRecord.getInferredInsertSize.abs)
 
-        //TODO: Clipping stats
+        val leftClipping = samRecord.getAlignmentStart - samRecord.getUnclippedStart
+        val rightClipping = samRecord.getUnclippedEnd - samRecord.getAlignmentEnd
+
+        totalStats.clippingHistogram.add(leftClipping + rightClipping)
+        totalStats.leftClippingHistogram.add(leftClipping)
+        totalStats.rightClippingHistogram.add(rightClipping)
+
+        if (samRecord.getReadNegativeStrandFlag) {
+          totalStats._5_ClippingHistogram.add(leftClipping)
+          totalStats._3_ClippingHistogram.add(rightClipping)
+        } else {
+          totalStats._5_ClippingHistogram.add(rightClipping)
+          totalStats._3_ClippingHistogram.add(leftClipping)
+        }
+
         //TODO: Bin Support
       }
 
diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala
index 4fdb3bb05..7dee85fd0 100644
--- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala
+++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala
@@ -14,12 +14,22 @@ case class Stats() {
   var secondary = 0L
   val mappingQualityHistogram = Histogram()
   val insertSizeHistogram = Histogram()
+  val clippingHistogram = Histogram()
+  val leftClippingHistogram = Histogram()
+  val rightClippingHistogram = Histogram()
+  val _5_ClippingHistogram = Histogram()
+  val _3_ClippingHistogram = Histogram()
 
   def +(other: Stats): Stats = {
     this.totalReads += other.totalReads
     this.unmapped += other.unmapped
     this.mappingQualityHistogram += other.mappingQualityHistogram
     this.insertSizeHistogram += other.insertSizeHistogram
+    this.clippingHistogram += other.clippingHistogram
+    this.leftClippingHistogram += other.leftClippingHistogram
+    this.rightClippingHistogram += other.rightClippingHistogram
+    this._5_ClippingHistogram += other._5_ClippingHistogram
+    this._3_ClippingHistogram += other._3_ClippingHistogram
     this
   }
 }
-- 
GitLab