diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala index b3567003581188aeb1dff8296e6fd07039875769..f8ed110e7d129dfa78d71a954fad72654aa54eda 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala @@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.tools.bamstats import java.io.File -import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory} +import htsjdk.samtools.{CigarOperator, SAMSequenceDictionary, SamReaderFactory} import htsjdk.samtools.reference.FastaSequenceFile import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList} @@ -82,6 +82,11 @@ object BamStats extends ToolCommand { stats.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv")) stats.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv")) + stats.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv")) + stats.leftClippingHistogram.writeToTsv(new File(outputDir, "left_clipping.tsv")) + stats.rightClippingHistogram.writeToTsv(new File(outputDir, "right_clipping.tsv")) + stats._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv")) + stats._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv")) } def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = { @@ -134,7 +139,21 @@ object BamStats extends ToolCommand { if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag) totalStats.insertSizeHistogram.add(samRecord.getInferredInsertSize.abs) - //TODO: Clipping stats + val leftClipping = samRecord.getAlignmentStart - samRecord.getUnclippedStart + val rightClipping = samRecord.getUnclippedEnd - samRecord.getAlignmentEnd + + totalStats.clippingHistogram.add(leftClipping + rightClipping) + totalStats.leftClippingHistogram.add(leftClipping) + totalStats.rightClippingHistogram.add(rightClipping) + + if (samRecord.getReadNegativeStrandFlag) { + totalStats._5_ClippingHistogram.add(leftClipping) + totalStats._3_ClippingHistogram.add(rightClipping) + } else { + totalStats._5_ClippingHistogram.add(rightClipping) + totalStats._3_ClippingHistogram.add(leftClipping) + } + //TODO: Bin Support } diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala index 4fdb3bb058a351d76f0e51626586b5fd5027ab8a..7dee85fd0c7ca73feec5b63a4cc93451a004c917 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala @@ -14,12 +14,22 @@ case class Stats() { var secondary = 0L val mappingQualityHistogram = Histogram() val insertSizeHistogram = Histogram() + val clippingHistogram = Histogram() + val leftClippingHistogram = Histogram() + val rightClippingHistogram = Histogram() + val _5_ClippingHistogram = Histogram() + val _3_ClippingHistogram = Histogram() def +(other: Stats): Stats = { this.totalReads += other.totalReads this.unmapped += other.unmapped this.mappingQualityHistogram += other.mappingQualityHistogram this.insertSizeHistogram += other.insertSizeHistogram + this.clippingHistogram += other.clippingHistogram + this.leftClippingHistogram += other.leftClippingHistogram + this.rightClippingHistogram += other.rightClippingHistogram + this._5_ClippingHistogram += other._5_ClippingHistogram + this._3_ClippingHistogram += other._3_ClippingHistogram this } }