Histogram.scala 4.1 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.tools.bamstats

Peter van 't Hof's avatar
WIP    
Peter van 't Hof committed
17
import java.io.{File, IOException, PrintWriter}
Peter van 't Hof's avatar
Peter van 't Hof committed
18
19

import nl.lumc.sasc.biopet.utils.rscript.LinePlot
Peter van 't Hof's avatar
WIP    
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.{Logging, sortAnyAny}
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22

import scala.collection.mutable
Peter van 't Hof's avatar
WIP    
Peter van 't Hof committed
23
24
import scala.concurrent.ExecutionContext
import scala.concurrent.ExecutionContext.Implicits.global
Peter van 't Hof's avatar
Peter van 't Hof committed
25
26

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
27
28
 * Created by pjvanthof on 05/07/16.
 */
29
30
31
32
33
34
35
36
class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T]) {
  protected[Counts] val counts: mutable.Map[T, Long] = mutable.Map() ++ _counts

  /** Returns histogram as map */
  def countsMap = counts.toMap

  /** Returns value if it does exist */
  def get(key: T) = counts.get(key)
Peter van 't Hof's avatar
Peter van 't Hof committed
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  /** This will add an other histogram to `this` */
Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
  def +=(other: Counts[T]): Counts[T] = {
    other.counts.foreach(x => this.counts += x._1 -> (this.counts.getOrElse(x._1, 0L) + x._2))
Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
43
    this
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  /** With this a value can be added to the histogram */
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
  def add(value: T): Unit = {
    counts += value -> (counts.getOrElse(value, 0L) + 1)
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  /** Write histogram to a tsv/count file */
Peter van 't Hof's avatar
Peter van 't Hof committed
50
  def writeHistogramToTsv(file: File): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
    val writer = new PrintWriter(file)
    writer.println("value\tcount")
Peter van 't Hof's avatar
Peter van 't Hof committed
53
    counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}"))
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
    writer.close()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
59
60
61
  def toSummaryMap = {
    val values = counts.keySet.toList.sortWith(sortAnyAny)
    Map("values" -> values, "counts" -> values.map(counts(_)))
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
64
65
66
67
  override def equals(other: Any): Boolean = {
    other match {
      case c: Counts[T] => this.counts == c.counts
      case _            => false
    }
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
68
}
Peter van 't Hof's avatar
Peter van 't Hof committed
69

70
class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) {
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  def aggregateStats: Map[String, Any] = {
    val values = this.counts.keys.toList
    val counts = this.counts.values.toList
    require(values.size == counts.size)
    if (values.nonEmpty) {
      val modal = values(counts.indexOf(counts.max))
      val totalCounts = counts.sum
      val mean: Double = values.zip(counts).map(x => ord.toDouble(x._1) * x._2).sum / totalCounts
      val median = values(values.zip(counts).zipWithIndex.sortBy(_._1._1).foldLeft((0L, 0)) {
        case (a, b) =>
          val total = a._1 + b._1._2
          if (total >= totalCounts / 2) (total, a._2)
          else (total, b._2)
      }._2)
      Map("min" -> values.min, "max" -> values.max, "median" -> median, "mean" -> mean, "modal" -> modal)
    } else Map()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
88

89
90
91
92
93
94
95
  /** Write histogram to a tsv/count file */
  def writeAggregateToTsv(file: File): Unit = {
    val writer = new PrintWriter(file)
    aggregateStats.foreach(x => writer.println(x._1 + "\t" + x._2))
    writer.close()
  }

Peter van 't Hof's avatar
WIP    
Peter van 't Hof committed
96
  def writeFilesAndPlot(outputDir: File, prefix: String, xlabel: String, ylabel: String, title: String)(implicit ec: ExecutionContext): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
99
100
101
102
103
104
    writeHistogramToTsv(new File(outputDir, prefix + ".histogram.tsv"))
    writeAggregateToTsv(new File(outputDir, prefix + ".stats.tsv"))
    val plot = new LinePlot(null)
    plot.input = new File(outputDir, prefix + ".histogram.tsv")
    plot.output = new File(outputDir, prefix + ".histogram.png")
    plot.xlabel = Some(xlabel)
    plot.ylabel = Some(ylabel)
    plot.title = Some(title)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
108
109
110
    try {
      plot.runLocal()
    } catch {
      // If plotting fails the tools should not fail, this depens on R to be installed
      case e: IOException => Logging.logger.warn(s"Error found while plotting ${plot.output}: ${e.getMessage}")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
113
}