Stats.scala 4.06 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.tools.vcfstats

Peter van 't Hof's avatar
Peter van 't Hof committed
3
4
import java.io.{File, PrintWriter}

Peter van 't Hof's avatar
Peter van 't Hof committed
5
6
import scala.collection.mutable

Peter van 't Hof's avatar
Peter van 't Hof committed
7
8
import nl.lumc.sasc.biopet.utils.sortAnyAny

Peter van 't Hof's avatar
Peter van 't Hof committed
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/**
  * General stats class to store vcf stats
  *
  * @param generalStats Stores all general stats
  * @param samplesStats Stores all sample/genotype specific stats
  */
case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
                 samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {
  /**
    * Merges the stats of another [[Stats]] instance into this one.
    *
    * Sample stats that exist on both sides are combined using `+=` on the sample stats already held here.
    * General stats counts are summed per chromosome, field and value.
    *
    * @param other stats to merge into this instance
    * @return this instance, after merging
    */
  def +=(other: Stats): Stats = {
    for ((sample, sampleStats) <- other.samplesStats) {
      // NOTE(review): a sample that is new to this instance is stored by reference, so it stays shared with `other`
      if (this.samplesStats.contains(sample)) this.samplesStats(sample) += sampleStats
      else this.samplesStats(sample) = sampleStats
    }
    for ((chr, chrMap) <- other.generalStats; (field, fieldMap) <- chrMap) {
      val thisChr = this.generalStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
      thisChr.get(field) match {
        case Some(thisField) => Stats.mergeStatsMap(thisField, fieldMap)
        // Store a copy: keeping the map of `other` itself would make later merges into this instance change `other` too
        case None            => thisChr += field -> mutable.Map(fieldMap.toList: _*)
      }
    }
    this
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

  /**
    * Writes the counts of one general field to a tab-separated file.
    *
    * The file name is made of `prefix`, `chr` and `field` joined by "-" with the extension ".tsv"; an empty
    * prefix and the chromosome "total" are left out of the name. The file starts with the header
    * "value\tcount", followed by one line per value in the order given by `sortAnyAny`. When there are no
    * stats for `chr` or `field` only the header is written.
    *
    * @param field     name of the general field to write
    * @param outputDir directory to place the file in, missing directories are created
    * @param prefix    optional prefix of the file name
    * @param chr       key of the general stats to read, "total" by default
    * @return the file that was written
    */
  def writeField(field: String, outputDir: File, prefix: String = "", chr: String = "total"): File = {
    val file = (prefix, chr) match {
      case ("", "total") => new File(outputDir, s"$field.tsv")
      case (_, "total")  => new File(outputDir, s"$prefix-$field.tsv")
      case ("", _)       => new File(outputDir, s"$chr-$field.tsv")
      case _             => new File(outputDir, s"$prefix-$chr-$field.tsv")
    }

    val data = this.generalStats.get(chr).flatMap(_.get(field)).getOrElse(mutable.Map[Any, Int]())

    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    // try/finally releases the file handle even when sorting or writing a line throws
    try {
      writer.println("value\tcount")
      for (key <- data.keySet.toList.sortWith(sortAnyAny)) {
        writer.println(s"$key\t${data(key)}")
      }
    } finally writer.close()
    file
  }

  /**
    * Writes the counts of one genotype field for the given samples to a tab-separated file.
    *
    * The file name is made of `prefix`, `chr` and `field` joined by "-" with the extension ".tsv"; an empty
    * prefix and the chromosome "total" are left out of the name. The header line holds the field name
    * followed by the sample names. Each following line holds one value and its count for every sample,
    * in the order of `samples`; a sample without a count for that value gets 0. Values are ordered with
    * `sortAnyAny`.
    *
    * @param samples   samples to write a column for; each name is looked up in the sample stats
    * @param field     name of the genotype field to write
    * @param outputDir directory to place the file in, missing directories are created
    * @param prefix    optional prefix of the file name
    * @param chr       key of the genotype stats to read, "total" by default
    */
  def writeGenotypeField(samples: List[String], field: String, outputDir: File,
                         prefix: String = "", chr: String = "total"): Unit = {
    val file = (prefix, chr) match {
      case ("", "total") => new File(outputDir, s"$field.tsv")
      case (_, "total")  => new File(outputDir, s"$prefix-$field.tsv")
      case ("", _)       => new File(outputDir, s"$chr-$field.tsv")
      case _             => new File(outputDir, s"$prefix-$chr-$field.tsv")
    }

    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    // try/finally releases the file handle even when a lookup, sorting or writing throws
    try {
      writer.println(samples.mkString(s"$field\t", "\t", ""))
      // Resolve the counts of this field once per sample instead of once per sample for every value
      val fieldStats = samples.map { sample =>
        this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]())
      }
      val keySet = fieldStats.map(_.keySet).fold(Set[Any]())(_ ++ _)
      for (key <- keySet.toList.sortWith(sortAnyAny)) {
        val values = fieldStats.map(_.getOrElse(key, 0))
        writer.println(values.mkString(s"$key\t", "\t", ""))
      }
    } finally writer.close()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
}

object Stats {
  /**
    * Adds every count of m2 to the count stored under the same key in m1.
    *
    * A key that m1 does not hold yet starts from 0. Only m1 is written to.
    *
    * @param m1 map that receives the summed counts
    * @param m2 map with the counts to add
    */
  def mergeStatsMap(m1: mutable.Map[Any, Int], m2: mutable.Map[Any, Int]): Unit =
    m2.foreach { case (key, count) => m1(key) = m1.getOrElse(key, 0) + count }

  /**
    * Adds the counts of the nested immutable map m2 to the nested mutable map m1.
    *
    * Chromosome and field levels that m1 lacks are created as new mutable maps, so m1 never shares
    * a map with m2.
    *
    * @param m1 nested map (chromosome -> field -> value -> count) that receives the summed counts
    * @param m2 nested map with the counts to add
    */
  def mergeNestedStatsMap(m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]],
                          m2: Map[String, Map[String, Map[Any, Int]]]): Unit = {
    for ((chr, fields) <- m2; (field, counts) <- fields) {
      val target = m1
        .getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
        .getOrElseUpdate(field, mutable.Map[Any, Int]())
      counts.foreach { case (key, count) => target(key) = target.getOrElse(key, 0) + count }
    }
  }
}