package nl.lumc.sasc.biopet.tools.vcfstats

import java.io.{ File, PrintWriter }

import scala.collection.mutable

import nl.lumc.sasc.biopet.utils.sortAnyAny

/**
 * General stats class to store vcf stats.
 *
 * Both maps are mutable and are updated in place by `+=`.
 *
 * @param generalStats Stores all general stats, nested as contig -> field -> value -> count
 * @param samplesStats Stores all sample/genotype specific stats, keyed by sample name
 */
case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
                 samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {

  /**
   * Merges the stats of another [[Stats]] instance into this one, in place.
   *
   * Counts of general fields that exist on both sides are summed. A general field that only
   * exists in `other` is copied, so merging more data into this instance afterwards does not
   * modify the maps owned by `other`.
   *
   * @param other stats to merge into this instance
   * @return this instance, after merging
   */
  def +=(other: Stats): Stats = {
    // NOTE(review): a sample that is new to this instance is stored by reference and therefore
    // stays shared with `other`; SampleStats is declared elsewhere, so it is not copied here.
    for ((key, value) <- other.samplesStats) {
      if (this.samplesStats.contains(key)) this.samplesStats(key) += value
      else this.samplesStats(key) = value
    }
    for ((chr, chrMap) <- other.generalStats; (field, fieldMap) <- chrMap) {
      val thisChr = this.generalStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
      thisChr.get(field) match {
        case Some(thisField) => Stats.mergeStatsMap(thisField, fieldMap)
        // Store a copy: keeping a reference would let later merges change `other` as well
        case None            => thisChr(field) = mutable.Map(fieldMap.toSeq: _*)
      }
    }
    this
  }

  /**
   * Builds the output file for a field. The file name is made of the non-empty prefix, the
   * contig (left out when it is "total") and the field, joined with "-" and ending in ".tsv".
   */
  private def outputFile(field: String, outputDir: File, prefix: String, chr: String): File = {
    val name = (prefix, chr) match {
      case ("", "total") => field + ".tsv"
      case (_, "total")  => prefix + "-" + field + ".tsv"
      case ("", _)       => chr + "-" + field + ".tsv"
      case _             => prefix + "-" + chr + "-" + field + ".tsv"
    }
    new File(outputDir, name)
  }

  /** Returns the value -> count map of a general field, or an empty map when the contig or field is unknown */
  private def generalFieldMap(field: String, chr: String): mutable.Map[Any, Int] =
    this.generalStats.get(chr).flatMap(_.get(field)).getOrElse(mutable.Map[Any, Int]())

  /**
   * Returns the value -> count map of a genotype field for one sample, or an empty map when the
   * contig or field is unknown. The sample itself is looked up directly in `samplesStats`.
   */
  private def genotypeCounts(sample: String, field: String, chr: String) =
    this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]())

  /**
   * Writes 1 specific general field to a tsv file with a "value" and a "count" column.
   *
   * Rows are ordered by value using `sortAnyAny`. The parent directory of the output file is
   * created when needed. An unknown contig or field results in a file with only the header.
   *
   * @param field     name of the general field to write
   * @param outputDir directory to write the file in
   * @param prefix    optional prefix for the file name
   * @param chr       contig to write, "total" by default
   * @return the file that was written
   */
  def writeField(field: String, outputDir: File, prefix: String = "", chr: String = "total"): File = {
    val file = outputFile(field, outputDir, prefix, chr)
    val data = generalFieldMap(field, chr)

    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    // Close the writer even when writing a row fails
    try {
      writer.println("value\tcount")
      for (key <- data.keySet.toList.sortWith(sortAnyAny)) {
        writer.println(key + "\t" + data(key))
      }
    } finally writer.close()
    file
  }

  /**
   * Returns 1 specific general field as two parallel columns.
   *
   * @param field name of the general field
   * @param chr   contig to read, "total" by default
   * @return a map with a "value" array and a "count" array; both are ordered by value using
   *         `sortAnyAny` and are empty when the contig or field is unknown
   */
  def getField(field: String, chr: String = "total"): Map[String, Array[Any]] = {
    val data = generalFieldMap(field, chr)
    val rows = for (key <- data.keySet.toArray.sortWith(sortAnyAny)) yield {
      (key, data(key))
    }
    Map("value" -> rows.map(_._1), "count" -> rows.map(_._2))
  }

  /**
   * Writes 1 specific genotype field to a tsv file with one column per sample.
   *
   * The header holds the field name followed by the sample names. Each following row holds a
   * value and its count for every sample, where 0 is written for a sample without that value.
   * Rows are ordered by value using `sortAnyAny`. The parent directory of the output file is
   * created when needed.
   *
   * @param samples   samples to write, each is looked up in `samplesStats`
   * @param field     name of the genotype field to write
   * @param outputDir directory to write the file in
   * @param prefix    optional prefix for the file name
   * @param chr       contig to write, "total" by default
   */
  def writeGenotypeField(samples: List[String], field: String, outputDir: File,
                         prefix: String = "", chr: String = "total"): Unit = {
    val file = outputFile(field, outputDir, prefix, chr)

    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    // Close the writer even when a sample lookup or a write fails
    try {
      writer.println(samples.mkString(field + "\t", "\t", ""))
      // Resolve the counts of each sample once instead of once per row
      val counts = samples.map(genotypeCounts(_, field, chr))
      val keySet = counts.map(_.keySet).fold(Set[Any]())(_ ++ _)
      for (key <- keySet.toList.sortWith(sortAnyAny)) {
        val values = counts.map(_.getOrElse(key, 0))
        writer.println(values.mkString(key + "\t", "\t", ""))
      }
    } finally writer.close()
  }

  /**
   * Returns 1 specific genotype field for the given samples.
   *
   * @param samples samples to read, each is looked up in `samplesStats`
   * @param field   name of the genotype field
   * @param chr     contig to read, "total" by default
   * @return a map from sample name to a map from value (as string) to its count; values that a
   *         sample does not have are left out. The count is the result of the map lookup, so it
   *         is wrapped in `Some`.
   */
  def getGenotypeField(samples: List[String], field: String, chr: String = "total"): Map[String, Map[String, Any]] = {
    // Resolve the counts of each sample once instead of once per value
    val counts = samples.map(sample => sample -> genotypeCounts(sample, field, chr))
    val keySet = counts.map(_._2.keySet).fold(Set[Any]())(_ ++ _)

    counts.map {
      case (sample, sampleCounts) =>
        sample -> keySet.map(key => key.toString -> sampleCounts.get(key)).filter(_._2.isDefined).toMap
    }.toMap
  }

  /**
   * This will generate stats for one contig.
   *
   * @param contig              contig to generate the stats for
   * @param samples             samples to include in the genotype stats
   * @param genotypeFields      genotype fields to include
   * @param infoFields          general fields to include
   * @param sampleDistributions names that are looked up as general field "SampleDistribution-" + name
   * @return a map with the keys "genotype", "info" and "sample_distributions", each holding a
   *         map from the requested name to its stats
   */
  def getContigStats(contig: String,
                     samples: List[String],
                     genotypeFields: List[String] = Nil,
                     infoFields: List[String] = Nil,
                     sampleDistributions: List[String] = Nil): Map[String, Any] = {
    Map(
      "genotype" -> genotypeFields.map(f => f -> getGenotypeField(samples, f, contig)).toMap,
      "info" -> infoFields.map(f => f -> getField(f, contig)).toMap,
      "sample_distributions" -> sampleDistributions.map(f => f -> getField("SampleDistribution-" + f, contig)).toMap
    )
  }

  /** This will generate stats for total, see [[getContigStats]] for the layout of the result */
  def getTotalStats(samples: List[String],
                    genotypeFields: List[String] = Nil,
                    infoFields: List[String] = Nil,
                    sampleDistributions: List[String] = Nil): Map[String, Any] =
    getContigStats("total", samples, genotypeFields, infoFields, sampleDistributions)

  /**
   * This will generate stats for total and contigs separated.
   *
   * @return a map with the key "contigs", holding the stats per requested contig, and the key
   *         "total", holding the stats of the "total" contig
   */
  def getAllStats(contigs: List[String],
                  samples: List[String],
                  genotypeFields: List[String] = Nil,
                  infoFields: List[String] = Nil,
                  sampleDistributions: List[String] = Nil): Map[String, Any] = {
    Map(
      "contigs" -> contigs.map(c => c -> getContigStats(c, samples, genotypeFields, infoFields, sampleDistributions)).toMap,
      "total" -> getTotalStats(samples, genotypeFields, infoFields, sampleDistributions)
    )
  }
}

object Stats {
  /**
   * Merge m2 into m1: every count of m2 is added to the count m1 holds for the same value,
   * where a value that is missing in m1 starts at 0. Only m1 is modified.
   */
  def mergeStatsMap(m1: mutable.Map[Any, Int], m2: mutable.Map[Any, Int]): Unit =
    m2.foreach { case (value, count) => m1.update(value, m1.getOrElse(value, 0) + count) }

  /**
   * Merge m2 into m1 for the nested contig -> field -> value -> count layout.
   *
   * Contigs and fields that are missing in m1 are created as new mutable maps, after which
   * the counts of m2 are added to them. Only m1 is modified.
   */
  def mergeNestedStatsMap(m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]],
                          m2: Map[String, Map[String, Map[Any, Int]]]): Unit =
    m2.foreach {
      case (contig, fields) =>
        fields.foreach {
          case (fieldName, counts) =>
            val targetFields = m1.getOrElseUpdate(contig, mutable.Map[String, mutable.Map[Any, Int]]())
            val targetCounts = targetFields.getOrElseUpdate(fieldName, mutable.Map[Any, Int]())
            counts.foreach {
              case (value, count) => targetCounts(value) = targetCounts.getOrElse(value, 0) + count
            }
        }
    }
}