Commit f887f0b7 authored by pjvan_thof

Changed map to array

parent ea5a2be9
......@@ -23,13 +23,14 @@ import scala.collection.mutable
* @param sampleToSample Stores sample to sample compare stats
*/
case class SampleStats(genotypeStats: mutable.Map[String, mutable.Map[Any, Int]] = mutable.Map(),
sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) {
sampleToSample: Array[SampleToSampleStats] = Array()) {
/** Add an other class */
def +=(other: SampleStats): Unit = {
for ((key, value) <- other.sampleToSample) {
if (this.sampleToSample.contains(key)) this.sampleToSample(key) += value
else this.sampleToSample(key) = value
require(other.sampleToSample.size == this.sampleToSample.size)
val zipped = this.sampleToSample.zip(other.sampleToSample).zipWithIndex
for (((s1, s2), i) <- zipped) {
s1 += s2
}
for ((field, fieldMap) <- other.genotypeStats) {
val thisField = this.genotypeStats.get(field)
......
......@@ -31,7 +31,7 @@ import scala.sys.process.{Process, ProcessLogger}
* @param samplesStats Stores all sample/genotype specific stats
*/
case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = mutable.Map(),
samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {
samplesStats: mutable.Map[Int, SampleStats] = mutable.Map()) {
/** Add an other class */
def +=(other: Stats): Stats = {
......@@ -91,18 +91,18 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta
file.getParentFile.mkdirs()
val writer = new PrintWriter(file)
writer.println(samples.mkString(field + "\t", "\t", ""))
val keySet = (for (sample <- samples)
val keySet = (for ((sample, sampleIndex) <- samples.zipWithIndex)
yield
this
.samplesStats(sample)
.samplesStats(sampleIndex)
.genotypeStats
.getOrElse(field, Map[Any, Int]())
.keySet).fold(Set[Any]())(_ ++ _)
for (key <- keySet.toList.sortWith(sortAnyAny)) {
val values = for (sample <- samples)
val values = for ((sample, sampleIndex) <- samples.zipWithIndex)
yield
this
.samplesStats(sample)
.samplesStats(sampleIndex)
.genotypeStats
.getOrElse(field, Map[Any, Int]())
.getOrElse(key, 0)
......@@ -113,22 +113,22 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta
/** Function to write 1 specific genotype field */
def getGenotypeField(samples: List[String], field: String): Map[String, Map[String, Any]] = {
val keySet = (for (sample <- samples)
val keySet = (for ((sample, sampleIndex) <- samples.zipWithIndex)
yield
this
.samplesStats(sample)
.samplesStats(sampleIndex)
.genotypeStats
.getOrElse(field, Map[Any, Int]())
.keySet).fold(Set[Any]())(_ ++ _)
(for (sample <- samples)
(for ((sample, sampleIndex) <- samples.zipWithIndex)
yield
sample -> {
keySet
.map(
key =>
key.toString -> this
.samplesStats(sample)
.samplesStats(sampleIndex)
.genotypeStats
.getOrElse(field, Map[Any, Int]())
.get(key))
......@@ -142,19 +142,21 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta
genotypeFields: List[String] = Nil,
infoFields: List[String] = Nil,
sampleDistributions: List[String] = Nil): Map[String, Any] = {
val sampleIndex = samples.zipWithIndex
Map(
"genotype" -> genotypeFields.map(f => f -> getGenotypeField(samples, f)).toMap,
"info" -> infoFields.map(f => f -> getField(f)).toMap,
"sample_distributions" -> sampleDistributions
.map(f => f -> getField("SampleDistribution-" + f))
.toMap
) ++ Map(
.toMap,
"sample_compare" -> Map(
"samples" -> samples,
"genotype_overlap" -> samples.map(sample1 =>
samples.map(sample2 => samplesStats(sample1).sampleToSample(sample2).genotypeOverlap)),
"allele_overlap" -> samples.map(sample1 =>
samples.map(sample2 => samplesStats(sample1).sampleToSample(sample2).alleleOverlap))
"genotype_overlap" -> sampleIndex.map(sample1 =>
sampleIndex.map(sample2 =>
samplesStats(sample1._2).sampleToSample(sample2._2).genotypeOverlap)),
"allele_overlap" -> sampleIndex.map(sample1 =>
sampleIndex.map(sample2 =>
samplesStats(sample1._2).sampleToSample(sample2._2).alleleOverlap))
)
)
}
......@@ -218,14 +220,14 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta
absWriter.println(samples.mkString("\t", "\t", ""))
relWriter.println(samples.mkString("\t", "\t", ""))
for (sample1 <- samples) {
val values = for (sample2 <- samples)
yield function(this.samplesStats(sample1).sampleToSample(sample2))
for (sample1 <- samples.zipWithIndex) {
val values = for (sample2 <- samples.zipWithIndex)
yield function(this.samplesStats(sample1._2).sampleToSample(sample2._2))
absWriter.println(values.mkString(sample1 + "\t", "\t", ""))
absWriter.println(values.mkString(sample1._1 + "\t", "\t", ""))
val total = function(this.samplesStats(sample1).sampleToSample(sample1))
relWriter.println(values.map(_.toFloat / total).mkString(sample1 + "\t", "\t", ""))
val total = function(this.samplesStats(sample1._2).sampleToSample(sample1._2))
relWriter.println(values.map(_.toFloat / total).mkString(sample1._1 + "\t", "\t", ""))
}
absWriter.close()
relWriter.close()
......@@ -239,12 +241,11 @@ object Stats {
def emptyStats(samples: List[String]): Stats = {
val stats = new Stats
val sampleIndex = samples.zipWithIndex
//init stats
for (sample1 <- samples) {
stats.samplesStats += sample1 -> new SampleStats
for (sample2 <- samples) {
stats.samplesStats(sample1).sampleToSample += sample2 -> new SampleToSampleStats
}
for (sample1 <- sampleIndex) {
stats.samplesStats += sample1._2 -> SampleStats(
sampleToSample = Array.fill(samples.size)(new SampleToSampleStats))
}
stats
}
......
......@@ -149,23 +149,23 @@ object VcfStats extends ToolCommand {
reader.query(samInterval.getContig, samInterval.getStart, samInterval.getEnd)
if (!query.hasNext) {
Stats.mergeNestedStatsMap(stats.generalStats, fillGeneral(adInfoTags))
for (sample <- samples) yield {
Stats.mergeNestedStatsMap(stats.samplesStats(sample).genotypeStats,
for (sample <- samples.zipWithIndex) yield {
Stats.mergeNestedStatsMap(stats.samplesStats(sample._2).genotypeStats,
fillGenotype(adGenotypeTags))
}
}
for (record <- query if record.getStart <= samInterval.getEnd) {
Stats.mergeNestedStatsMap(stats.generalStats, checkGeneral(record, adInfoTags))
for (sample1 <- samples) yield {
val genotype = record.getGenotype(sample1)
Stats.mergeNestedStatsMap(stats.samplesStats(sample1).genotypeStats,
for (sample1 <- samples.zipWithIndex) yield {
val genotype = record.getGenotype(sample1._2)
Stats.mergeNestedStatsMap(stats.samplesStats(sample1._2).genotypeStats,
checkGenotype(record, genotype, adGenotypeTags))
for (sample2 <- samples) {
val genotype2 = record.getGenotype(sample2)
for (sample2 <- samples.zipWithIndex) {
val genotype2 = record.getGenotype(sample2._2)
if (genotype.getAlleles == genotype2.getAlleles)
stats.samplesStats(sample1).sampleToSample(sample2).genotypeOverlap += 1
stats.samplesStats(sample1).sampleToSample(sample2).alleleOverlap += VcfUtils
stats.samplesStats(sample1._2).sampleToSample(sample2._2).genotypeOverlap += 1
stats.samplesStats(sample1._2).sampleToSample(sample2._2).alleleOverlap += VcfUtils
.alleleOverlap(genotype.getAlleles.toList, genotype2.getAlleles.toList)
}
}
......
......@@ -71,16 +71,11 @@ class VcfStatsTest extends TestNGSuite with Matchers {
@Test
def testSampleStats(): Unit = {
val s1 = SampleStats()
val s2 = SampleStats()
val s1 = SampleStats(sampleToSample = Array.fill(2)(SampleToSampleStats()))
val s2 = SampleStats(sampleToSample = Array.fill(2)(SampleToSampleStats()))
s1.sampleToSample += "s1" -> SampleToSampleStats()
s1.sampleToSample += "s2" -> SampleToSampleStats()
s2.sampleToSample += "s1" -> SampleToSampleStats()
s2.sampleToSample += "s2" -> SampleToSampleStats()
s1.sampleToSample("s1").alleleOverlap = 1
s2.sampleToSample("s2").alleleOverlap = 2
s1.sampleToSample(0).alleleOverlap = 1
s2.sampleToSample(1).alleleOverlap = 2
s1.genotypeStats += "1" -> mutable.Map(1 -> 1)
s2.genotypeStats += "2" -> mutable.Map(2 -> 2)
......@@ -92,7 +87,7 @@ class VcfStatsTest extends TestNGSuite with Matchers {
s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 2))
ss1.alleleOverlap = 1
ss2.alleleOverlap = 2
s1.sampleToSample shouldBe mutable.Map("s1" -> ss1, "s2" -> ss2)
s1.sampleToSample shouldBe Array(ss1, ss2)
s1 += s2
s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 4))
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment