Commit 299875bb authored by Peter van 't Hof's avatar Peter van 't Hof

Adding methods to aggregate stats

parent ef03063d
......@@ -51,7 +51,7 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.M
}
/** Function to write 1 specific general field */
def writeField2(field: String, chr: String = "total"): Map[String, Array[Any]] = {
def getField(field: String, chr: String = "total"): Map[String, Array[Any]] = {
val data = this.generalStats.getOrElse(chr, mutable.Map[String, mutable.Map[Any, Int]]()).getOrElse(field, mutable.Map[Any, Int]())
val rows = for (key <- data.keySet.toArray.sortWith(sortAnyAny)) yield {
......@@ -82,7 +82,7 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.M
}
/** Function to write 1 specific genotype field */
def writeGenotypeField2(samples: List[String], field: String, chr: String = "total"): Map[String, Map[String, Any]] = {
def getGenotypeField(samples: List[String], field: String, chr: String = "total"): Map[String, Map[String, Any]] = {
val keySet = (for (sample <- samples) yield this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).keySet).fold(Set[Any]())(_ ++ _)
(for (sample <- samples) yield sample -> {
......@@ -91,6 +91,26 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.M
).toMap
}).toMap
}
/**
 * This will generate stats for one contig.
 *
 * @param contig         contig name, forwarded as the `chr` argument of the field getters
 * @param samples        samples forwarded to `getGenotypeField`
 * @param genotypeFields genotype fields to collect
 * @param infoFields     general (info) fields to collect
 * @return a map with two keys: "genotype" (field name -> result of `getGenotypeField`)
 *         and "info" (field name -> result of `getField`)
 */
def getContigStats(contig: String, samples: List[String], genotypeFields: List[String], infoFields: List[String]): Map[String, Any] = {
  Map(
    "genotype" -> genotypeFields.map(f => f -> getGenotypeField(samples, f, contig)).toMap,
    // Converted to a Map as well, so both sections share the same field -> values shape
    // and can be looked up by field name.
    "info" -> infoFields.map(f => f -> getField(f, contig)).toMap
  )
}
/**
 * This will generate stats for total.
 *
 * Delegates to `getContigStats` using the "total" pseudo-contig.
 *
 * @param samples        samples forwarded to `getContigStats`
 * @param genotypeFields genotype fields to collect
 * @param infoFields     general (info) fields to collect
 * @return the result of `getContigStats` for "total"
 */
def getTotalStats(samples: List[String], genotypeFields: List[String], infoFields: List[String]): Map[String, Any] =
  getContigStats("total", samples, genotypeFields, infoFields)
/**
 * This will generate stats for total and for each contig separately.
 *
 * @param contigs        contigs to report individually
 * @param samples        samples forwarded to the per-contig and total getters
 * @param genotypeFields genotype fields to collect
 * @param infoFields     general (info) fields to collect
 * @return a map with key "contigs" (contig name -> result of `getContigStats`)
 *         and key "total" (result of `getTotalStats`)
 */
def getAllStats(contigs: List[String], samples: List[String], genotypeFields: List[String], infoFields: List[String]): Map[String, Any] = {
  // Per-contig stats are built first, then the total, in the same order as the result map.
  val perContig = (for (contigName <- contigs)
    yield contigName -> getContigStats(contigName, samples, genotypeFields, infoFields)).toMap
  val overall = getTotalStats(samples, genotypeFields, infoFields)
  Map("contigs" -> perContig, "total" -> overall)
}
}
object Stats {
......
......@@ -195,7 +195,7 @@ object VcfStats extends ToolCommand {
}
// Triple for loop to not keep all bins in memory
val statsFutures = (for (intervals <- Random.shuffle(intervals).grouped(intervals.size / (if (intervals.size > 10) 4 else 1)).toList) yield Future {
val statsFutures = for (intervals <- Random.shuffle(intervals).grouped(intervals.size / (if (intervals.size > 10) 4 else 1)).toList) yield Future {
val chunkStats = for (intervals <- intervals.grouped(25)) yield {
val binStats = for (interval <- intervals.par) yield {
val reader = new VCFFileReader(cmdArgs.inputFile, true)
......@@ -243,7 +243,7 @@ object VcfStats extends ToolCommand {
binStats.toList.fold(createStats)(_ += _)
}
chunkStats.toList.fold(createStats)(_ += _)
})
}
val stats = statsFutures.foldLeft(createStats) { case (a,b) => a += Await.result(b, Duration.Inf) }
logger.info("Done reading vcf records")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment