Commit d36d3bd4 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-500

parents 36586d37 c779f1f0
...@@ -208,8 +208,8 @@ object BammetricsReport extends ReportBuilder { ...@@ -208,8 +208,8 @@ object BammetricsReport extends ReportBuilder {
val pngFile = new File(outputDir, prefix + ".png") val pngFile = new File(outputDir, prefix + ".png")
def paths(name: String) = Map( def paths(name: String) = Map(
"mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "value"), "mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "values"),
name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "count") name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "counts")
) )
val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel) val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
...@@ -236,8 +236,8 @@ object BammetricsReport extends ReportBuilder { ...@@ -236,8 +236,8 @@ object BammetricsReport extends ReportBuilder {
val pngFile = new File(outputDir, prefix + ".png") val pngFile = new File(outputDir, prefix + ".png")
def paths(name: String) = Map( def paths(name: String) = Map(
"clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "value"), "clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "values"),
name -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "count") name -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "counts")
) )
val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel) val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
......
...@@ -40,17 +40,11 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc ...@@ -40,17 +40,11 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
private var outputFiles: List[File] = Nil private var outputFiles: List[File] = Nil
def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json") def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json")
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
def clipingFile(contig: Option[String] = None): File = getOutputFile("clipping.tsv", contig)
override def beforeGraph() { override def beforeGraph() {
super.beforeGraph() super.beforeGraph()
deps :+= new File(bamFile.getAbsolutePath.replaceAll(".bam$", ".bai")) deps :+= new File(bamFile.getAbsolutePath.replaceAll(".bam$", ".bai"))
outputFiles :+= bamstatsSummary outputFiles :+= bamstatsSummary
outputFiles :+= flagstatSummaryFile()
outputFiles :+= mappingQualityFile()
outputFiles :+= clipingFile()
jobOutputFile = new File(outputDir, ".bamstats.out") jobOutputFile = new File(outputDir, ".bamstats.out")
if (reference == null) reference = referenceFasta() if (reference == null) reference = referenceFasta()
} }
......
...@@ -18,8 +18,9 @@ import java.io.File ...@@ -18,8 +18,9 @@ import java.io.File
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction } import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.tryToParseNumber import nl.lumc.sasc.biopet.utils.{ ConfigUtils, tryToParseNumber }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source import scala.io.Source
...@@ -30,7 +31,7 @@ import scala.io.Source ...@@ -30,7 +31,7 @@ import scala.io.Source
* Created by pjvan_thof on 1/10/15. * Created by pjvan_thof on 1/10/15.
*/ */
class VcfStats(val root: Configurable) extends ToolCommandFunction with Summarizable with Reference { class VcfStats(val root: Configurable) extends ToolCommandFunction with Summarizable with Reference {
def toolObject = nl.lumc.sasc.biopet.tools.VcfStats def toolObject = VcfStats
mainFunction = false mainFunction = false
...@@ -41,10 +42,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz ...@@ -41,10 +42,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz
protected var index: File = null protected var index: File = null
@Output @Output
protected var generalStats: File = null protected var statsFile: File = null
@Output
protected var genotypeStats: File = null
override def defaultCoreMemory = 3.0 override def defaultCoreMemory = 3.0
override def defaultThreads = 3 override def defaultThreads = 3
...@@ -66,8 +64,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz ...@@ -66,8 +64,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz
/** Set output dir and a output file */ /** Set output dir and a output file */
def setOutputDir(dir: File): Unit = { def setOutputDir(dir: File): Unit = {
outputDir = dir outputDir = dir
generalStats = new File(dir, "general.tsv") statsFile = new File(dir, "stats.json")
genotypeStats = new File(dir, "genotype-general.tsv")
jobOutputFile = new File(dir, ".vcfstats.out") jobOutputFile = new File(dir, ".vcfstats.out")
} }
...@@ -83,35 +80,10 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz ...@@ -83,35 +80,10 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz
optional("--intervals", intervals) optional("--intervals", intervals)
/** Returns general stats to the summary */ /** Returns general stats to the summary */
def summaryStats: Map[String, Any] = { def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(statsFile)
Map("info" -> (for (
line <- Source.fromFile(generalStats).getLines().toList.tail;
values = line.split("\t") if values.size >= 2 && !values(0).isEmpty
) yield values(0) -> tryToParseNumber(values(1)).getOrElse(None)
).toMap)
}
/** return only general files to summary */ /** return only general files to summary */
def summaryFiles: Map[String, File] = Map( def summaryFiles: Map[String, File] = Map(
"general_stats" -> generalStats, "stats" -> statsFile
"genotype_stats" -> genotypeStats
) )
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
val data = Source.fromFile(genotypeStats).getLines().map(_.split("\t")).toArray
for (s <- 1 until data(0).size) {
val sample = data(0)(s)
val stats = Map("genotype" -> (for (f <- 1 until data.length) yield {
data(f)(0) -> tryToParseNumber(data(f)(s)).getOrElse(None)
}).toMap)
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats
}
qscript.addSummarizable(sum, name, Some(sample))
}
}
} }
...@@ -47,7 +47,7 @@ object BiopetToolsExecutable extends BiopetExecutable { ...@@ -47,7 +47,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.ValidateFastq, nl.lumc.sasc.biopet.tools.ValidateFastq,
nl.lumc.sasc.biopet.tools.ValidateVcf, nl.lumc.sasc.biopet.tools.ValidateVcf,
nl.lumc.sasc.biopet.tools.VcfFilter, nl.lumc.sasc.biopet.tools.VcfFilter,
nl.lumc.sasc.biopet.tools.VcfStats, nl.lumc.sasc.biopet.tools.vcfstats.VcfStats,
nl.lumc.sasc.biopet.tools.VcfToTsv, nl.lumc.sasc.biopet.tools.VcfToTsv,
nl.lumc.sasc.biopet.tools.VcfWithVcf, nl.lumc.sasc.biopet.tools.VcfWithVcf,
nl.lumc.sasc.biopet.tools.VepNormalizer, nl.lumc.sasc.biopet.tools.VepNormalizer,
......
...@@ -319,6 +319,11 @@ object VcfFilter extends ToolCommand { ...@@ -319,6 +319,11 @@ object VcfFilter extends ToolCommand {
* @return true if filter passed * @return true if filter passed
*/ */
def mustHaveVariant(record: VariantContext, samples: List[String]): Boolean = { def mustHaveVariant(record: VariantContext, samples: List[String]): Boolean = {
samples.foreach { s =>
if (!record.getSampleNames.toList.contains(s)) {
throw new IllegalArgumentException(s"Sample name $s does not exist in VCF file")
}
}
!samples.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall || VcfUtils.isCompoundNoCall(a)) !samples.map(record.getGenotype).exists(a => a.isHomRef || a.isNoCall || VcfUtils.isCompoundNoCall(a))
} }
......
...@@ -98,50 +98,25 @@ object BamStats extends ToolCommand { ...@@ -98,50 +98,25 @@ object BamStats extends ToolCommand {
*/ */
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = { def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig => val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
processContig(contig, bamFile, binSize, threadBinSize, outputDir) contig.chr -> processContig(contig, bamFile, binSize, threadBinSize, outputDir)
} }.toList
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.toList)
stats.writeStatsToFiles(outputDir)
val clippingHistogram = tsvToMap(new File(outputDir, "clipping.tsv")) val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2))
val mappingQualityHistogram = tsvToMap(new File(outputDir, "mapping_quality.tsv"))
val summary = Map( val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json"))
"flagstats" -> ConfigUtils.fileToConfigMap(new File(outputDir, "flagstats.summary.json")), val totalStats = stats.toSummaryMap
"flagstats_per_contig" -> referenceDict.getSequences.map { val statsMap = Map(
c => "total" -> totalStats,
c.getSequenceName -> ConfigUtils.fileToConfigMap( "contigs" -> contigsFutures.map(x => x._1 -> Await.result(x._2, Duration.Zero).toSummaryMap).toMap
new File(outputDir, "contigs" + File.separator + c.getSequenceName + File.separator + "flagstats.summary.json"))
}.toMap,
"mapping_quality" -> Map("general" -> aggregateStats(mappingQualityHistogram), "histogram" -> mappingQualityHistogram),
"clipping" -> Map("general" -> aggregateStats(clippingHistogram), "histogram" -> clippingHistogram)
) )
statsWriter.println(ConfigUtils.mapToJson(statsMap).nospaces)
statsWriter.close()
val summaryWriter = new PrintWriter(new File(outputDir, "bamstats.summary.json")) val summaryWriter = new PrintWriter(new File(outputDir, "bamstats.summary.json"))
summaryWriter.println(ConfigUtils.mapToJson(summary).spaces2) summaryWriter.println(ConfigUtils.mapToJson(totalStats).nospaces)
summaryWriter.close() summaryWriter.close()
} }
def aggregateStats(table: Map[String, Array[Long]]): Map[String, Any] = {
val values = table("value")
val counts = table("count")
require(values.size == counts.size)
if (values.nonEmpty) {
val modal = values(counts.indexOf(counts.max))
val totalCounts = counts.sum
val mean: Double = values.zip(counts).map(x => x._1 * x._2).sum.toDouble / totalCounts
val median: Long = values(values.zip(counts).zipWithIndex.sortBy(_._1._1).foldLeft((0L, 0)) {
case (a, b) =>
val total = a._1 + b._1._2
if (total >= totalCounts / 2) (total, a._2)
else (total, b._2)
}._2)
Map("min" -> values.min, "max" -> values.max, "median" -> median, "mean" -> mean, "modal" -> modal)
} else Map()
}
/** /**
* This will start the subjobs for each contig and collect [[Stats]] on contig level * This will start the subjobs for each contig and collect [[Stats]] on contig level
* *
...@@ -157,11 +132,7 @@ object BamStats extends ToolCommand { ...@@ -157,11 +132,7 @@ object BamStats extends ToolCommand {
.grouped((region.length.toDouble / binSize).ceil.toInt / (region.length.toDouble / threadBinSize).ceil.toInt) .grouped((region.length.toDouble / binSize).ceil.toInt / (region.length.toDouble / threadBinSize).ceil.toInt)
.map(scatters => processThread(scatters, bamFile)) .map(scatters => processThread(scatters, bamFile))
.toList .toList
val stats = waitOnFutures(scattersFutures, Some(region.chr)) waitOnFutures(scattersFutures, Some(region.chr))
val contigDir = new File(outputDir, "contigs" + File.separator + region.chr)
contigDir.mkdirs()
stats.writeStatsToFiles(contigDir)
stats
} }
/** /**
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
package nl.lumc.sasc.biopet.tools.bamstats package nl.lumc.sasc.biopet.tools.bamstats
import java.io.{ File, PrintWriter } import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.sortAnyAny
import scala.collection.mutable import scala.collection.mutable
...@@ -49,6 +50,11 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T ...@@ -49,6 +50,11 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
writer.close() writer.close()
} }
def toSummaryMap = {
val values = counts.keySet.toList.sortWith(sortAnyAny)
Map("values" -> values, "counts" -> values.map(counts(_)))
}
override def equals(other: Any): Boolean = { override def equals(other: Any): Boolean = {
other match { other match {
case c: Counts[T] => this.counts == c.counts case c: Counts[T] => this.counts == c.counts
...@@ -58,5 +64,22 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T ...@@ -58,5 +64,22 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
} }
class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) { class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) {
def aggregateStats: Map[String, Any] = {
val values = this.counts.keys.toList
val counts = this.counts.values.toList
require(values.size == counts.size)
if (values.nonEmpty) {
val modal = values(counts.indexOf(counts.max))
val totalCounts = counts.sum
val mean: Double = values.zip(counts).map(x => ord.toDouble(x._1) * x._2).sum / totalCounts
val median = values(values.zip(counts).zipWithIndex.sortBy(_._1._1).foldLeft((0L, 0)) {
case (a, b) =>
val total = a._1 + b._1._2
if (total >= totalCounts / 2) (total, a._2)
else (total, b._2)
}._2)
Map("min" -> values.min, "max" -> values.max, "median" -> median, "mean" -> mean, "modal" -> modal)
} else Map()
}
} }
...@@ -58,4 +58,17 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(), ...@@ -58,4 +58,17 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(),
this._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv")) this._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv"))
this._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv")) this._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv"))
} }
def toSummaryMap = {
Map(
"flagstats" -> flagstat.toSummaryMap,
"mapping_quality" -> Map("histogram" -> mappingQualityHistogram.toSummaryMap, "general" -> mappingQualityHistogram.aggregateStats),
"insert_size" -> Map("histogram" -> insertSizeHistogram.toSummaryMap, "general" -> insertSizeHistogram.aggregateStats),
"clipping" -> Map("histogram" -> clippingHistogram.toSummaryMap, "general" -> clippingHistogram.aggregateStats),
"left_clipping" -> Map("histogram" -> leftClippingHistogram.toSummaryMap, "general" -> leftClippingHistogram.aggregateStats),
"right_clipping" -> Map("histogram" -> rightClippingHistogram.toSummaryMap, "general" -> rightClippingHistogram.aggregateStats),
"5_prime_clipping" -> Map("histogram" -> _5_ClippingHistogram.toSummaryMap, "general" -> _5_ClippingHistogram.aggregateStats),
"3_prime_clipping" -> Map("histogram" -> _3_ClippingHistogram.toSummaryMap, "general" -> _3_ClippingHistogram.aggregateStats)
)
}
} }
...@@ -201,6 +201,12 @@ class FlagstatCollector { ...@@ -201,6 +201,12 @@ class FlagstatCollector {
buffer.toString() buffer.toString()
} }
def toSummaryMap = {
val sortedKeys = names.keys.toArray.sorted
sortedKeys.map(x => names(x) -> totalCounts(x)).toMap ++
Map("cross_counts" -> crossCounts)
}
def +=(other: FlagstatCollector): FlagstatCollector = { def +=(other: FlagstatCollector): FlagstatCollector = {
require(this.names == other.names) require(this.names == other.names)
//require(this.functions == other.functions) //require(this.functions == other.functions)
......
package nl.lumc.sasc.biopet.tools.vcfstats
import scala.collection.mutable
/**
 * Container for all per-sample statistics.
 *
 * @param genotypeStats Genotype stats per contig, per field, per observed value
 * @param sampleToSample Pairwise comparison stats against every other sample
 */
case class SampleStats(genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
                       sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) {
  /** Merges the contents of `other` into this instance (mutates in place) */
  def +=(other: SampleStats): Unit = {
    other.sampleToSample.foreach {
      case (sample, stats) =>
        this.sampleToSample.get(sample) match {
          case Some(existing) => existing += stats
          case None           => this.sampleToSample(sample) = stats
        }
    }
    for ((chr, chrMap) <- other.genotypeStats; (field, fieldMap) <- chrMap) {
      // Create the per-contig map on first sight of this contig
      val perChr = this.genotypeStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
      perChr.get(field) match {
        case Some(existing) => Stats.mergeStatsMap(existing, fieldMap)
        case None           => perChr += field -> fieldMap
      }
    }
  }
}
package nl.lumc.sasc.biopet.tools.vcfstats
/**
 * Holds the overlap counts between one sample and another.
 *
 * @param genotypeOverlap Number of genotypes that match the other sample
 * @param alleleOverlap Number of alleles also found in the other sample
 */
case class SampleToSampleStats(var genotypeOverlap: Int = 0,
                               var alleleOverlap: Int = 0) {
  /** Accumulates the counts of `other` into this instance (mutates in place) */
  def +=(other: SampleToSampleStats): Unit = {
    genotypeOverlap = genotypeOverlap + other.genotypeOverlap
    alleleOverlap = alleleOverlap + other.alleleOverlap
  }
}
package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.{ File, PrintWriter }
import scala.collection.mutable
import nl.lumc.sasc.biopet.utils.sortAnyAny
/**
* General stats class to store vcf stats
*
* @param generalStats Stores are general stats
* @param samplesStats Stores all sample/genotype specific stats
*/
case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {
/** Merges the contents of `other` into this instance (mutates in place) and returns this */
def +=(other: Stats): Stats = {
  other.samplesStats.foreach {
    case (sample, stats) =>
      this.samplesStats.get(sample) match {
        case Some(existing) => existing += stats
        case None           => this.samplesStats(sample) = stats
      }
  }
  for ((chr, chrMap) <- other.generalStats; (field, fieldMap) <- chrMap) {
    // Create the per-contig map on first sight of this contig
    val perChr = this.generalStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
    perChr.get(field) match {
      case Some(existing) => Stats.mergeStatsMap(existing, fieldMap)
      case None           => perChr += field -> fieldMap
    }
  }
  this
}
/**
 * Writes one general field as a value/count TSV and returns the written file.
 * The file name encodes the optional prefix and contig; chr = "total" means genome-wide.
 */
def writeField(field: String, outputDir: File, prefix: String = "", chr: String = "total"): File = {
  val fileName = (prefix, chr) match {
    case ("", "total") => field + ".tsv"
    case (_, "total")  => prefix + "-" + field + ".tsv"
    case ("", _)       => chr + "-" + field + ".tsv"
    case _             => prefix + "-" + chr + "-" + field + ".tsv"
  }
  val file = new File(outputDir, fileName)
  // Missing data for this contig/field yields an empty table (header only)
  val data = this.generalStats.getOrElse(chr, mutable.Map[String, mutable.Map[Any, Int]]()).getOrElse(field, mutable.Map[Any, Int]())
  file.getParentFile.mkdirs()
  val writer = new PrintWriter(file)
  writer.println("value\tcount")
  data.keySet.toList.sortWith(sortAnyAny).foreach { key =>
    writer.println(key + "\t" + data(key))
  }
  writer.close()
  file
}
/**
 * Returns one general field as two parallel arrays: "value" (sorted observed values)
 * and "count" (their counts). Missing data yields empty arrays.
 */
def getField(field: String, chr: String = "total"): Map[String, Array[Any]] = {
  val data = this.generalStats.getOrElse(chr, mutable.Map[String, mutable.Map[Any, Int]]()).getOrElse(field, mutable.Map[Any, Int]())
  val sortedKeys = data.keySet.toArray.sortWith(sortAnyAny)
  Map("value" -> sortedKeys, "count" -> sortedKeys.map(k => data(k): Any))
}
/**
 * Writes one genotype field to a TSV: one column per sample, one row per value
 * observed in any sample (0 for samples lacking that value).
 */
def writeGenotypeField(samples: List[String], field: String, outputDir: File,
                       prefix: String = "", chr: String = "total"): Unit = {
  val fileName = (prefix, chr) match {
    case ("", "total") => field + ".tsv"
    case (_, "total")  => prefix + "-" + field + ".tsv"
    case ("", _)       => chr + "-" + field + ".tsv"
    case _             => prefix + "-" + chr + "-" + field + ".tsv"
  }
  val file = new File(outputDir, fileName)
  file.getParentFile.mkdirs()
  // Stats of this field for one sample; empty map if the sample has no data here
  def fieldMap(sample: String) =
    this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]())
  val writer = new PrintWriter(file)
  writer.println(samples.mkString(field + "\t", "\t", ""))
  // Union of every value observed in any of the requested samples
  val allKeys = samples.map(fieldMap(_).keySet).fold(Set[Any]())(_ ++ _)
  for (key <- allKeys.toList.sortWith(sortAnyAny)) {
    val row = samples.map(fieldMap(_).getOrElse(key, 0))
    writer.println(row.mkString(key + "\t", "\t", ""))
  }
  writer.close()
}
/**
 * Returns one genotype field per sample, keyed by the value's string form.
 * Values absent for a sample are omitted from that sample's map.
 */
def getGenotypeField(samples: List[String], field: String, chr: String = "total"): Map[String, Map[String, Any]] = {
  // Stats of this field for one sample; empty map if the sample has no data here
  def fieldMap(sample: String) =
    this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]())
  // Union of every value observed in any of the requested samples
  val allKeys = samples.map(fieldMap(_).keySet).fold(Set[Any]())(_ ++ _)
  samples.map { sample =>
    val counts = fieldMap(sample)
    // NOTE(review): the defined Option itself (Some(count)) is stored as the value,
    // matching prior behavior — confirm downstream consumers expect Option-wrapped counts
    val entries = for (key <- allKeys if counts.get(key).isDefined) yield key.toString -> (counts.get(key): Any)
    sample -> entries.toMap
  }.toMap
}
/** Collects the requested genotype, info and sample-distribution stats for a single contig */
def getContigStats(contig: String,
                   samples: List[String],
                   genotypeFields: List[String] = Nil,
                   infoFields: List[String] = Nil,
                   sampleDistributions: List[String] = Nil): Map[String, Any] = {
  val genotype = genotypeFields.map(field => field -> getGenotypeField(samples, field, contig)).toMap
  val info = infoFields.map(field => field -> getField(field, contig)).toMap
  // Sample-distribution fields are stored under a "SampleDistribution-" prefix
  val distributions = sampleDistributions.map(field => field -> getField("SampleDistribution-" + field, contig)).toMap
  Map(
    "genotype" -> genotype,
    "info" -> info,
    "sample_distributions" -> distributions
  )
}
/** This will generate stats for total */
def getTotalStats(samples: List[String],
genotypeFields: List[String] = Nil,
infoFields: List[String] = Nil,