Skip to content
Snippets Groups Projects
Commit 11a6c2db authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added general stats

parent 612be8f7
Branches
Tags
No related merge requests found
...@@ -114,15 +114,17 @@ object VcfStats extends ToolCommand { ...@@ -114,15 +114,17 @@ object VcfStats extends ToolCommand {
val samples = header.getSampleNamesInOrder.toList val samples = header.getSampleNamesInOrder.toList
val intervals: List[Interval] = ( val intervals: List[Interval] = (
for (seq <- header.getSequenceDictionary.getSequences; for (
seq <- header.getSequenceDictionary.getSequences;
chunks = seq.getSequenceLength / 10000000; chunks = seq.getSequenceLength / 10000000;
i <- 1 until chunks) yield { i <- 1 until chunks
) yield {
val size = seq.getSequenceLength / chunks val size = seq.getSequenceLength / chunks
val begin = size * (i-1) + 1 val begin = size * (i - 1) + 1
val end = if (i >= chunks) seq.getSequenceLength else size * i val end = if (i >= chunks) seq.getSequenceLength else size * i
new Interval(seq.getSequenceName, begin, end) new Interval(seq.getSequenceName, begin, end)
} }
).toList ).toList
val totalBases = intervals.foldRight(0L)(_.length() + _) val totalBases = intervals.foldRight(0L)(_.length() + _)
...@@ -144,7 +146,7 @@ object VcfStats extends ToolCommand { ...@@ -144,7 +146,7 @@ object VcfStats extends ToolCommand {
var variantCounter = 0L var variantCounter = 0L
var baseCounter = 0L var baseCounter = 0L
def status(count: Int, interval:Interval): Unit = { def status(count: Int, interval: Interval): Unit = {
variantCounter += count variantCounter += count
baseCounter += interval.length() baseCounter += interval.length()
val fraction = baseCounter.toFloat / totalBases * 100 val fraction = baseCounter.toFloat / totalBases * 100
...@@ -158,8 +160,9 @@ object VcfStats extends ToolCommand { ...@@ -158,8 +160,9 @@ object VcfStats extends ToolCommand {
val stats = createStats val stats = createStats
logger.info("Starting on: " + interval) logger.info("Starting on: " + interval)
for (record <- reader.query(interval.getSequence, interval.getStart, interval.getEnd) for (
if record.getStart <= interval.getEnd) { record <- reader.query(interval.getSequence, interval.getStart, interval.getEnd) if record.getStart <= interval.getEnd
) {
mergeNestedStatsMap(stats.generalStats, checkGeneral(record)) mergeNestedStatsMap(stats.generalStats, checkGeneral(record))
for (sample1 <- samples) yield { for (sample1 <- samples) yield {
val genotype = record.getGenotype(sample1) val genotype = record.getGenotype(sample1)
...@@ -201,13 +204,25 @@ object VcfStats extends ToolCommand { ...@@ -201,13 +204,25 @@ object VcfStats extends ToolCommand {
buffer += "DP" -> Map((if (genotype.hasDP) genotype.getDP else "not set") -> 1) buffer += "DP" -> Map((if (genotype.hasDP) genotype.getDP else "not set") -> 1)
buffer += "GQ" -> Map((if (genotype.hasGQ) genotype.getGQ else "not set") -> 1) buffer += "GQ" -> Map((if (genotype.hasGQ) genotype.getGQ else "not set") -> 1)
def addToBuffer(key:String, value:Any): Unit = { def addToBuffer(key: String, value: Any): Unit = {
val map = buffer.getOrElse(key, Map()) val map = buffer.getOrElse(key, Map())
buffer += key -> (map + (value -> (map.getOrElse(value, 0) + 1))) buffer += key -> (map + (value -> (map.getOrElse(value, 0) + 1)))
} }
val usedAlleles = (for (allele <- genotype.getAlleles) yield record.getAlleleIndex(allele)).toList val usedAlleles = (for (allele <- genotype.getAlleles) yield record.getAlleleIndex(allele)).toList
if (genotype.isHet) addToBuffer("general", "Het")
if (genotype.isHetNonRef) addToBuffer("general", "HetNonRef")
if (genotype.isHom) addToBuffer("general", "Hom")
if (genotype.isHomRef) addToBuffer("general", "HomRef")
if (genotype.isHomVar) addToBuffer("general", "HomVar")
if (genotype.isMixed) addToBuffer("general", "Mixed")
if (genotype.isNoCall) addToBuffer("general", "NoCall")
if (genotype.isNonInformative) addToBuffer("general", "NonInformative")
if (genotype.isAvailable) addToBuffer("general", "Available")
if (genotype.isCalled) addToBuffer("general", "Called")
if (genotype.isFiltered) addToBuffer("general", "Filtered")
if (genotype.hasAD) { if (genotype.hasAD) {
val ad = genotype.getAD val ad = genotype.getAD
for (i <- 0 until ad.size if ad(i) > 0) { for (i <- 0 until ad.size if ad(i) > 0) {
...@@ -218,21 +233,20 @@ object VcfStats extends ToolCommand { ...@@ -218,21 +233,20 @@ object VcfStats extends ToolCommand {
else addToBuffer("AD-not_used", ad(i)) else addToBuffer("AD-not_used", ad(i))
} }
} }
buffer.toMap buffer.toMap
} }
def writeGenotypeFields(stats: Stats, prefix: String, samples: List[String]) { def writeGenotypeFields(stats: Stats, prefix: String, samples: List[String]) {
val fields = List("DP", "GQ", "AD", "AD-ref", "AD-alt", "AD-used", "AD-not_used") val fields = List("DP", "GQ", "AD", "AD-ref", "AD-alt", "AD-used", "AD-not_used", "general")
for (field <- fields) { for (field <- fields) {
val file = new File(prefix + field + ".tsv") val file = new File(prefix + field + ".tsv")
file.getParentFile.mkdirs() file.getParentFile.mkdirs()
val writer = new PrintWriter(file) val writer = new PrintWriter(file)
writer.println(samples.mkString("\t", "\t", "")) writer.println(samples.mkString("\t", "\t", ""))
val keySet = (for (sample <- samples) yield val keySet = (for (sample <- samples) yield stats.samplesStats(sample).genotypeStats.getOrElse(field, Map[Any, Int]()).keySet).fold(Set[Any]())(_ ++ _)
stats.samplesStats(sample).genotypeStats.getOrElse(field, Map[Any,Int]()).keySet).fold(Set[Any]())(_ ++ _)
for (key <- keySet.toList.sortWith(sortAnyAny(_, _))) { for (key <- keySet.toList.sortWith(sortAnyAny(_, _))) {
val values = for (sample <- samples) yield val values = for (sample <- samples) yield stats.samplesStats(sample).genotypeStats.getOrElse(field, Map[Any, Int]()).getOrElse(key, 0)
stats.samplesStats(sample).genotypeStats.getOrElse(field, Map[Any,Int]()).getOrElse(key, 0)
writer.println(values.mkString(key + "\t", "\t", "")) writer.println(values.mkString(key + "\t", "\t", ""))
} }
writer.close() writer.close()
...@@ -315,9 +329,9 @@ object VcfStats extends ToolCommand { ...@@ -315,9 +329,9 @@ object VcfStats extends ToolCommand {
logger.info("Starting: " + command) logger.info("Starting: " + command)
val process = Process(command).run(ProcessLogger(x => logger.debug(x), x => logger.debug(x))) val process = Process(command).run(ProcessLogger(x => logger.debug(x), x => logger.debug(x)))
if (process.exitValue() == 0) logger.info("Done: " + command) if (process.exitValue() == 0) logger.info("Done: " + command)
else { else {
logger.warn("Failed: " + command) logger.warn("Failed: " + command)
if (!logger.isDebugEnabled) logger.warn("Use -l debug for more info") if (!logger.isDebugEnabled) logger.warn("Use -l debug for more info")
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment