Commit d36d3bd4 authored by Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-500

parents 36586d37 c779f1f0
......@@ -208,8 +208,8 @@ object BammetricsReport extends ReportBuilder {
val pngFile = new File(outputDir, prefix + ".png")
def paths(name: String) = Map(
"mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "value"),
name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "count")
"mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "values"),
name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "counts")
)
val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
......@@ -236,8 +236,8 @@ object BammetricsReport extends ReportBuilder {
val pngFile = new File(outputDir, prefix + ".png")
def paths(name: String) = Map(
"clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "value"),
name -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "count")
"clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "values"),
name -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "counts")
)
val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
......
......@@ -40,17 +40,11 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
private var outputFiles: List[File] = Nil
def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json")
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
def clipingFile(contig: Option[String] = None): File = getOutputFile("clipping.tsv", contig)
override def beforeGraph() {
super.beforeGraph()
deps :+= new File(bamFile.getAbsolutePath.replaceAll(".bam$", ".bai"))
outputFiles :+= bamstatsSummary
outputFiles :+= flagstatSummaryFile()
outputFiles :+= mappingQualityFile()
outputFiles :+= clipingFile()
jobOutputFile = new File(outputDir, ".bamstats.out")
if (reference == null) reference = referenceFasta()
}
......
......@@ -18,8 +18,9 @@ import java.io.File
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.tryToParseNumber
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, tryToParseNumber }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
......@@ -30,7 +31,7 @@ import scala.io.Source
* Created by pjvan_thof on 1/10/15.
*/
class VcfStats(val root: Configurable) extends ToolCommandFunction with Summarizable with Reference {
def toolObject = nl.lumc.sasc.biopet.tools.VcfStats
def toolObject = VcfStats
mainFunction = false
......@@ -41,10 +42,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz
protected var index: File = null
@Output
protected var generalStats: File = null
@Output
protected var genotypeStats: File = null
protected var statsFile: File = null
override def defaultCoreMemory = 3.0
override def defaultThreads = 3
......@@ -66,8 +64,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz
/** Set output dir and a output file */
def setOutputDir(dir: File): Unit = {
outputDir = dir
generalStats = new File(dir, "general.tsv")
genotypeStats = new File(dir, "genotype-general.tsv")
statsFile = new File(dir, "stats.json")
jobOutputFile = new File(dir, ".vcfstats.out")
}
......@@ -83,35 +80,10 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz
optional("--intervals", intervals)
/** Returns general stats to the summary */
def summaryStats: Map[String, Any] = {
Map("info" -> (for (
line <- Source.fromFile(generalStats).getLines().toList.tail;
values = line.split("\t") if values.size >= 2 && !values(0).isEmpty
) yield values(0) -> tryToParseNumber(values(1)).getOrElse(None)
).toMap)
}
def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(statsFile)
/** return only general files to summary */
def summaryFiles: Map[String, File] = Map(
"general_stats" -> generalStats,
"genotype_stats" -> genotypeStats
"stats" -> statsFile
)
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
val data = Source.fromFile(genotypeStats).getLines().map(_.split("\t")).toArray
for (s <- 1 until data(0).size) {
val sample = data(0)(s)
val stats = Map("genotype" -> (for (f <- 1 until data.length) yield {
data(f)(0) -> tryToParseNumber(data(f)(s)).getOrElse(None)
}).toMap)
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats
}
qscript.addSummarizable(sum, name, Some(sample))
}
}
}
......@@ -47,7 +47,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.ValidateFastq,
nl.lumc.sasc.biopet.tools.ValidateVcf,
nl.lumc.sasc.biopet.tools.VcfFilter,
nl.lumc.sasc.biopet.tools.VcfStats,
nl.lumc.sasc.biopet.tools.vcfstats.VcfStats,
nl.lumc.sasc.biopet.tools.VcfToTsv,
nl.lumc.sasc.biopet.tools.VcfWithVcf,
nl.lumc.sasc.biopet.tools.VepNormalizer,
......
......@@ -319,6 +319,11 @@ object VcfFilter extends ToolCommand {
* @return true if filter passed
*/
/** Checks that every requested sample carries a usable variant call in this record.
  *
  * @param record  the VCF record to inspect
  * @param samples sample names that must all have a called, non-reference genotype
  * @return true when no sample is hom-ref, no-call or compound no-call
  * @throws IllegalArgumentException when a sample name is absent from the VCF header
  */
def mustHaveVariant(record: VariantContext, samples: List[String]): Boolean = {
  val knownSamples = record.getSampleNames.toList
  // Fail fast on the first requested sample that the VCF does not contain
  samples.find(sample => !knownSamples.contains(sample)).foreach { s =>
    throw new IllegalArgumentException(s"Sample name $s does not exist in VCF file")
  }
  samples.forall { sample =>
    val genotype = record.getGenotype(sample)
    !(genotype.isHomRef || genotype.isNoCall || VcfUtils.isCompoundNoCall(genotype))
  }
}
......
......@@ -98,50 +98,25 @@ object BamStats extends ToolCommand {
*/
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
processContig(contig, bamFile, binSize, threadBinSize, outputDir)
}
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.toList)
stats.writeStatsToFiles(outputDir)
contig.chr -> processContig(contig, bamFile, binSize, threadBinSize, outputDir)
}.toList
val clippingHistogram = tsvToMap(new File(outputDir, "clipping.tsv"))
val mappingQualityHistogram = tsvToMap(new File(outputDir, "mapping_quality.tsv"))
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2))
val summary = Map(
"flagstats" -> ConfigUtils.fileToConfigMap(new File(outputDir, "flagstats.summary.json")),
"flagstats_per_contig" -> referenceDict.getSequences.map {
c =>
c.getSequenceName -> ConfigUtils.fileToConfigMap(
new File(outputDir, "contigs" + File.separator + c.getSequenceName + File.separator + "flagstats.summary.json"))
}.toMap,
"mapping_quality" -> Map("general" -> aggregateStats(mappingQualityHistogram), "histogram" -> mappingQualityHistogram),
"clipping" -> Map("general" -> aggregateStats(clippingHistogram), "histogram" -> clippingHistogram)
val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json"))
val totalStats = stats.toSummaryMap
val statsMap = Map(
"total" -> totalStats,
"contigs" -> contigsFutures.map(x => x._1 -> Await.result(x._2, Duration.Zero).toSummaryMap).toMap
)
statsWriter.println(ConfigUtils.mapToJson(statsMap).nospaces)
statsWriter.close()
val summaryWriter = new PrintWriter(new File(outputDir, "bamstats.summary.json"))
summaryWriter.println(ConfigUtils.mapToJson(summary).spaces2)
summaryWriter.println(ConfigUtils.mapToJson(totalStats).nospaces)
summaryWriter.close()
}
/** Computes summary statistics (min, max, median, mean, modal) for a histogram
  * given as parallel "value" and "count" arrays.
  *
  * Fixes the previous median computation, which kept the index of the element
  * *before* the cumulative count crossed an integer-floored halfway threshold
  * and indexed the unsorted value array — producing a wrong median for unsorted
  * or multi-element histograms.
  *
  * @param table map with "value" -> histogram values and "count" -> occurrence counts
  * @return empty map when the histogram is empty, otherwise min/max/median/mean/modal
  */
def aggregateStats(table: Map[String, Array[Long]]): Map[String, Any] = {
  val values = table("value")
  val counts = table("count")
  require(values.length == counts.length, "value/count arrays must be parallel")
  if (values.nonEmpty) {
    val totalCounts = counts.sum
    // modal value: the value with the highest count (first occurrence on ties)
    val modal = values(counts.indexOf(counts.max))
    val mean: Double = values.zip(counts).map(x => x._1 * x._2).sum.toDouble / totalCounts
    // weighted median: smallest value whose cumulative count reaches half the total
    val sortedPairs = values.zip(counts).sortBy(_._1)
    val half = (totalCounts + 1) / 2
    val cumulative = sortedPairs.scanLeft(0L)(_ + _._2).tail
    val median: Long = sortedPairs(cumulative.indexWhere(_ >= half))._1
    Map("min" -> values.min, "max" -> values.max, "median" -> median, "mean" -> mean, "modal" -> modal)
  } else Map()
}
/**
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
......@@ -157,11 +132,7 @@ object BamStats extends ToolCommand {
.grouped((region.length.toDouble / binSize).ceil.toInt / (region.length.toDouble / threadBinSize).ceil.toInt)
.map(scatters => processThread(scatters, bamFile))
.toList
val stats = waitOnFutures(scattersFutures, Some(region.chr))
val contigDir = new File(outputDir, "contigs" + File.separator + region.chr)
contigDir.mkdirs()
stats.writeStatsToFiles(contigDir)
stats
waitOnFutures(scattersFutures, Some(region.chr))
}
/**
......
......@@ -15,6 +15,7 @@
package nl.lumc.sasc.biopet.tools.bamstats
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.sortAnyAny
import scala.collection.mutable
......@@ -49,6 +50,11 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
writer.close()
}
/** Exposes this counter as a summary map of parallel, key-sorted "values"/"counts" lists. */
def toSummaryMap = {
  val sortedPairs = counts.toList.sortWith((a, b) => sortAnyAny(a._1, b._1))
  Map("values" -> sortedPairs.map(_._1), "counts" -> sortedPairs.map(_._2))
}
override def equals(other: Any): Boolean = {
other match {
case c: Counts[T] => this.counts == c.counts
......@@ -58,5 +64,22 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
}
/** Counts specialization for numeric keys, adding aggregate statistics. */
class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) {

  /** Computes min/max/median/mean/modal over the histogram.
    *
    * Fixes two defects in the previous implementation: values were taken in
    * unspecified Map iteration order (nondeterministic results), and the median
    * fold returned the index of the element *before* the cumulative count
    * crossed an integer-floored halfway threshold. Statistics are now computed
    * over values sorted ascending.
    *
    * @return empty map for an empty histogram, otherwise min/max/median/mean/modal
    */
  def aggregateStats: Map[String, Any] = {
    // deterministic ascending order; Numeric[T] also supplies the Ordering
    val sorted = this.counts.toList.sortBy(_._1)(ord)
    if (sorted.nonEmpty) {
      val values = sorted.map(_._1)
      val occurrences = sorted.map(_._2)
      val totalCounts = occurrences.sum
      // modal value: highest count, smallest value on ties
      val modal = values(occurrences.indexOf(occurrences.max))
      val mean: Double = sorted.map(x => ord.toDouble(x._1) * x._2).sum / totalCounts
      // weighted median: smallest value whose cumulative count reaches half the total
      val half = (totalCounts + 1) / 2
      val cumulative = occurrences.scanLeft(0L)(_ + _).tail
      val median = values(cumulative.indexWhere(_ >= half))
      Map("min" -> values.min, "max" -> values.max, "median" -> median, "mean" -> mean, "modal" -> modal)
    } else Map()
  }
}
......@@ -58,4 +58,17 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(),
this._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv"))
this._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv"))
}
/** Collects all gathered per-metric statistics into one nested summary map. */
def toSummaryMap = {
  // Each histogram contributes its raw distribution plus aggregate statistics
  val mappingQuality = Map("histogram" -> mappingQualityHistogram.toSummaryMap, "general" -> mappingQualityHistogram.aggregateStats)
  val insertSize = Map("histogram" -> insertSizeHistogram.toSummaryMap, "general" -> insertSizeHistogram.aggregateStats)
  val clipping = Map("histogram" -> clippingHistogram.toSummaryMap, "general" -> clippingHistogram.aggregateStats)
  val leftClipping = Map("histogram" -> leftClippingHistogram.toSummaryMap, "general" -> leftClippingHistogram.aggregateStats)
  val rightClipping = Map("histogram" -> rightClippingHistogram.toSummaryMap, "general" -> rightClippingHistogram.aggregateStats)
  val fivePrimeClipping = Map("histogram" -> _5_ClippingHistogram.toSummaryMap, "general" -> _5_ClippingHistogram.aggregateStats)
  val threePrimeClipping = Map("histogram" -> _3_ClippingHistogram.toSummaryMap, "general" -> _3_ClippingHistogram.aggregateStats)
  Map(
    "flagstats" -> flagstat.toSummaryMap,
    "mapping_quality" -> mappingQuality,
    "insert_size" -> insertSize,
    "clipping" -> clipping,
    "left_clipping" -> leftClipping,
    "right_clipping" -> rightClipping,
    "5_prime_clipping" -> fivePrimeClipping,
    "3_prime_clipping" -> threePrimeClipping
  )
}
}
......@@ -201,6 +201,12 @@ class FlagstatCollector {
buffer.toString()
}
/** Flattens the flagstat totals into a map keyed by check name, plus the cross-count matrix. */
def toSummaryMap = {
  val perCheck = for (key <- names.keys.toList.sorted) yield names(key) -> totalCounts(key)
  perCheck.toMap ++ Map("cross_counts" -> crossCounts)
}
def +=(other: FlagstatCollector): FlagstatCollector = {
require(this.names == other.names)
//require(this.functions == other.functions)
......
package nl.lumc.sasc.biopet.tools.vcfstats
import scala.collection.mutable
/**
* class to store all sample relative stats
*
* @param genotypeStats Stores all genotype relative stats
* @param sampleToSample Stores sample to sample compare stats
*/
/** Container for all stats relative to a single sample.
  *
  * @param genotypeStats  genotype-level stats, keyed chr -> field -> value -> count
  * @param sampleToSample pairwise comparison stats against the other samples
  */
case class SampleStats(genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
                       sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) {
  /** Merges the contents of another SampleStats into this one (in place). */
  def +=(other: SampleStats): Unit = {
    other.sampleToSample.foreach {
      case (name, stats) =>
        sampleToSample.get(name) match {
          case Some(existing) => existing += stats
          case None           => sampleToSample(name) = stats
        }
    }
    for ((chr, chrMap) <- other.genotypeStats; (field, fieldMap) <- chrMap) {
      // Lazily create the per-contig map, then merge or adopt each field map
      val chrStats = genotypeStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
      chrStats.get(field) match {
        case Some(existing) => Stats.mergeStatsMap(existing, fieldMap)
        case None           => chrStats(field) = fieldMap
      }
    }
  }
}
package nl.lumc.sasc.biopet.tools.vcfstats
/**
* Class to store sample to sample compare stats
* @param genotypeOverlap Number of genotypes match with other sample
* @param alleleOverlap Number of alleles also found in other sample
*/
/** Pairwise sample-comparison counters.
  *
  * @param genotypeOverlap number of genotypes matching the other sample
  * @param alleleOverlap   number of alleles also found in the other sample
  */
case class SampleToSampleStats(var genotypeOverlap: Int = 0,
                               var alleleOverlap: Int = 0) {
  /** Accumulates the counters of another instance into this one (in place). */
  def +=(other: SampleToSampleStats): Unit = {
    genotypeOverlap = genotypeOverlap + other.genotypeOverlap
    alleleOverlap = alleleOverlap + other.alleleOverlap
  }
}
package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.{ File, PrintWriter }
import scala.collection.mutable
import nl.lumc.sasc.biopet.utils.sortAnyAny
/**
* General stats class to store vcf stats
*
* @param generalStats Stores are general stats
* @param samplesStats Stores all sample/genotype specific stats
*/
case class Stats(generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
                 samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {
  /** Merges the counts of `other` into this instance (mutating it) and returns this. */
  def +=(other: Stats): Stats = {
    for ((key, value) <- other.samplesStats) {
      if (this.samplesStats.contains(key)) this.samplesStats(key) += value
      else this.samplesStats(key) = value
    }
    for ((chr, chrMap) <- other.generalStats; (field, fieldMap) <- chrMap) {
      // Create the per-contig map on first sight, then merge or adopt each field map
      if (!this.generalStats.contains(chr)) generalStats += (chr -> mutable.Map[String, mutable.Map[Any, Int]]())
      val thisField = this.generalStats(chr).get(field)
      if (thisField.isDefined) Stats.mergeStatsMap(thisField.get, fieldMap)
      else this.generalStats(chr) += field -> fieldMap
    }
    this
  }
  /** Writes one general field as a value/count TSV into `outputDir` and returns the file.
    * File name pattern: [prefix-][chr-]field.tsv, where chr "total" is omitted. */
  def writeField(field: String, outputDir: File, prefix: String = "", chr: String = "total"): File = {
    val file = (prefix, chr) match {
      case ("", "total") => new File(outputDir, field + ".tsv")
      case (_, "total")  => new File(outputDir, prefix + "-" + field + ".tsv")
      case ("", _)       => new File(outputDir, chr + "-" + field + ".tsv")
      case _             => new File(outputDir, prefix + "-" + chr + "-" + field + ".tsv")
    }
    // Missing contig/field falls back to an empty map, producing a header-only file
    val data = this.generalStats.getOrElse(chr, mutable.Map[String, mutable.Map[Any, Int]]()).getOrElse(field, mutable.Map[Any, Int]())
    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    writer.println("value\tcount")
    for (key <- data.keySet.toList.sortWith(sortAnyAny)) {
      writer.println(key + "\t" + data(key))
    }
    writer.close()
    file
  }
  /** Returns one general field as a map of parallel, key-sorted "value"/"count" arrays. */
  def getField(field: String, chr: String = "total"): Map[String, Array[Any]] = {
    val data = this.generalStats.getOrElse(chr, mutable.Map[String, mutable.Map[Any, Int]]()).getOrElse(field, mutable.Map[Any, Int]())
    val rows = for (key <- data.keySet.toArray.sortWith(sortAnyAny)) yield {
      (key, data(key))
    }
    Map("value" -> rows.map(_._1), "count" -> rows.map(_._2))
  }
  /** Writes one genotype field as a TSV with one column per sample (same naming scheme as writeField). */
  def writeGenotypeField(samples: List[String], field: String, outputDir: File,
                         prefix: String = "", chr: String = "total"): Unit = {
    val file = (prefix, chr) match {
      case ("", "total") => new File(outputDir, field + ".tsv")
      case (_, "total")  => new File(outputDir, prefix + "-" + field + ".tsv")
      case ("", _)       => new File(outputDir, chr + "-" + field + ".tsv")
      case _             => new File(outputDir, prefix + "-" + chr + "-" + field + ".tsv")
    }
    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    writer.println(samples.mkString(field + "\t", "\t", ""))
    // Union of all keys seen for this field across the requested samples
    val keySet = (for (sample <- samples) yield this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).keySet).fold(Set[Any]())(_ ++ _)
    for (key <- keySet.toList.sortWith(sortAnyAny)) {
      // Samples without a count for this key default to 0
      val values = for (sample <- samples) yield this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).getOrElse(key, 0)
      writer.println(values.mkString(key + "\t", "\t", ""))
    }
    writer.close()
  }
  /** Returns one genotype field per sample, keyed sample -> (key.toString -> count).
    * NOTE(review): the inner values stay wrapped in their Option (e.g. Some(5)) because the
    * `.get(key)` result is never unwrapped before the upcast to Any — confirm downstream
    * consumers expect Option values. */
  def getGenotypeField(samples: List[String], field: String, chr: String = "total"): Map[String, Map[String, Any]] = {
    val keySet = (for (sample <- samples) yield this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).keySet).fold(Set[Any]())(_ ++ _)
    (for (sample <- samples) yield sample -> {
      keySet.map(key =>
        key.toString -> this.samplesStats(sample).genotypeStats.getOrElse(chr, Map[String, Map[Any, Int]]()).getOrElse(field, Map[Any, Int]()).get(key)
      ).filter(_._2.isDefined).toMap
    }).toMap
  }
  /** Generates the stats map (genotype/info/sample_distributions) for one contig. */
  def getContigStats(contig: String,
                     samples: List[String],
                     genotypeFields: List[String] = Nil,
                     infoFields: List[String] = Nil,
                     sampleDistributions: List[String] = Nil): Map[String, Any] = {
    Map(
      "genotype" -> genotypeFields.map(f => f -> getGenotypeField(samples, f, contig)).toMap,
      "info" -> infoFields.map(f => f -> getField(f, contig)).toMap,
      "sample_distributions" -> sampleDistributions.map(f => f -> getField("SampleDistribution-" + f, contig)).toMap
    )
  }
  /** Generates the stats map aggregated over all contigs (the "total" pseudo-contig). */
  def getTotalStats(samples: List[String],
                    genotypeFields: List[String] = Nil,
                    infoFields: List[String] = Nil,
                    sampleDistributions: List[String] = Nil) =
    getContigStats("total", samples, genotypeFields, infoFields, sampleDistributions)
  /** Generates stats for the total and for each contig separately. */
  def getAllStats(contigs: List[String],
                  samples: List[String],
                  genotypeFields: List[String] = Nil,
                  infoFields: List[String] = Nil,
                  sampleDistributions: List[String] = Nil): Map[String, Any] = {
    Map(
      "contigs" -> contigs.map(c => c -> getContigStats(c, samples, genotypeFields, infoFields, sampleDistributions)).toMap,
      "total" -> getTotalStats(samples, genotypeFields, infoFields, sampleDistributions)
    )
  }
}
object Stats {
  /** Adds every count of m2 to the matching key of m1 (m1 is modified in place). */
  def mergeStatsMap(m1: mutable.Map[Any, Int], m2: mutable.Map[Any, Int]): Unit =
    m2.foreach { case (key, value) => m1(key) = m1.getOrElse(key, 0) + value }

  /** Adds all counts of the immutable nested map m2 into the mutable nested map m1. */
  def mergeNestedStatsMap(m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]],
                          m2: Map[String, Map[String, Map[Any, Int]]]): Unit = {
    for ((chr, chrMap) <- m2; (field, fieldMap) <- chrMap) {
      // Create the per-contig map on demand, then merge into or adopt each field map
      val chrStats = m1.getOrElseUpdate(chr, mutable.Map())
      chrStats.get(field) match {
        case Some(existing) =>
          fieldMap.foreach { case (key, value) => existing(key) = existing.getOrElse(key, 0) + value }
        case None =>
          chrStats(field) = mutable.Map(fieldMap.toList: _*)
      }
    }
  }
}
\ No newline at end of file
......@@ -186,6 +186,8 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
mustHaveVariant(record, List("Sample_101", "Sample_102")) shouldBe true
mustHaveVariant(record, List("Sample_101", "Sample_102", "Sample_103")) shouldBe false
an[IllegalArgumentException] shouldBe thrownBy(mustHaveVariant(record, List("notExistant")))
val starReader = new VCFFileReader(star, false)
starReader.iterator().foreach(x => mustHaveVariant(x, List("Sample_101")) shouldBe false)
}
......
......@@ -17,12 +17,13 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.{ Files, Paths }
import htsjdk.variant.variantcontext.Allele
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.tools.VcfStats._
import nl.lumc.sasc.biopet.tools.vcfstats.{ SampleStats, SampleToSampleStats, Stats, VcfStats }
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats._
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import nl.lumc.sasc.biopet.utils.sortAnyAny
import scala.collection.mutable
......@@ -101,45 +102,23 @@ class VcfStatsTest extends TestNGSuite with Matchers {
s1.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) shouldBe mutable.Map("1" -> mutable.Map(1 -> 2), "2" -> mutable.Map(2 -> 8))
}
@Test
def testAlleleOverlap(): Unit = {
val a1 = Allele.create("G")
val a2 = Allele.create("A")
alleleOverlap(List(a1, a1), List(a1, a1)) shouldBe 2
alleleOverlap(List(a2, a2), List(a2, a2)) shouldBe 2
alleleOverlap(List(a1, a2), List(a1, a2)) shouldBe 2
alleleOverlap(List(a1, a2), List(a2, a1)) shouldBe 2
alleleOverlap(List(a2, a1), List(a1, a2)) shouldBe 2
alleleOverlap(List(a2, a1), List(a2, a1)) shouldBe 2
alleleOverlap(List(a1, a2), List(a1, a1)) shouldBe 1
alleleOverlap(List(a2, a1), List(a1, a1)) shouldBe 1
alleleOverlap(List(a1, a1), List(a1, a2)) shouldBe 1
alleleOverlap(List(a1, a1), List(a2, a1)) shouldBe 1
alleleOverlap(List(a1, a1), List(a2, a2)) shouldBe 0
alleleOverlap(List(a2, a2), List(a1, a1)) shouldBe 0
}
@Test
def testMergeStatsMap = {
val m1: mutable.Map[Any, Int] = mutable.Map("a" -> 1)
val m2: mutable.Map[Any, Int] = mutable.Map("b" -> 2)
mergeStatsMap(m1, m2)
Stats.mergeStatsMap(m1, m2)
m1 should equal(mutable.Map("a" -> 1, "b" -> 2))
val m3: mutable.Map[Any, Int] = mutable.Map(1 -> 500)
val m4: mutable.Map[Any, Int] = mutable.Map(6 -> 125)
mergeStatsMap(m3, m4)
Stats.mergeStatsMap(m3, m4)
m3 should equal(mutable.Map(1 -> 500, 6 -> 125))
mergeStatsMap(m1, m3)
Stats.mergeStatsMap(m1, m3)
m1 should equal(mutable.Map("a" -> 1, "b" -> 2, 1 -> 500, 6 -> 125))
}
......@@ -151,7 +130,7 @@ class VcfStatsTest extends TestNGSuite with Matchers {
val m2: Map[String, Map[String, Map[Any, Int]]] = Map("test" ->
Map("nested" -> Map("b" -> 2)))
mergeNestedStatsMap(m1, m2)
Stats.mergeNestedStatsMap(m1, m2)
m1 should equal(mutable.Map("test" -> mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2))))
......@@ -160,13 +139,13 @@ class VcfStatsTest extends TestNGSuite with Matchers {
val m4: Map[String, Map[String, Map[Any, Int]]] = Map("test" ->
Map("nestedd" -> Map(6 -> 125)))
mergeNestedStatsMap(m3, m4)
Stats.mergeNestedStatsMap(m3, m4)
m3 should equal(mutable.Map("test" -> mutable.Map("nestedd" -> mutable.Map(1 -> 500, 6 -> 125))))
val m5 = m3.toMap.map(x => x._1 -> x._2.toMap.map(y => y._1 -> y._2.toMap))
mergeNestedStatsMap(m1, m5)
Stats.mergeNestedStatsMap(m1, m5)
m1 should equal(mutable.Map("test" -> mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2),
"nestedd" -> mutable.Map(1 -> 500, 6 -> 125))))
......
......@@ -32,15 +32,17 @@ class BamStatsTest extends TestNGSuite with Matchers {
outputDir.deleteOnExit()
BamStats.main(Array("-b", BamStatsTest.pairedBam01.getAbsolutePath, "-o", outputDir.getAbsolutePath))
new File(outputDir, "flagstats") should exist
new File(outputDir, "flagstats.summary.json") should exist
new File(outputDir, "mapping_quality.tsv") should exist
new File(outputDir, "insert_size.tsv") should exist
new File(outputDir, "clipping.tsv") should exist
new File(outputDir, "left_clipping.tsv") should exist
new File(outputDir, "right_clipping.tsv") should exist
new File(outputDir, "5_prime_clipping.tsv") should exist
new File(outputDir, "3_prime_clipping.tsv") should exist
new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats") shouldNot exist
new File(outputDir, "flagstats.summary.json") shouldNot exist
new File(outputDir, "mapping_quality.tsv") shouldNot exist
new File(outputDir, "insert_size.tsv") shouldNot exist
new File(outputDir, "clipping.tsv") shouldNot exist
new File(outputDir, "left_clipping.tsv") shouldNot exist
new File(outputDir, "right_clipping.tsv") shouldNot exist
new File(outputDir, "5_prime_clipping.tsv") shouldNot exist
new File(outputDir, "3_prime_clipping.tsv") shouldNot exist
}
}
......
......@@ -17,8 +17,8 @@ package nl.lumc.sasc.biopet.utils
import java.io.File
import java.util
import htsjdk.variant.variantcontext.{ Genotype, VariantContext }
import htsjdk.variant.vcf.{ VCFFileReader, VCFHeader, VCFFilterHeaderLine }
import htsjdk.variant.variantcontext.{ Allele, Genotype, VariantContext }
import htsjdk.variant.vcf.{ VCFFileReader, VCFFilterHeaderLine, VCFHeader }
import scala.collection.JavaConversions._
......@@ -149,4 +149,18 @@ object VcfUtils {
/** A "compound no call": the genotype is called, yet mixes no-call and reference alleles. */
def isCompoundNoCall(genotype: Genotype): Boolean = {
  val alleles = genotype.getAlleles
  genotype.isCalled && alleles.exists(_.isNoCall) && alleles.exists(_.isReference)
}
/** Give back the number of alleles that overlap */
def alleleOverlap(g1: List[Allele], g2: List[Allele], start: Int = 0): Int = {
if (g1.isEmpty) start