Skip to content
Snippets Groups Projects
Commit ef03063d authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Move method to vcfutils

parent 960476a7
No related branches found
No related tags found
No related merge requests found
......@@ -17,10 +17,10 @@ package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.{File, FileOutputStream, PrintWriter}
import htsjdk.samtools.util.Interval
import htsjdk.variant.variantcontext.{Allele, Genotype, VariantContext}
import htsjdk.variant.variantcontext.{Genotype, VariantContext}
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import nl.lumc.sasc.biopet.utils.{FastaUtils, ToolCommand}
import nl.lumc.sasc.biopet.utils.{FastaUtils, ToolCommand, VcfUtils}
import scala.collection.JavaConversions._
import scala.collection.mutable
......@@ -30,7 +30,7 @@ import scala.util.Random
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{ Await, Future }
import scala.concurrent.{Await, Future}
/**
* This tool will generate statistics from a vcf file
......@@ -223,7 +223,7 @@ object VcfStats extends ToolCommand {
val genotype2 = record.getGenotype(sample2)
if (genotype.getAlleles == genotype2.getAlleles)
stats.samplesStats(sample1).sampleToSample(sample2).genotypeOverlap += 1
stats.samplesStats(sample1).sampleToSample(sample2).alleleOverlap += alleleOverlap(genotype.getAlleles.toList, genotype2.getAlleles.toList)
stats.samplesStats(sample1).sampleToSample(sample2).alleleOverlap += VcfUtils.alleleOverlap(genotype.getAlleles.toList, genotype2.getAlleles.toList)
}
}
chunkCounter += 1
......@@ -332,20 +332,6 @@ object VcfStats extends ToolCommand {
value.collect { case x => x.split("\t")(index) }
}
/** Give back the number of alleles that overlap */
def alleleOverlap(g1: List[Allele], g2: List[Allele], start: Int = 0): Int = {
if (g1.isEmpty) start
else {
val found = g2.contains(g1.head)
val g2tail = if (found) {
val index = g2.indexOf(g1.head)
g2.drop(index + 1) ++ g2.take(index)
} else g2
alleleOverlap(g1.tail, g2tail, if (found) start + 1 else start)
}
}
protected[tools] def fillGeneral(additionalTags: List[String]): Map[String, Map[String, Map[Any, Int]]] = {
val buffer = mutable.Map[String, Map[Any, Int]]()
......@@ -355,7 +341,7 @@ object VcfStats extends ToolCommand {
else buffer += key -> (map + (value -> map.getOrElse(value, 0)))
}
buffer += "QUAL" -> Map("not set" -> 1)
addToBuffer("QUAL", "not set", false)
addToBuffer("SampleDistribution-Het", "not set", found = false)
addToBuffer("SampleDistribution-HetNonRef", "not set", found = false)
......@@ -464,10 +450,10 @@ object VcfStats extends ToolCommand {
else buffer += key -> (map + (value -> map.getOrElse(value, 0)))
}
buffer += "DP" -> Map("not set" -> 1)
buffer += "GQ" -> Map("not set" -> 1)
addToBuffer("DP", "not set", false)
addToBuffer("GQ", "not set", false)
addToBuffer("general", "Total", found = true)
addToBuffer("general", "Total", false)
addToBuffer("general", "Het", false)
addToBuffer("general", "HetNonRef", false)
addToBuffer("general", "Hom", false)
......
......@@ -103,28 +103,6 @@ class VcfStatsTest extends TestNGSuite with Matchers {
s1.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) shouldBe mutable.Map("1" -> mutable.Map(1 -> 2), "2" -> mutable.Map(2 -> 8))
}
@Test
def testAlleleOverlap(): Unit = {
val a1 = Allele.create("G")
val a2 = Allele.create("A")
alleleOverlap(List(a1, a1), List(a1, a1)) shouldBe 2
alleleOverlap(List(a2, a2), List(a2, a2)) shouldBe 2
alleleOverlap(List(a1, a2), List(a1, a2)) shouldBe 2
alleleOverlap(List(a1, a2), List(a2, a1)) shouldBe 2
alleleOverlap(List(a2, a1), List(a1, a2)) shouldBe 2
alleleOverlap(List(a2, a1), List(a2, a1)) shouldBe 2
alleleOverlap(List(a1, a2), List(a1, a1)) shouldBe 1
alleleOverlap(List(a2, a1), List(a1, a1)) shouldBe 1
alleleOverlap(List(a1, a1), List(a1, a2)) shouldBe 1
alleleOverlap(List(a1, a1), List(a2, a1)) shouldBe 1
alleleOverlap(List(a1, a1), List(a2, a2)) shouldBe 0
alleleOverlap(List(a2, a2), List(a1, a1)) shouldBe 0
}
@Test
def testMergeStatsMap = {
val m1: mutable.Map[Any, Int] = mutable.Map("a" -> 1)
......
......@@ -17,8 +17,8 @@ package nl.lumc.sasc.biopet.utils
import java.io.File
import java.util
import htsjdk.variant.variantcontext.{ Genotype, VariantContext }
import htsjdk.variant.vcf.{ VCFFileReader, VCFHeader, VCFFilterHeaderLine }
import htsjdk.variant.variantcontext.{Allele, Genotype, VariantContext}
import htsjdk.variant.vcf.{VCFFileReader, VCFFilterHeaderLine, VCFHeader}
import scala.collection.JavaConversions._
......@@ -149,4 +149,18 @@ object VcfUtils {
def isCompoundNoCall(genotype: Genotype): Boolean = {
genotype.isCalled && genotype.getAlleles.exists(_.isNoCall) && genotype.getAlleles.exists(_.isReference)
}
/** Give back the number of alleles that overlap */
def alleleOverlap(g1: List[Allele], g2: List[Allele], start: Int = 0): Int = {
if (g1.isEmpty) start
else {
val found = g2.contains(g1.head)
val g2tail = if (found) {
val index = g2.indexOf(g1.head)
g2.drop(index + 1) ++ g2.take(index)
} else g2
alleleOverlap(g1.tail, g2tail, if (found) start + 1 else start)
}
}
}
......@@ -27,4 +27,26 @@ class VcfUtilsTest extends TestNGSuite with Matchers {
VcfUtils.isCompoundNoCall(completeNoCall) shouldBe false
}
@Test
def testAlleleOverlap(): Unit = {
val a1 = Allele.create("G")
val a2 = Allele.create("A")
VcfUtils.alleleOverlap(List(a1, a1), List(a1, a1)) shouldBe 2
VcfUtils.alleleOverlap(List(a2, a2), List(a2, a2)) shouldBe 2
VcfUtils.alleleOverlap(List(a1, a2), List(a1, a2)) shouldBe 2
VcfUtils.alleleOverlap(List(a1, a2), List(a2, a1)) shouldBe 2
VcfUtils.alleleOverlap(List(a2, a1), List(a1, a2)) shouldBe 2
VcfUtils.alleleOverlap(List(a2, a1), List(a2, a1)) shouldBe 2
VcfUtils.alleleOverlap(List(a1, a2), List(a1, a1)) shouldBe 1
VcfUtils.alleleOverlap(List(a2, a1), List(a1, a1)) shouldBe 1
VcfUtils.alleleOverlap(List(a1, a1), List(a1, a2)) shouldBe 1
VcfUtils.alleleOverlap(List(a1, a1), List(a2, a1)) shouldBe 1
VcfUtils.alleleOverlap(List(a1, a1), List(a2, a2)) shouldBe 0
VcfUtils.alleleOverlap(List(a2, a2), List(a1, a1)) shouldBe 0
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment