VcfStatsTest.scala 10.7 KB
Newer Older
bow's avatar
bow committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.tools

17
import java.io.File
18
import java.nio.file.{Files, Paths}
19

Sander Bollen's avatar
Sander Bollen committed
20
import htsjdk.variant.vcf.VCFFileReader
21
import nl.lumc.sasc.biopet.tools.vcfstats._
22
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats._
Peter van 't Hof's avatar
Peter van 't Hof committed
23 24 25
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.utils.sortAnyAny
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import org.apache.commons.io.FileUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29 30 31
import scala.collection.mutable

/**
32 33 34 35
  * Test class for [[VcfStats]]
  *
  * Created by pjvan_thof on 2/5/15.
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class VcfStatsTest extends TestNGSuite with Matchers {
37 38 39 40
  private def resourcePath(p: String): String = {
    Paths.get(getClass.getResource(p).toURI).toString
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  @Test
Peter van 't Hof's avatar
Peter van 't Hof committed
42
  def testSampleToSampleStats(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
    val s1 = SampleToSampleStats()
    val s2 = SampleToSampleStats()
    s1.alleleOverlap shouldBe 0
    s1.genotypeOverlap shouldBe 0
    s2.alleleOverlap shouldBe 0
    s2.genotypeOverlap shouldBe 0

    s1 += s2
    s1.alleleOverlap shouldBe 0
    s1.genotypeOverlap shouldBe 0
    s2.alleleOverlap shouldBe 0
    s2.genotypeOverlap shouldBe 0

    s2.alleleOverlap = 2
    s2.genotypeOverlap = 3

    s1 += s2
    s1.alleleOverlap shouldBe 2
    s1.genotypeOverlap shouldBe 3
    s2.alleleOverlap shouldBe 2
    s2.genotypeOverlap shouldBe 3

    s1 += s2
    s1.alleleOverlap shouldBe 4
    s1.genotypeOverlap shouldBe 6
    s2.alleleOverlap shouldBe 2
    s2.genotypeOverlap shouldBe 3
  }

  @Test
Peter van 't Hof's avatar
Peter van 't Hof committed
73
  def testSampleStats(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
74 75 76 77 78 79 80 81 82 83 84
    val s1 = SampleStats()
    val s2 = SampleStats()

    s1.sampleToSample += "s1" -> SampleToSampleStats()
    s1.sampleToSample += "s2" -> SampleToSampleStats()
    s2.sampleToSample += "s1" -> SampleToSampleStats()
    s2.sampleToSample += "s2" -> SampleToSampleStats()

    s1.sampleToSample("s1").alleleOverlap = 1
    s2.sampleToSample("s2").alleleOverlap = 2

85 86
    s1.genotypeStats += "1" -> mutable.Map(1 -> 1)
    s2.genotypeStats += "2" -> mutable.Map(2 -> 2)
Peter van 't Hof's avatar
Peter van 't Hof committed
87 88 89 90 91

    val ss1 = SampleToSampleStats()
    val ss2 = SampleToSampleStats()

    s1 += s2
92
    s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 2))
Peter van 't Hof's avatar
Peter van 't Hof committed
93 94 95 96 97
    ss1.alleleOverlap = 1
    ss2.alleleOverlap = 2
    s1.sampleToSample shouldBe mutable.Map("s1" -> ss1, "s2" -> ss2)

    s1 += s2
98
    s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 4))
Peter van 't Hof's avatar
Peter van 't Hof committed
99 100

    s1 += s1
101
    s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 2), "2" -> mutable.Map(2 -> 8))
Peter van 't Hof's avatar
Peter van 't Hof committed
102
  }
103

Sander Bollen's avatar
Sander Bollen committed
104
  @Test
105
  def testMergeStatsMap(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
106 107
    val m1: mutable.Map[Any, Int] = mutable.Map("a" -> 1)
    val m2: mutable.Map[Any, Int] = mutable.Map("b" -> 2)
Sander Bollen's avatar
Sander Bollen committed
108

Peter van 't Hof's avatar
Peter van 't Hof committed
109
    Stats.mergeStatsMap(m1, m2)
Sander Bollen's avatar
Sander Bollen committed
110

Peter van 't Hof's avatar
Peter van 't Hof committed
111
    m1 should equal(mutable.Map("a" -> 1, "b" -> 2))
Sander Bollen's avatar
Sander Bollen committed
112

Peter van 't Hof's avatar
Peter van 't Hof committed
113 114
    val m3: mutable.Map[Any, Int] = mutable.Map(1 -> 500)
    val m4: mutable.Map[Any, Int] = mutable.Map(6 -> 125)
Sander Bollen's avatar
Sander Bollen committed
115

Peter van 't Hof's avatar
Peter van 't Hof committed
116
    Stats.mergeStatsMap(m3, m4)
Sander Bollen's avatar
Sander Bollen committed
117

Peter van 't Hof's avatar
Peter van 't Hof committed
118
    m3 should equal(mutable.Map(1 -> 500, 6 -> 125))
Sander Bollen's avatar
Sander Bollen committed
119

Peter van 't Hof's avatar
Peter van 't Hof committed
120
    Stats.mergeStatsMap(m1, m3)
Sander Bollen's avatar
Sander Bollen committed
121

Peter van 't Hof's avatar
Peter van 't Hof committed
122
    m1 should equal(mutable.Map("a" -> 1, "b" -> 2, 1 -> 500, 6 -> 125))
Sander Bollen's avatar
Sander Bollen committed
123 124 125
  }

  @Test
126
  def testMergeNestedStatsMap(): Unit = {
127 128
    val m1: mutable.Map[String, mutable.Map[Any, Int]] =
      mutable.Map("nested" -> mutable.Map("a" -> 1))
129
    val m2: Map[String, Map[Any, Int]] = Map("nested" -> Map("b" -> 2))
Sander Bollen's avatar
Sander Bollen committed
130

Peter van 't Hof's avatar
Peter van 't Hof committed
131
    Stats.mergeNestedStatsMap(m1, m2)
Sander Bollen's avatar
Sander Bollen committed
132

133
    m1 should equal(mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2)))
Sander Bollen's avatar
Sander Bollen committed
134

135 136
    val m3: mutable.Map[String, mutable.Map[Any, Int]] =
      mutable.Map("nestedd" -> mutable.Map(1 -> 500))
137
    val m4: Map[String, Map[Any, Int]] = Map("nestedd" -> Map(6 -> 125))
Sander Bollen's avatar
Sander Bollen committed
138

Peter van 't Hof's avatar
Peter van 't Hof committed
139
    Stats.mergeNestedStatsMap(m3, m4)
Sander Bollen's avatar
Sander Bollen committed
140

141
    m3 should equal(mutable.Map("nestedd" -> mutable.Map(1 -> 500, 6 -> 125)))
Sander Bollen's avatar
Sander Bollen committed
142 143
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
144
  @Test
145
  def testNoExistOutputDir(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
146 147 148 149 150
    val tmp = Files.createTempDirectory("vcfStats")
    FileUtils.deleteDirectory(new File(tmp.toAbsolutePath.toString))
    val vcf = resourcePath("/chrQ.vcf.gz")
    val ref = resourcePath("/fake_chrQ.fa")

151 152
    an[IllegalArgumentException] should be thrownBy main(
      Array("-I", vcf, "-R", ref, "-o", tmp.toAbsolutePath.toString))
Peter van 't Hof's avatar
Peter van 't Hof committed
153 154
  }

Sander Bollen's avatar
Sander Bollen committed
155
  @Test
156
  def testMain(): Unit = {
157 158 159 160
    val tmp = Files.createTempDirectory("vcfStats")
    val vcf = resourcePath("/chrQ.vcf.gz")
    val ref = resourcePath("/fake_chrQ.fa")

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
    noException should be thrownBy main(
      Array("-I", vcf, "-R", ref, "-o", tmp.toAbsolutePath.toString))
    noException should be thrownBy main(
      Array("-I", vcf, "-R", ref, "-o", tmp.toAbsolutePath.toString, "--allInfoTags"))
    noException should be thrownBy main(
      Array("-I",
            vcf,
            "-R",
            ref,
            "-o",
            tmp.toAbsolutePath.toString,
            "--allInfoTags",
            "--allGenotypeTags"))
    noException should be thrownBy main(
      Array("-I",
            vcf,
            "-R",
            ref,
            "-o",
            tmp.toAbsolutePath.toString,
            "--binSize",
            "50",
            "--writeBinStats"))
184 185

    // returns null when validation fails
186 187 188
    def validateArgs(array: Array[String]): Option[VcfStatsArgs] = {
      val argsParser = new VcfStatsOptParser("vcfstats")
      argsParser.parse(array, VcfStatsArgs())
189 190
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
191 192
    val stderr1 = new java.io.ByteArrayOutputStream
    Console.withErr(stderr1) {
193 194 195 196 197 198 199 200 201 202 203 204
      validateArgs(
        Array("-I",
              vcf,
              "-R",
              ref,
              "-o",
              tmp.toAbsolutePath.toString,
              "--binSize",
              "50",
              "--writeBinStats",
              "--genotypeWiggle",
              "NonexistentThing")) shouldBe empty
Peter van 't Hof's avatar
Peter van 't Hof committed
205
    }
206

Peter van 't Hof's avatar
Peter van 't Hof committed
207 208
    val stderr2 = new java.io.ByteArrayOutputStream
    Console.withErr(stderr2) {
209 210 211 212 213 214 215 216 217 218 219 220
      validateArgs(
        Array("-I",
              vcf,
              "-R",
              ref,
              "-o",
              tmp.toAbsolutePath.toString,
              "--binSize",
              "50",
              "--writeBinStats",
              "--generalWiggle",
              "NonexistentThing")) shouldBe empty
Peter van 't Hof's avatar
Peter van 't Hof committed
221
    }
222

Peter van 't Hof's avatar
Peter van 't Hof committed
223 224
    val stderr3 = new java.io.ByteArrayOutputStream
    Console.withErr(stderr3) {
225
      validateArgs(Array("-R", ref, "-o", tmp.toAbsolutePath.toString)) shouldBe empty
Peter van 't Hof's avatar
Peter van 't Hof committed
226
    }
Sander Bollen's avatar
Sander Bollen committed
227 228 229
  }

  @Test
230
  def testSortAnyAny(): Unit = {
Sander Bollen's avatar
Sander Bollen committed
231
    //stub
Sander Bollen's avatar
Sander Bollen committed
232 233 234
    val one: Any = 1
    val two: Any = 2
    val text: Any = "hello"
Peter van 't Hof's avatar
Peter van 't Hof committed
235
    val text2: Any = "goodbye"
Sander Bollen's avatar
Sander Bollen committed
236 237 238 239 240 241 242

    sortAnyAny(one, two) shouldBe true
    sortAnyAny(two, one) shouldBe false
    sortAnyAny(text, text2) shouldBe false
    sortAnyAny(text2, text) shouldBe true
    sortAnyAny(one, text) shouldBe true
    sortAnyAny(text, one) shouldBe false
Sander Bollen's avatar
Sander Bollen committed
243 244 245
  }

  @Test
246
  def testCheckGeneral(): Unit = {
Sander Bollen's avatar
Sander Bollen committed
247 248
    val record = new VCFFileReader(new File(resourcePath("/chrQ.vcf.gz"))).iterator().next()

Peter van 't Hof's avatar
Peter van 't Hof committed
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
    val generalStats = checkGeneral(record, List())

    generalStats.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1))
    generalStats.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1))
    generalStats.get("SampleDistribution-Mixed") shouldEqual Some(Map(0 -> 1))
    generalStats.get("SampleDistribution-Hom") shouldEqual Some(Map(1 -> 1))
    generalStats.get("SampleDistribution-HomRef") shouldEqual Some(Map(1 -> 1))
    generalStats.get("SampleDistribution-Available") shouldEqual Some(Map(3 -> 1))
    generalStats.get("QUAL") shouldEqual Some(Map(1541 -> 1))
    generalStats.get("SampleDistribution-HetNonRef") shouldEqual Some(Map(0 -> 1))
    generalStats.get("SampleDistribution-Het") shouldEqual Some(Map(2 -> 1))
    generalStats.get("SampleDistribution-NoCall") shouldEqual Some(Map(0 -> 1))
    generalStats.get("SampleDistribution-Filtered") shouldEqual Some(Map(0 -> 1))
    generalStats.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1))
    generalStats.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1))

    generalStats.get("general") should not be empty
    val general = generalStats("general")
Sander Bollen's avatar
Sander Bollen committed
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286

    general.get("PolymorphicInSamples") shouldEqual Some(1)
    general.get("ComplexIndel") shouldEqual Some(0)
    general.get("FullyDecoded") shouldEqual Some(0)
    general.get("PointEvent") shouldEqual Some(0)
    general.get("MNP") shouldEqual Some(0)
    general.get("Indel") shouldEqual Some(1)
    general.get("Biallelic") shouldEqual Some(1)
    general.get("SimpleDeletion") shouldEqual Some(0)
    general.get("Variant") shouldEqual Some(1)
    general.get("SymbolicOrSV") shouldEqual Some(0)
    general.get("MonomorphicInSamples") shouldEqual Some(0)
    general.get("SNP") shouldEqual Some(0)
    general.get("Filtered") shouldEqual Some(0)
    general.get("StructuralIndel") shouldEqual Some(0)
    general.get("Total") shouldEqual Some(1)
    general.get("Mixed") shouldEqual Some(0)
    general.get("NotFiltered") shouldEqual Some(1)
    general.get("Symbolic") shouldEqual Some(0)
    general.get("SimpleInsertion") shouldEqual Some(1)
Sander Bollen's avatar
Sander Bollen committed
287 288 289
  }

  @Test
290
  def testCheckGenotype(): Unit = {
Sander Bollen's avatar
Sander Bollen committed
291 292 293 294
    val record = new VCFFileReader(new File(resourcePath("/chrQ.vcf.gz"))).iterator().next()

    val genotype = record.getGenotype(0)

Peter van 't Hof's avatar
Peter van 't Hof committed
295
    val genotypeStats = checkGenotype(record, genotype, List())
Peter van 't Hof's avatar
Peter van 't Hof committed
296

Peter van 't Hof's avatar
Peter van 't Hof committed
297 298 299 300 301 302 303
    genotypeStats.get("GQ") shouldEqual Some(Map(99 -> 1))
    genotypeStats.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1))
    genotypeStats.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1))
    genotypeStats.get("DP") shouldEqual Some(Map(45 -> 1))
    genotypeStats.get("AD-alt") shouldEqual Some(Map(21 -> 1))
    genotypeStats.get("AD-ref") shouldEqual Some(Map(24 -> 1))
    genotypeStats.get("general") should not be empty
Peter van 't Hof's avatar
Peter van 't Hof committed
304

Peter van 't Hof's avatar
Peter van 't Hof committed
305
    val general = genotypeStats("general")
Sander Bollen's avatar
Sander Bollen committed
306 307 308 309 310 311 312 313 314 315 316 317 318
    general.get("Hom") shouldEqual Some(0)
    general.get("NoCall") shouldEqual Some(0)
    general.get("Variant") shouldEqual Some(1)
    general.get("Filtered") shouldEqual Some(0)
    general.get("NonInformative") shouldEqual Some(0)
    general.get("Called") shouldEqual Some(1)
    general.get("Total") shouldEqual Some(1)
    general.get("HomVar") shouldEqual Some(0)
    general.get("HomRef") shouldEqual Some(0)
    general.get("Mixed") shouldEqual Some(0)
    general.get("Available") shouldEqual Some(1)
    general.get("Het") shouldEqual Some(1)
    general.get("HetNonRef") shouldEqual Some(0)
Sander Bollen's avatar
Sander Bollen committed
319
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
320
}