VcfStatsTest.scala 13.6 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.tools

Sander Bollen's avatar
Sander Bollen committed
18
19
20
import java.io.File
import java.nio.file.{Files, Paths}

21
import htsjdk.variant.variantcontext.Allele
Sander Bollen's avatar
Sander Bollen committed
22
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.tools.VcfStats._
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
import scala.collection.mutable

/**
 * Test class for [[VcfStats]]
 *
 * Created by pjvan_thof on 2/5/15.
 */
class VcfStatsTest extends TestNGSuite with Matchers {
Sander Bollen's avatar
Sander Bollen committed
36
37
38
39
  /**
   * Resolves a classpath resource to a filesystem path.
   *
   * @param p resource name as accepted by `Class.getResource`, e.g. "/sample.tsv"
   * @return the filesystem path of the resource, as a string
   * @throws IllegalArgumentException when no resource named `p` can be found
   */
  private def resourcePath(p: String): String = {
    // getResource returns null for a missing resource; report which one is missing
    // instead of failing with a bare NullPointerException on `.toURI`.
    val url = Option(getClass.getResource(p)).getOrElse(
      throw new IllegalArgumentException(s"Test resource not found on classpath: $p"))
    Paths.get(url.toURI).toString
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
40
41

  /** Checks that `+=` on a SampleToSampleStats adds both overlap counters into the left operand only. */
  @Test
  def testSampleToSampleStats(): Unit = {
    // Asserts both counters of one stats object in a single step.
    def expectOverlaps(stats: SampleToSampleStats, allele: Int, genotype: Int): Unit = {
      stats.alleleOverlap shouldBe allele
      stats.genotypeOverlap shouldBe genotype
    }

    val target = SampleToSampleStats()
    val addend = SampleToSampleStats()
    expectOverlaps(target, 0, 0)
    expectOverlaps(addend, 0, 0)

    // Adding an all-zero object leaves both sides at zero.
    target += addend
    expectOverlaps(target, 0, 0)
    expectOverlaps(addend, 0, 0)

    addend.alleleOverlap = 2
    addend.genotypeOverlap = 3

    // First addition copies the addend's counts into the target.
    target += addend
    expectOverlaps(target, 2, 3)
    expectOverlaps(addend, 2, 3)

    // Second addition accumulates; the addend itself is never modified.
    target += addend
    expectOverlaps(target, 4, 6)
    expectOverlaps(addend, 2, 3)
  }

  /** Checks that `+=` on a SampleStats merges the sample-to-sample counters and the nested genotype stats. */
  @Test
  def testSampleStats(): Unit = {
    // Looks up the "chr" entry of the genotype stats, falling back to a fresh empty map.
    def chrStats(stats: SampleStats) =
      stats.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]())

    val target = SampleStats()
    val addend = SampleStats()

    // Both objects start with zeroed entries for samples "s1" and "s2".
    for (stats <- List(target, addend); sample <- List("s1", "s2"))
      stats.sampleToSample += sample -> SampleToSampleStats()

    target.sampleToSample("s1").alleleOverlap = 1
    addend.sampleToSample("s2").alleleOverlap = 2

    // Give each object one distinct genotype-stats entry under "chr".
    val targetChr = chrStats(target) += "1" -> mutable.Map(1 -> 1)
    target.genotypeStats += "chr" -> targetChr
    val addendChr = chrStats(addend) += "2" -> mutable.Map(2 -> 2)
    addend.genotypeStats += "chr" -> addendChr

    target += addend
    chrStats(target) shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 2))

    val expectedS1 = SampleToSampleStats()
    val expectedS2 = SampleToSampleStats()
    expectedS1.alleleOverlap = 1
    expectedS2.alleleOverlap = 2
    target.sampleToSample shouldBe mutable.Map("s1" -> expectedS1, "s2" -> expectedS2)

    // Adding the same addend again doubles only the counts it contributes.
    target += addend
    chrStats(target) shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 4))

    // Adding an object to itself doubles every count.
    target += target
    chrStats(target) shouldBe mutable.Map("1" -> mutable.Map(1 -> 2), "2" -> mutable.Map(2 -> 8))
  }
105
106

  /** Checks `alleleOverlap` for every pairing of two diploid genotypes built from alleles G and A. */
  @Test
  def testAlleleOverlap(): Unit = {
    val g = Allele.create("G")
    val a = Allele.create("A")

    // (first genotype, second genotype, expected overlap)
    val cases = List(
      // same allele content, in any order
      (List(g, g), List(g, g), 2),
      (List(a, a), List(a, a), 2),
      (List(g, a), List(g, a), 2),
      (List(g, a), List(a, g), 2),
      (List(a, g), List(g, a), 2),
      (List(a, g), List(a, g), 2),
      // heterozygous against homozygous
      (List(g, a), List(g, g), 1),
      (List(a, g), List(g, g), 1),
      (List(g, g), List(g, a), 1),
      (List(g, g), List(a, g), 1),
      // no allele in common
      (List(g, g), List(a, a), 0),
      (List(a, a), List(g, g), 0)
    )

    cases.foreach {
      case (first, second, expected) => alleleOverlap(first, second) shouldBe expected
    }
  }
Sander Bollen's avatar
Sander Bollen committed
127
128
129

  /**
   * Checks that `mergeStatsMap` adds the entries of its second argument to the first,
   * for string keys, integer keys and a mix of both.
   *
   * The result type is declared as `Unit` so it does not follow the type of the last
   * matcher expression; TestNG ignores `@Test` methods that return a value by default.
   */
  @Test
  def testMergeStatsMap(): Unit = {
    val m1: mutable.Map[Any, Int] = mutable.Map("a" -> 1)
    val m2: mutable.Map[Any, Int] = mutable.Map("b" -> 2)

    mergeStatsMap(m1, m2)

    m1 should equal (mutable.Map("a" -> 1, "b" -> 2))

    val m3: mutable.Map[Any, Int] = mutable.Map(1 -> 500)
    val m4: mutable.Map[Any, Int] = mutable.Map(6 -> 125)

    mergeStatsMap(m3, m4)

    m3 should equal (mutable.Map(1 -> 500, 6 -> 125))

    // Merge the already-merged integer-keyed map into the string-keyed one.
    mergeStatsMap(m1, m3)

    m1 should equal (mutable.Map("a" -> 1, "b" -> 2, 1 -> 500, 6 -> 125))
  }

  /**
   * Checks that `mergeNestedStatsMap` merges an immutable nested stats map into a
   * mutable one, both for an existing inner key and for a new inner key.
   *
   * The result type is declared as `Unit` so it does not follow the type of the last
   * matcher expression; TestNG ignores `@Test` methods that return a value by default.
   */
  @Test
  def testMergeNestedStatsMap(): Unit = {
    val m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map("test" ->
      mutable.Map("nested" -> mutable.Map("a" -> 1)))
    val m2: Map[String, Map[String, Map[Any, Int]]] = Map("test" ->
      Map("nested" -> Map("b" -> 2)))

    mergeNestedStatsMap(m1, m2)

    m1 should equal (mutable.Map("test" -> mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2))))

    val m3: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map("test" ->
      mutable.Map("nestedd" -> mutable.Map(1 -> 500)))
    val m4: Map[String, Map[String, Map[Any, Int]]] = Map("test" ->
      Map("nestedd" -> Map(6 -> 125)))

    mergeNestedStatsMap(m3, m4)

    m3 should equal (mutable.Map("test" -> mutable.Map("nestedd" -> mutable.Map(1 -> 500, 6 -> 125))))

    // Immutable snapshot of m3, matching the type of the second argument used above.
    val m5 = m3.toMap.map(x => x._1 -> x._2.toMap.map(y => y._1 -> y._2.toMap))

    mergeNestedStatsMap(m1, m5)

    m1 should equal (mutable.Map("test" -> mutable.Map("nested" -> mutable.Map("a" -> 1, "b" -> 2),
      "nestedd" -> mutable.Map(1 -> 500, 6 -> 125))))
  }

  /**
   * Checks `valueFromTsv` lookups against the `/sample.tsv` test resource, including
   * a sample id for which an empty result is expected.
   *
   * The result type is declared as `Unit` so it does not follow the type of the last
   * matcher expression; TestNG ignores `@Test` methods that return a value by default.
   */
  @Test
  def testValueOfTsv(): Unit = {
    val sampleTsv = new File(resourcePath("/sample.tsv"))

    valueFromTsv(sampleTsv, "Sample_ID_1", "library") should be (Some("Lib_ID_1"))
    valueFromTsv(sampleTsv, "Sample_ID_2", "library") should be (Some("Lib_ID_2"))
    valueFromTsv(sampleTsv, "Sample_ID_1", "bam") should be (Some("MyFirst.bam"))
    valueFromTsv(sampleTsv, "Sample_ID_2", "bam") should be (Some("MySecond.bam"))
    valueFromTsv(sampleTsv, "Sample_ID_3", "bam") should be (empty)
  }

  /**
   * Runs `main` with a range of option combinations and expects no exception, then
   * checks that the argument parser rejects unknown wiggle names and a missing `-I`.
   *
   * The result type is declared as `Unit` so it does not follow the type of the last
   * matcher expression; TestNG ignores `@Test` methods that return a value by default.
   * Note: the temporary output directory created here is not removed afterwards.
   */
  @Test
  def testMain(): Unit = {
    val tmp = Files.createTempDirectory("vcfStats")
    val outDir = tmp.toAbsolutePath.toString
    val vcf = resourcePath("/chrQ.vcf.gz")
    val ref = resourcePath("/fake_chrQ.fa")

    // Argument prefixes shared by the invocations below.
    val base = Array("-I", vcf, "-R", ref, "-o", outDir)
    val binned = base ++ Array("--binSize", "50", "--writeBinStats")

    noException should be thrownBy main(base)
    noException should be thrownBy main(base :+ "--allInfoTags")
    noException should be thrownBy main(base ++ Array("--allInfoTags", "--allGenotypeTags"))
    noException should be thrownBy main(binned)
    noException should be thrownBy main(binned ++ Array("--generalWiggle", "Total"))
    noException should be thrownBy main(binned ++ Array("--genotypeWiggle", "Total"))

    val genotypes = List("Het", "HetNonRef", "Hom", "HomRef", "HomVar", "Mixed", "NoCall", "NonInformative",
      "Available", "Called", "Filtered", "Variant")

    genotypes.foreach(
      x => noException should be thrownBy main(binned ++ Array("--genotypeWiggle", x))
    )

    val general = List("Biallelic", "ComplexIndel", "Filtered", "FullyDecoded", "Indel", "Mixed",
      "MNP", "MonomorphicInSamples", "NotFiltered", "PointEvent", "PolymorphicInSamples",
      "SimpleDeletion", "SimpleInsertion", "SNP", "StructuralIndel", "Symbolic",
      "SymbolicOrSV", "Variant")

    general.foreach(
      x => noException should be thrownBy main(binned ++ Array("--generalWiggle", x))
    )

    // Returns the parser's result as an Option; the assertions below expect it to be
    // empty (None) for arguments that should be rejected.
    def validateArgs(array: Array[String]): Option[Args] = {
      val argsParser = new OptParser
      argsParser.parse(array, Args())
    }

    validateArgs(binned ++ Array("--genotypeWiggle", "NonexistentThing")) shouldBe empty

    validateArgs(binned ++ Array("--generalWiggle", "NonexistentThing")) shouldBe empty

    // No "-I" argument at all.
    validateArgs(Array("-R", ref, "-o", outDir)) shouldBe empty
  }

  /**
   * Checks the ordering predicate `sortAnyAny` for two integers, two strings and
   * an integer/string pair, in both argument orders.
   *
   * The result type is declared as `Unit` so it does not follow the type of the last
   * matcher expression; TestNG ignores `@Test` methods that return a value by default.
   */
  @Test
  def testSortAnyAny(): Unit = {
    val one: Any = 1
    val two: Any = 2
    val text: Any = "hello"
    val text2: Any = "goodbye"

    // integer against integer
    sortAnyAny(one, two) shouldBe true
    sortAnyAny(two, one) shouldBe false
    // string against string
    sortAnyAny(text, text2) shouldBe false
    sortAnyAny(text2, text) shouldBe true
    // integer against string
    sortAnyAny(one, text) shouldBe true
    sortAnyAny(text, one) shouldBe false
  }

  /**
   * Checks the output of `checkGeneral` for the first record of `/chrQ.vcf.gz`.
   *
   * The same expectations are applied to the "chrQ" entry and to the "total" entry
   * of the result. The "general" counters are read from the entry being checked, so
   * the ones under "total" are verified as well.
   */
  @Test
  def testCheckGeneral(): Unit = {
    // Close the reader once the first record has been read.
    val reader = new VCFFileReader(new File(resourcePath("/chrQ.vcf.gz")))
    val record = try reader.iterator().next() finally reader.close()

    val stats = checkGeneral(record, List())

    // Expected histogram for each top-level key of a contig entry.
    val expectedDistributions = List(
      "SampleDistribution-NonInformative" -> Map(0 -> 1),
      "SampleDistribution-Called" -> Map(3 -> 1),
      "SampleDistribution-Mixed" -> Map(0 -> 1),
      "SampleDistribution-Hom" -> Map(1 -> 1),
      "SampleDistribution-HomRef" -> Map(1 -> 1),
      "SampleDistribution-Available" -> Map(3 -> 1),
      "QUAL" -> Map(1541 -> 1),
      "SampleDistribution-HetNonRef" -> Map(0 -> 1),
      "SampleDistribution-Het" -> Map(2 -> 1),
      "SampleDistribution-NoCall" -> Map(0 -> 1),
      "SampleDistribution-Filtered" -> Map(0 -> 1),
      "SampleDistribution-HomVar" -> Map(0 -> 1),
      "SampleDistribution-Variant" -> Map(2 -> 1)
    )

    // Expected counter for each key below "general".
    val expectedGeneral = List(
      "PolymorphicInSamples" -> 1,
      "ComplexIndel" -> 0,
      "FullyDecoded" -> 0,
      "PointEvent" -> 0,
      "MNP" -> 0,
      "Indel" -> 1,
      "Biallelic" -> 1,
      "SimpleDeletion" -> 0,
      "Variant" -> 1,
      "SymbolicOrSV" -> 0,
      "MonomorphicInSamples" -> 0,
      "SNP" -> 0,
      "Filtered" -> 0,
      "StructuralIndel" -> 0,
      "Total" -> 1,
      "Mixed" -> 0,
      "NotFiltered" -> 1,
      "Symbolic" -> 0,
      "SimpleInsertion" -> 1
    )

    for (contig <- List("chrQ", "total")) {
      withClue(s"entry '$contig': ") {
        stats.get(contig) should not be empty
      }
      val contigStats = stats(contig)

      // The clue names the failing key, since all checks share one source line.
      for ((key, expected) <- expectedDistributions) withClue(s"$contig / $key: ") {
        contigStats.get(key) shouldEqual Some(expected)
      }

      withClue(s"$contig / general: ") {
        contigStats.get("general") should not be empty
      }
      val general = contigStats("general")

      for ((key, expected) <- expectedGeneral) withClue(s"$contig / general / $key: ") {
        general.get(key) shouldEqual Some(expected)
      }
    }
  }

  @Test
  def testCheckGenotype = {
    // TODO: not implemented yet. The body is empty, so this test always passes
    // and makes no assertions.
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
354
}