WipeReadsUnitTest.scala 19.6 KB
Newer Older
1
2
3
4
5
6
7
8
/**
 * Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
 * @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
 */
package nl.lumc.sasc.biopet.core.apps

import java.nio.file.Paths
import java.io.{ File, IOException }
9
import scala.collection.JavaConverters._
10

11
import htsjdk.samtools._
12
13
14
import org.scalatest.Assertions
import org.testng.annotations.Test

15
16
17
/* TODO: helper function to create SAMRecord from SAM alignment string
         so we can test as if using real SAMRecords
*/
18
19
20
21
22
23
24
class WipeReadsUnitTest extends Assertions {

  import WipeReads._

  private def resourcePath(p: String): String =
    Paths.get(getClass.getResource(p).toURI).toString

25
26
27
28
29
30
31
32
33
34
35
  private lazy val samP: SAMLineParser = {
    val samh = new SAMFileHeader
    samh.addSequence(new SAMSequenceRecord("chrQ", 10000))
    samh.addReadGroup(new SAMReadGroupRecord("001"))
    samh.addReadGroup(new SAMReadGroupRecord("002"))
    new SAMLineParser(samh)
  }

  private def makeSAMs(raws: String*): Seq[SAMRecord] =
    raws.map(s => samP.parseLine(s))

36
37
38
39
40
41
  private def makeTempBAM(): File =
    File.createTempFile("WipeReads", java.util.UUID.randomUUID.toString + ".bam")

  private def makeTempBAMIndex(bam: File): File =
    new File(bam.getAbsolutePath.stripSuffix(".bam") + ".bai")

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  val sBAMFile1 = new File(resourcePath("/single01.bam"))
  val sBAMRecs1 = makeSAMs(
    "r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r01\t16\tchrQ\t190\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r01\t16\tchrQ\t290\t60\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r03\t16\tchrQ\t690\t60\t10M\t*\t0\t0\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001",
    "r05\t0\tchrQ\t890\t60\t5M200N5M\t*\t0\t0\tGATACGATAC\tFEFEFEFEFE\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
  )

  val sBAMFile2 = new File(resourcePath("/single02.bam"))
  val sBAMRecs2 = makeSAMs(
    "r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r01\t16\tchrQ\t190\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r01\t16\tchrQ\t290\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r07\t16\tchrQ\t460\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r07\t16\tchrQ\t860\t30\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
    "r08\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:002"
  )

  val sBAMFile3 = new File(resourcePath("/single03.bam"))
  val sBAMFile4 = new File(resourcePath("/single04.bam"))

  val pBAMFile1 = new File(resourcePath("/paired01.bam"))
  val pBAMRecs1 = makeSAMs(
    "r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
    "r01\t163\tchrQ\t150\t60\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001",
    "r01\t83\tchrQ\t190\t60\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r01\t163\tchrQ\t250\t60\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001",
    "r01\t83\tchrQ\t290\t60\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001",
    "r03\t163\tchrQ\t650\t60\t10M\t=\t690\t50\tTTTTTCCCCC\tHHHHHHHHHH\tRG:Z:001",
    "r03\t83\tchrQ\t690\t60\t10M\t=\t650\t-50\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001",
    "r05\t99\tchrQ\t890\t60\t5M200N5M\t=\t1140\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r05\t147\tchrQ\t1140\t60\t10M\t=\t890\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:001"
  )

  val pBAMFile2 = new File(resourcePath("/paired02.bam"))
  val pBAMRecs2 = makeSAMs(
    "r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
    "r01\t163\tchrQ\t150\t30\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002",
    "r01\t83\tchrQ\t190\t30\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r01\t163\tchrQ\t250\t30\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002",
    "r01\t83\tchrQ\t290\t30\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
    "r08\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:002"
  )

  val pBAMFile3 = new File(resourcePath("/paired03.bam"))
  val BEDFile1 = new File(resourcePath("/rrna01.bed"))
  val minArgList = List("-I", sBAMFile1.toString, "-l", BEDFile1.toString, "-o", "mock.bam")
bow's avatar
bow committed
103

104
  @Test def testMakeRawIntervalFromBED() = {
105
    val intervals: Vector[RawInterval] = makeRawIntervalFromFile(BEDFile1).toVector
bow's avatar
bow committed
106
107
    assert(intervals.length == 3)
    assert(intervals.last.chrom == "chrQ")
108
109
110
111
112
    assert(intervals.last.start == 291)
    assert(intervals.last.end == 320)
    assert(intervals.head.chrom == "chrQ")
    assert(intervals.head.start == 991)
    assert(intervals.head.end == 1000)
bow's avatar
bow committed
113
114
115
116
  }

  @Test def testSingleBAMDefault() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
117
118
119
      RawInterval("chrQ", 291, 320), // overlaps r01, second hit,
      RawInterval("chrQ", 451, 480), // overlaps r04
      RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
120
121
122
123
    )
    // NOTE: while it's possible to have our filter produce false positives
    //       it is highly unlikely in our test cases as we are setting a very low FP rate
    //       and only filling the filter with a few items
124
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
bow's avatar
bow committed
125
    // by default, set elements are SAM record read names
126
127
    assert(!isFilteredOut(sBAMRecs1(0)))
    assert(isFilteredOut(sBAMRecs1(1)))
bow's avatar
bow committed
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
    assert(isFilteredOut(sBAMRecs1(2)))
    assert(isFilteredOut(sBAMRecs1(3)))
    assert(!isFilteredOut(sBAMRecs1(4)))
    assert(!isFilteredOut(sBAMRecs1(5)))
    assert(!isFilteredOut(sBAMRecs1(6)))
  }

  @Test def testSingleBAMIntervalWithoutChr() = {
    val intervals: Iterator[RawInterval] = Iterator(
      RawInterval("Q", 291, 320),
      RawInterval("chrQ", 451, 480),
      RawInterval("P", 191, 480)
    )
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
    assert(!isFilteredOut(sBAMRecs1(0)))
    assert(isFilteredOut(sBAMRecs1(1)))
144
145
146
147
148
    assert(isFilteredOut(sBAMRecs1(2)))
    assert(isFilteredOut(sBAMRecs1(3)))
    assert(!isFilteredOut(sBAMRecs1(4)))
    assert(!isFilteredOut(sBAMRecs1(5)))
    assert(!isFilteredOut(sBAMRecs1(6)))
bow's avatar
bow committed
149
150
  }

151
152
  @Test def testSingleBAMDefaultPartialExonOverlap() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
153
      RawInterval("chrQ", 881, 1000) // overlaps first exon of r05
154
    )
155
156
157
158
159
160
161
162
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
    assert(!isFilteredOut(sBAMRecs1(0)))
    assert(!isFilteredOut(sBAMRecs1(1)))
    assert(!isFilteredOut(sBAMRecs1(2)))
    assert(!isFilteredOut(sBAMRecs1(3)))
    assert(!isFilteredOut(sBAMRecs1(4)))
    assert(isFilteredOut(sBAMRecs1(5)))
    assert(!isFilteredOut(sBAMRecs1(6)))
163
164
165
166
  }

  @Test def testSingleBAMDefaultNoExonOverlap() = {
    val intervals: Iterator[RawInterval] = Iterator(
167
168
      RawInterval("chrP", 881, 1000),
      RawInterval("chrQ", 900, 920)
169
    )
170
171
172
173
174
175
176
177
178
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
    assert(!isFilteredOut(sBAMRecs1(0)))
    assert(!isFilteredOut(sBAMRecs1(1)))
    assert(!isFilteredOut(sBAMRecs1(2)))
    assert(!isFilteredOut(sBAMRecs1(3)))
    assert(!isFilteredOut(sBAMRecs1(4)))
    assert(!isFilteredOut(sBAMRecs1(5)))
    assert(!isFilteredOut(sBAMRecs1(5)))
    assert(!isFilteredOut(sBAMRecs1(6)))
179
180
  }

bow's avatar
bow committed
181
182
  @Test def testSingleBAMFilterOutMultiNotSet() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
183
184
185
      RawInterval("chrQ", 291, 320), // overlaps r01, second hit,
      RawInterval("chrQ", 451, 480), // overlaps r04
      RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
186
    )
187
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10,
188
      filterOutMulti = false)
189
190
191
192
193
194
195
    assert(!isFilteredOut(sBAMRecs1(0)))
    assert(!isFilteredOut(sBAMRecs1(1)))
    assert(isFilteredOut(sBAMRecs1(2)))
    assert(isFilteredOut(sBAMRecs1(3)))
    assert(!isFilteredOut(sBAMRecs1(4)))
    assert(!isFilteredOut(sBAMRecs1(5)))
    assert(!isFilteredOut(sBAMRecs1(6)))
bow's avatar
bow committed
196
197
198
199
  }

  @Test def testSingleBAMFilterMinMapQ() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
200
201
      RawInterval("chrQ", 291, 320),
      RawInterval("chrQ", 451, 480)
bow's avatar
bow committed
202
    )
203
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
204
      minMapQ = 60)
205
    assert(!isFilteredOut(sBAMRecs2(0)))
bow's avatar
bow committed
206
    // r01 is not in since it is below the MAPQ threshold
207
208
209
210
211
212
213
    assert(!isFilteredOut(sBAMRecs2(1)))
    assert(!isFilteredOut(sBAMRecs2(2)))
    assert(isFilteredOut(sBAMRecs2(3)))
    assert(isFilteredOut(sBAMRecs2(4)))
    assert(isFilteredOut(sBAMRecs2(5)))
    assert(!isFilteredOut(sBAMRecs2(6)))
    assert(!isFilteredOut(sBAMRecs2(7)))
bow's avatar
bow committed
214
215
216
217
  }

  @Test def testSingleBAMFilterMinMapQFilterOutMultiNotSet() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
218
219
      RawInterval("chrQ", 291, 320),
      RawInterval("chrQ", 451, 480)
bow's avatar
bow committed
220
    )
221
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
222
      minMapQ = 60, filterOutMulti = false)
223
224
    assert(!isFilteredOut(sBAMRecs2(0)))
    assert(!isFilteredOut(sBAMRecs2(1)))
bow's avatar
bow committed
225
    // this r01 is not in since it is below the MAPQ threshold
226
227
228
    assert(!isFilteredOut(sBAMRecs2(2)))
    assert(isFilteredOut(sBAMRecs2(3)))
    assert(isFilteredOut(sBAMRecs2(4)))
bow's avatar
bow committed
229
    // this r07 is not in since filterOuMulti is false
230
231
232
    assert(!isFilteredOut(sBAMRecs2(5)))
    assert(!isFilteredOut(sBAMRecs2(6)))
    assert(!isFilteredOut(sBAMRecs2(7)))
bow's avatar
bow committed
233
234
235
236
  }

  @Test def testSingleBAMFilterReadGroupIDs() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
237
238
      RawInterval("chrQ", 291, 320),
      RawInterval("chrQ", 451, 480)
bow's avatar
bow committed
239
    )
240
    val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
241
      readGroupIDs = Set("002", "003"))
242
    assert(!isFilteredOut(sBAMRecs2(0)))
bow's avatar
bow committed
243
    // only r01 is in the set since it is RG 002
244
245
246
247
248
249
250
    assert(isFilteredOut(sBAMRecs2(1)))
    assert(isFilteredOut(sBAMRecs2(2)))
    assert(!isFilteredOut(sBAMRecs2(3)))
    assert(!isFilteredOut(sBAMRecs2(4)))
    assert(!isFilteredOut(sBAMRecs2(5)))
    assert(!isFilteredOut(sBAMRecs2(6)))
    assert(!isFilteredOut(sBAMRecs2(7)))
bow's avatar
bow committed
251
252
253
254
  }

  @Test def testPairBAMDefault() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
255
256
257
      RawInterval("chrQ", 291, 320), // overlaps r01, second hit,
      RawInterval("chrQ", 451, 480), // overlaps r04
      RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
258
    )
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
    val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
    assert(!isFilteredOut(pBAMRecs1(0)))
    assert(!isFilteredOut(pBAMRecs1(1)))
    assert(isFilteredOut(pBAMRecs1(2)))
    assert(isFilteredOut(pBAMRecs1(3)))
    assert(isFilteredOut(pBAMRecs1(4)))
    assert(isFilteredOut(pBAMRecs1(5)))
    assert(isFilteredOut(pBAMRecs1(6)))
    assert(isFilteredOut(pBAMRecs1(7)))
    assert(!isFilteredOut(pBAMRecs1(8)))
    assert(!isFilteredOut(pBAMRecs1(9)))
    assert(!isFilteredOut(pBAMRecs1(10)))
    assert(!isFilteredOut(pBAMRecs1(11)))
    assert(!isFilteredOut(pBAMRecs1(12)))
    assert(!isFilteredOut(pBAMRecs1(13)))
bow's avatar
bow committed
274
  }
275

276
277
  @Test def testPairBAMPartialExonOverlap() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
278
      RawInterval("chrQ", 891, 1000)
279
    )
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
    val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
    assert(!isFilteredOut(pBAMRecs1(0)))
    assert(!isFilteredOut(pBAMRecs1(1)))
    assert(!isFilteredOut(pBAMRecs1(2)))
    assert(!isFilteredOut(pBAMRecs1(3)))
    assert(!isFilteredOut(pBAMRecs1(4)))
    assert(!isFilteredOut(pBAMRecs1(5)))
    assert(!isFilteredOut(pBAMRecs1(6)))
    assert(!isFilteredOut(pBAMRecs1(7)))
    assert(!isFilteredOut(pBAMRecs1(8)))
    assert(!isFilteredOut(pBAMRecs1(9)))
    assert(isFilteredOut(pBAMRecs1(10)))
    assert(isFilteredOut(pBAMRecs1(11)))
    assert(!isFilteredOut(pBAMRecs1(12)))
    assert(!isFilteredOut(pBAMRecs1(13)))
295
296
  }

bow's avatar
bow committed
297
298
  @Test def testPairBAMFilterOutMultiNotSet() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
299
300
301
      RawInterval("chrQ", 291, 320), // overlaps r01, second hit,
      RawInterval("chrQ", 451, 480), // overlaps r04
      RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
302
    )
303
    val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile1, bloomSize = 1000, bloomFp = 1e-10,
304
      filterOutMulti = false)
305
306
307
308
309
310
311
312
313
314
315
316
317
318
    assert(!isFilteredOut(pBAMRecs1(0)))
    assert(!isFilteredOut(pBAMRecs1(1)))
    assert(!isFilteredOut(pBAMRecs1(2)))
    assert(!isFilteredOut(pBAMRecs1(3)))
    assert(isFilteredOut(pBAMRecs1(4)))
    assert(isFilteredOut(pBAMRecs1(5)))
    assert(isFilteredOut(pBAMRecs1(6)))
    assert(isFilteredOut(pBAMRecs1(7)))
    assert(!isFilteredOut(pBAMRecs1(8)))
    assert(!isFilteredOut(pBAMRecs1(9)))
    assert(!isFilteredOut(pBAMRecs1(10)))
    assert(!isFilteredOut(pBAMRecs1(11)))
    assert(!isFilteredOut(pBAMRecs1(12)))
    assert(!isFilteredOut(pBAMRecs1(13)))
bow's avatar
bow committed
319
320
321
322
  }

  @Test def testPairBAMFilterMinMapQ() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
323
324
      RawInterval("chrQ", 291, 320),
      RawInterval("chrQ", 451, 480)
bow's avatar
bow committed
325
    )
326
    val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
327
      minMapQ = 60)
bow's avatar
bow committed
328
    // r01 is not in since it is below the MAPQ threshold
329
330
331
332
333
334
335
336
337
338
    assert(!isFilteredOut(pBAMRecs2(0)))
    assert(!isFilteredOut(pBAMRecs2(1)))
    assert(!isFilteredOut(pBAMRecs2(2)))
    assert(!isFilteredOut(pBAMRecs2(3)))
    assert(!isFilteredOut(pBAMRecs2(4)))
    assert(!isFilteredOut(pBAMRecs2(5)))
    assert(isFilteredOut(pBAMRecs2(6)))
    assert(isFilteredOut(pBAMRecs2(7)))
    assert(!isFilteredOut(pBAMRecs2(8)))
    assert(!isFilteredOut(pBAMRecs2(9)))
bow's avatar
bow committed
339
340
341
342
  }

  @Test def testPairBAMFilterReadGroupIDs() = {
    val intervals: Iterator[RawInterval] = Iterator(
bow's avatar
bow committed
343
344
      RawInterval("chrQ", 291, 320),
      RawInterval("chrQ", 451, 480)
bow's avatar
bow committed
345
    )
346
    val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
347
      readGroupIDs = Set("002", "003"))
bow's avatar
bow committed
348
    // only r01 is in the set since it is RG 002
349
350
351
352
353
354
355
356
357
358
    assert(!isFilteredOut(pBAMRecs2(0)))
    assert(!isFilteredOut(pBAMRecs2(1)))
    assert(isFilteredOut(pBAMRecs2(2)))
    assert(isFilteredOut(pBAMRecs2(3)))
    assert(isFilteredOut(pBAMRecs2(4)))
    assert(isFilteredOut(pBAMRecs2(5)))
    assert(!isFilteredOut(pBAMRecs2(6)))
    assert(!isFilteredOut(pBAMRecs2(7)))
    assert(!isFilteredOut(pBAMRecs2(8)))
    assert(!isFilteredOut(pBAMRecs2(9)))
bow's avatar
bow committed
359
360
  }

361
362
  @Test def testWriteSingleBAMDefault() = {
    val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
363
364
    val outBAM = makeTempBAM()
    val outBAMIndex = makeTempBAMIndex(outBAM)
365
366
    outBAM.deleteOnExit()
    outBAMIndex.deleteOnExit()
367
368
    writeFilteredBAM(mockFilterOutFunc, sBAMFile1, outBAM)
    val exp = new SAMFileReader(sBAMFile3).asScala
369
    val obs = new SAMFileReader(outBAM).asScala
370
371
    for ((e, o) <- exp.zip(obs))
      assert(e.getSAMString === o.getSAMString)
372
373
374
375
    assert(outBAM.exists)
    assert(outBAMIndex.exists)
  }

376
377
378
379
380
381
382
383
384
385
  @Test def testWriteSingleBAMAndFilteredBAM() = {
    val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
    val outBAM = makeTempBAM()
    val outBAMIndex = makeTempBAMIndex(outBAM)
    outBAM.deleteOnExit()
    outBAMIndex.deleteOnExit()
    val filteredOutBAM = makeTempBAM()
    val filteredOutBAMIndex = makeTempBAMIndex(filteredOutBAM)
    filteredOutBAM.deleteOnExit()
    filteredOutBAMIndex.deleteOnExit()
386
387
    writeFilteredBAM(mockFilterOutFunc, sBAMFile1, outBAM, filteredOutBAM = filteredOutBAM)
    val exp = new SAMFileReader(sBAMFile4).asScala
388
    val obs = new SAMFileReader(filteredOutBAM).asScala
389
390
    for ((e, o) <- exp.zip(obs))
      assert(e.getSAMString === o.getSAMString)
391
392
393
394
395
396
    assert(outBAM.exists)
    assert(outBAMIndex.exists)
    assert(filteredOutBAM.exists)
    assert(filteredOutBAMIndex.exists)
  }

397
398
  @Test def testWritePairBAMDefault() = {
    val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
399
400
    val outBAM = makeTempBAM()
    val outBAMIndex = makeTempBAMIndex(outBAM)
401
402
    outBAM.deleteOnExit()
    outBAMIndex.deleteOnExit()
403
404
    writeFilteredBAM(mockFilterOutFunc, pBAMFile1, outBAM)
    val exp = new SAMFileReader(pBAMFile3).asScala
405
    val obs = new SAMFileReader(outBAM).asScala
406
407
    for ((e, o) <- exp.zip(obs))
      assert(e.getSAMString === o.getSAMString)
408
409
410
411
    assert(outBAM.exists)
    assert(outBAMIndex.exists)
  }

412
413
414
415
416
417
418
419
420
421
422
423
  @Test def testOptMinimum() = {
    val opts = parseOption(Map(), minArgList)
    assert(opts.contains("inputBAM"))
    assert(opts.contains("targetRegions"))
    assert(opts.contains("outputBAM"))
  }

  @Test def testOptMissingBAI() = {
    val pathBAM = File.createTempFile("WipeReads", java.util.UUID.randomUUID.toString)
    assert(pathBAM.exists)
    val argList = List(
      "--inputBAM", pathBAM.toPath.toString,
424
      "--targetRegions", BEDFile1.getPath,
425
426
427
428
429
430
431
432
433
434
435
      "--outputBAM", "mock.bam")
    val thrown = intercept[IOException] {
      parseOption(Map(), argList)
    }
    assert(thrown.getMessage === "Index for input BAM file " + pathBAM + " not found")
    pathBAM.deleteOnExit()
  }

  @Test def testOptMissingRegions() = {
    val pathRegion = "/i/dont/exist.bed"
    val argList = List(
436
      "--inputBAM", sBAMFile1.getPath,
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
      "--targetRegions", pathRegion,
      "--outputBAM", "mock.bam"
    )
    val thrown = intercept[IOException] {
      parseOption(Map(), argList)
    }
    assert(thrown.getMessage === "Input file " + pathRegion + " not found")
  }

  @Test def testOptUnexpected() = {
    val argList = List("--strand", "sense") ::: minArgList
    val thrown = intercept[IllegalArgumentException] {
      parseOption(Map(), argList)
    }
    assert(thrown.getMessage === "Unexpected or duplicate option flag: --strand")
  }

  @Test def testOptMinOverlapFraction() = {
    val argList = List("--minOverlapFraction", "0.8") ::: minArgList
    val opts = parseOption(Map(), argList)
    assert(opts("minOverlapFraction") == 0.8)
  }

  @Test def testOptMinMapQ() = {
    val argList = List("--minMapQ", "13") ::: minArgList
    val opts = parseOption(Map(), argList)
    assert(opts("minMapQ") == 13)
  }

  @Test def testOptMakeIndex() = {
467
    val argList = List("--noMakeIndex") ::: minArgList
468
    val opts = parseOption(Map(), argList)
469
    assert(opts("noMakeIndex") == true) // why can't we evaluate directly??
470
471
472
473
474
475
476
477
478
479
480
  }

  @Test def testOptLimitToRegion() = {
    val argList = List("--limitToRegion") ::: minArgList
    val opts = parseOption(Map(), argList)
    assert(opts("limitToRegion") == true)
  }

  @Test def testOptSingleReadGroup() = {
    val argList = List("--readGroup", "g1") ::: minArgList
    val opts = parseOption(Map(), argList)
481
    assert(opts("readGroup") == Set("g1"))
482
483
484
485
486
  }

  @Test def testOptMultipleReadGroup() = {
    val argList = List("--readGroup", "g1,g2") ::: minArgList
    val opts = parseOption(Map(), argList)
487
    assert(opts("readGroup") == Set("g1", "g2"))
488
  }
489
}