WipeReadsTest.scala 22.2 KB
Newer Older
1
2
3
4
/**
 * Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
 * @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
 */
bow's avatar
bow committed
5
package nl.lumc.sasc.biopet.tools
6

bow's avatar
bow committed
7
import java.io.File
8
import java.nio.file.Paths
bow's avatar
bow committed
9
import scala.collection.JavaConverters._
10

bow's avatar
bow committed
11
import htsjdk.samtools.SAMFileHeader
12
import htsjdk.samtools.SAMFileWriter
bow's avatar
bow committed
13
14
15
16
17
18
19
20
import htsjdk.samtools.SAMLineParser
import htsjdk.samtools.SAMReadGroupRecord
import htsjdk.samtools.SAMRecord
import htsjdk.samtools.SAMSequenceRecord
import htsjdk.samtools.SamReader
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.ValidationStringency
import htsjdk.samtools.util.Interval
21
import org.scalatest.Matchers
22
23
24
import org.mockito.Matchers._
import org.mockito.Mockito.{ inOrder => inOrd, times, verify }
import org.scalatest.mock.MockitoSugar
25
import org.scalatest.testng.TestNGSuite
bow's avatar
bow committed
26
import org.testng.annotations.Test
bow's avatar
bow committed
27

bow's avatar
bow committed
28
class WipeReadsTest extends TestNGSuite with MockitoSugar with Matchers {
29

bow's avatar
bow committed
30
  import WipeReads._
31
32
33
34

  private def resourcePath(p: String): String =
    Paths.get(getClass.getResource(p).toURI).toString

bow's avatar
bow committed
35
  private val samP: SAMLineParser = {
36
37
    val samh = new SAMFileHeader
    samh.addSequence(new SAMSequenceRecord("chrQ", 10000))
38
    samh.addSequence(new SAMSequenceRecord("chrR", 10000))
39
40
41
42
43
    samh.addReadGroup(new SAMReadGroupRecord("001"))
    samh.addReadGroup(new SAMReadGroupRecord("002"))
    new SAMLineParser(samh)
  }

bow's avatar
bow committed
44
  private def makeSams(raws: String*): Seq[SAMRecord] =
45
46
    raws.map(s => samP.parseLine(s))

bow's avatar
bow committed
47
48
49
50
51
  private def makeSamReader(f: File): SamReader = SamReaderFactory
    .make()
    .validationStringency(ValidationStringency.LENIENT)
    .open(f)

52
53
54
  val bloomSize: Long = 1000
  val bloomFp: Double = 1e-10

bow's avatar
bow committed
55
56
  val sBamFile1 = new File(resourcePath("/single01.bam"))
  val sBamRecs1 = makeSams(
57
58
59
60
61
62
63
64
65
    "r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r01\t16\tchrQ\t190\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r01\t16\tchrQ\t290\t60\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r03\t16\tchrQ\t690\t60\t10M\t*\t0\t0\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001",
    "r05\t0\tchrQ\t890\t60\t5M200N5M\t*\t0\t0\tGATACGATAC\tFEFEFEFEFE\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
  )

bow's avatar
bow committed
66
67
  val sBamFile2 = new File(resourcePath("/single02.bam"))
  val sBamRecs2 = makeSams(
68
69
70
71
72
73
74
75
76
77
    "r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r01\t16\tchrQ\t190\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r01\t16\tchrQ\t290\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r07\t16\tchrQ\t460\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r07\t16\tchrQ\t860\t30\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
    "r08\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:002"
  )

bow's avatar
bow committed
78
79
  val sBamFile3 = new File(resourcePath("/single03.bam"))
  val sBamFile4 = new File(resourcePath("/single04.bam"))
80

81
82
83
84
85
86
87
88
89
  val sBamFile5 = new File(resourcePath("/single05.bam"))
  val sBamRecs5 = makeSams(
    "r02\t16\tchrR\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r04\t0\tchrQ\t500\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r01\t0\tchrR\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r03\t16\tchrQ\t500\t60\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r05\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
  )

bow's avatar
bow committed
90
91
  val pBamFile1 = new File(resourcePath("/paired01.bam"))
  val pBamRecs1 = makeSams(
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    "r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
    "r01\t163\tchrQ\t150\t60\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001",
    "r01\t83\tchrQ\t190\t60\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r01\t163\tchrQ\t250\t60\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001",
    "r01\t83\tchrQ\t290\t60\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
    "r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001",
    "r03\t163\tchrQ\t650\t60\t10M\t=\t690\t50\tTTTTTCCCCC\tHHHHHHHHHH\tRG:Z:001",
    "r03\t83\tchrQ\t690\t60\t10M\t=\t650\t-50\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001",
    "r05\t99\tchrQ\t890\t60\t5M200N5M\t=\t1140\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r05\t147\tchrQ\t1140\t60\t10M\t=\t890\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:001"
  )

bow's avatar
bow committed
108
109
  val pBamFile2 = new File(resourcePath("/paired02.bam"))
  val pBamRecs2 = makeSams(
110
111
112
113
114
115
116
117
118
119
120
121
    "r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
    "r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
    "r01\t163\tchrQ\t150\t30\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002",
    "r01\t83\tchrQ\t190\t30\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r01\t163\tchrQ\t250\t30\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002",
    "r01\t83\tchrQ\t290\t30\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
    "r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
    "r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
    "r08\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:002"
  )

bow's avatar
bow committed
122
  val pBamFile3 = new File(resourcePath("/paired03.bam"))
bow's avatar
bow committed
123

bow's avatar
bow committed
124
  val BedFile1 = new File(resourcePath("/rrna01.bed"))
bow's avatar
bow committed
125
  val BedFile2 = new File(resourcePath("/rrna02.bed"))
bow's avatar
bow committed
126
  val RefFlatFile1 = new File(resourcePath("/rrna01.refFlat"))
bow's avatar
bow committed
127
  val GtfFile1 = new File(resourcePath("/rrna01.gtf"))
bow's avatar
bow committed
128

bow's avatar
bow committed
129
130
131
132
133
134
135
  @Test def testMakeIntervalFromUnknown() = {
    val thrown = intercept[IllegalArgumentException] {
      makeIntervalFromFile(new File("false.bam"))
    }
    thrown.getMessage should ===("Unexpected interval file type: false.bam")
  }

bow's avatar
bow committed
136
  @Test def testMakeIntervalFromBed() = {
bow's avatar
bow committed
137
    val intervals: List[Interval] = makeIntervalFromFile(BedFile1)
bow's avatar
bow committed
138
    intervals.length shouldBe 3
bow's avatar
bow committed
139
    intervals.head.getSequence should ===("chrQ")
bow's avatar
bow committed
140
141
142
143
144
145
146
147
148
149
150
    intervals.head.getStart shouldBe 991
    intervals.head.getEnd shouldBe 1000
    intervals.last.getSequence should ===("chrQ")
    intervals.last.getStart shouldBe 291
    intervals.last.getEnd shouldBe 320
  }

  @Test def testMakeIntervalFromRefFlat() = {
    val intervals: List[Interval] = makeIntervalFromFile(RefFlatFile1)
    intervals.length shouldBe 5
    intervals.head.getSequence should ===("chrS")
151
    intervals.head.getStart shouldBe 101
bow's avatar
bow committed
152
    intervals.head.getEnd shouldBe 500
bow's avatar
bow committed
153
    intervals(2).getSequence should ===("chrQ")
154
    intervals(2).getStart shouldBe 801
bow's avatar
bow committed
155
156
    intervals(2).getEnd shouldBe 1000
    intervals.last.getSequence should ===("chrQ")
157
    intervals.last.getStart shouldBe 101
bow's avatar
bow committed
158
    intervals.last.getEnd shouldBe 200
bow's avatar
bow committed
159
160
  }

bow's avatar
bow committed
161
162
163
164
165
166
167
168
169
170
171
  @Test def testMakeIntervalFromGtf() = {
    val intervals: List[Interval] = makeIntervalFromFile(GtfFile1, "exon")
    intervals.length shouldBe 3
    intervals.head.getSequence should ===("chrQ")
    intervals.head.getStart shouldBe 669
    intervals.head.getEnd shouldBe 778
    intervals.last.getSequence should ===("chrP")
    intervals.last.getStart shouldBe 2949
    intervals.last.getEnd shouldBe 3063
  }

bow's avatar
bow committed
172
173
174
175
176
177
178
179
180
181
182
  @Test def testMakeIntervalFromBedOverlap() = {
    val intervals: List[Interval] = makeIntervalFromFile(BedFile2)
    intervals.length shouldBe 4
    intervals.head.getSequence should ===("chrQ")
    intervals.head.getStart shouldBe 451
    intervals.head.getEnd shouldBe 480
    intervals.last.getSequence should ===("chrQ")
    intervals.last.getStart shouldBe 2
    intervals.last.getEnd shouldBe 250
  }

bow's avatar
bow committed
183
  @Test def testSingleBamDefault() = {
bow's avatar
bow committed
184
185
186
187
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320), // overlaps r01, second hit,
      new Interval("chrQ", 451, 480), // overlaps r04
      new Interval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
188
189
190
191
    )
    // NOTE: while it's possible to have our filter produce false positives
    //       it is highly unlikely in our test cases as we are setting a very low FP rate
    //       and only filling the filter with a few items
bow's avatar
bow committed
192
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile1, bloomSize = bloomSize, bloomFp = bloomFp)
bow's avatar
bow committed
193
    // by default, set elements are SAM record read names
bow's avatar
bow committed
194
195
196
197
198
199
200
    filterNotFunc(sBamRecs1(0)) shouldBe false
    filterNotFunc(sBamRecs1(1)) shouldBe true
    filterNotFunc(sBamRecs1(2)) shouldBe true
    filterNotFunc(sBamRecs1(3)) shouldBe true
    filterNotFunc(sBamRecs1(4)) shouldBe false
    filterNotFunc(sBamRecs1(5)) shouldBe false
    filterNotFunc(sBamRecs1(6)) shouldBe false
bow's avatar
bow committed
201
202
  }

bow's avatar
bow committed
203
  @Test def testSingleBamIntervalWithoutChr() = {
bow's avatar
bow committed
204
205
206
207
    val intervals: List[Interval] = List(
      new Interval("Q", 291, 320),
      new Interval("chrQ", 451, 480),
      new Interval("P", 191, 480)
bow's avatar
bow committed
208
    )
bow's avatar
bow committed
209
210
211
212
213
214
215
216
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile1, bloomSize = bloomSize, bloomFp = bloomFp)
    filterNotFunc(sBamRecs1(0)) shouldBe false
    filterNotFunc(sBamRecs1(1)) shouldBe true
    filterNotFunc(sBamRecs1(2)) shouldBe true
    filterNotFunc(sBamRecs1(3)) shouldBe true
    filterNotFunc(sBamRecs1(4)) shouldBe false
    filterNotFunc(sBamRecs1(5)) shouldBe false
    filterNotFunc(sBamRecs1(6)) shouldBe false
bow's avatar
bow committed
217
218
  }

bow's avatar
bow committed
219
  @Test def testSingleBamDefaultPartialExonOverlap() = {
bow's avatar
bow committed
220
221
    val intervals: List[Interval] = List(
      new Interval("chrQ", 881, 1000) // overlaps first exon of r05
222
    )
bow's avatar
bow committed
223
224
225
226
227
228
229
230
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile1, bloomSize = bloomSize, bloomFp = bloomFp)
    filterNotFunc(sBamRecs1(0)) shouldBe false
    filterNotFunc(sBamRecs1(1)) shouldBe false
    filterNotFunc(sBamRecs1(2)) shouldBe false
    filterNotFunc(sBamRecs1(3)) shouldBe false
    filterNotFunc(sBamRecs1(4)) shouldBe false
    filterNotFunc(sBamRecs1(5)) shouldBe true
    filterNotFunc(sBamRecs1(6)) shouldBe false
231
232
  }

bow's avatar
bow committed
233
  @Test def testSingleBamDefaultNoExonOverlap() = {
bow's avatar
bow committed
234
235
236
    val intervals: List[Interval] = List(
      new Interval("chrP", 881, 1000),
      new Interval("chrQ", 900, 920)
237
    )
bow's avatar
bow committed
238
239
240
241
242
243
244
245
246
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile1, bloomSize = bloomSize, bloomFp = bloomFp)
    filterNotFunc(sBamRecs1(0)) shouldBe false
    filterNotFunc(sBamRecs1(1)) shouldBe false
    filterNotFunc(sBamRecs1(2)) shouldBe false
    filterNotFunc(sBamRecs1(3)) shouldBe false
    filterNotFunc(sBamRecs1(4)) shouldBe false
    filterNotFunc(sBamRecs1(5)) shouldBe false
    filterNotFunc(sBamRecs1(5)) shouldBe false
    filterNotFunc(sBamRecs1(6)) shouldBe false
247
248
  }

249
250
251
252
253
254
255
256
257
258
259
260
261
  @Test def testSingleBamDifferentChromosomes() = {
    val intervals: List[Interval] = List(
      new Interval("chrQ", 50, 55),
      new Interval("chrR", 500, 505)
    )
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile5, bloomSize = bloomSize, bloomFp = bloomFp)
    filterNotFunc(sBamRecs5(0)) shouldBe true
    filterNotFunc(sBamRecs5(1)) shouldBe false
    filterNotFunc(sBamRecs5(2)) shouldBe false
    filterNotFunc(sBamRecs5(3)) shouldBe true
    filterNotFunc(sBamRecs5(4)) shouldBe false
  }

bow's avatar
bow committed
262
  @Test def testSingleBamFilterOutMultiNotSet() = {
bow's avatar
bow committed
263
264
265
266
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320), // overlaps r01, second hit,
      new Interval("chrQ", 451, 480), // overlaps r04
      new Interval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
267
    )
bow's avatar
bow committed
268
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile1, bloomSize = bloomSize, bloomFp = bloomFp,
269
      filterOutMulti = false)
bow's avatar
bow committed
270
271
272
273
274
275
276
    filterNotFunc(sBamRecs1(0)) shouldBe false
    filterNotFunc(sBamRecs1(1)) shouldBe false
    filterNotFunc(sBamRecs1(2)) shouldBe true
    filterNotFunc(sBamRecs1(3)) shouldBe true
    filterNotFunc(sBamRecs1(4)) shouldBe false
    filterNotFunc(sBamRecs1(5)) shouldBe false
    filterNotFunc(sBamRecs1(6)) shouldBe false
bow's avatar
bow committed
277
278
  }

bow's avatar
bow committed
279
  @Test def testSingleBamFilterMinMapQ() = {
bow's avatar
bow committed
280
281
282
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320),
      new Interval("chrQ", 451, 480)
bow's avatar
bow committed
283
    )
bow's avatar
bow committed
284
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile2, bloomSize = bloomSize, bloomFp = bloomFp,
285
      minMapQ = 60)
bow's avatar
bow committed
286
    filterNotFunc(sBamRecs2(0)) shouldBe false
bow's avatar
bow committed
287
    // r01 is not in since it is below the MAPQ threshold
bow's avatar
bow committed
288
289
290
291
292
293
294
    filterNotFunc(sBamRecs2(1)) shouldBe false
    filterNotFunc(sBamRecs2(2)) shouldBe false
    filterNotFunc(sBamRecs2(3)) shouldBe true
    filterNotFunc(sBamRecs2(4)) shouldBe true
    filterNotFunc(sBamRecs2(5)) shouldBe true
    filterNotFunc(sBamRecs2(6)) shouldBe false
    filterNotFunc(sBamRecs2(7)) shouldBe false
bow's avatar
bow committed
295
296
  }

bow's avatar
bow committed
297
  @Test def testSingleBamFilterMinMapQFilterOutMultiNotSet() = {
bow's avatar
bow committed
298
299
300
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320),
      new Interval("chrQ", 451, 480)
bow's avatar
bow committed
301
    )
bow's avatar
bow committed
302
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile2, bloomSize = bloomSize, bloomFp = bloomFp,
303
      minMapQ = 60, filterOutMulti = false)
bow's avatar
bow committed
304
305
    filterNotFunc(sBamRecs2(0)) shouldBe false
    filterNotFunc(sBamRecs2(1)) shouldBe false
bow's avatar
bow committed
306
    // this r01 is not in since it is below the MAPQ threshold
bow's avatar
bow committed
307
308
309
    filterNotFunc(sBamRecs2(2)) shouldBe false
    filterNotFunc(sBamRecs2(3)) shouldBe true
    filterNotFunc(sBamRecs2(4)) shouldBe true
bow's avatar
bow committed
310
    // this r07 is not in since filterOutMulti is false
bow's avatar
bow committed
311
312
313
    filterNotFunc(sBamRecs2(5)) shouldBe false
    filterNotFunc(sBamRecs2(6)) shouldBe false
    filterNotFunc(sBamRecs2(7)) shouldBe false
bow's avatar
bow committed
314
315
  }

bow's avatar
bow committed
316
  @Test def testSingleBamFilterReadGroupIDs() = {
bow's avatar
bow committed
317
318
319
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320),
      new Interval("chrQ", 451, 480)
bow's avatar
bow committed
320
    )
bow's avatar
bow committed
321
    val filterNotFunc = makeFilterNotFunction(intervals, sBamFile2, bloomSize = bloomSize, bloomFp = bloomFp,
bow's avatar
bow committed
322
      readGroupIds = Set("002", "003"))
bow's avatar
bow committed
323
    filterNotFunc(sBamRecs2(0)) shouldBe false
bow's avatar
bow committed
324
    // only r01 is in the set since it is RG 002
bow's avatar
bow committed
325
326
327
328
329
330
331
    filterNotFunc(sBamRecs2(1)) shouldBe true
    filterNotFunc(sBamRecs2(2)) shouldBe true
    filterNotFunc(sBamRecs2(3)) shouldBe false
    filterNotFunc(sBamRecs2(4)) shouldBe false
    filterNotFunc(sBamRecs2(5)) shouldBe false
    filterNotFunc(sBamRecs2(6)) shouldBe false
    filterNotFunc(sBamRecs2(7)) shouldBe false
bow's avatar
bow committed
332
333
  }

bow's avatar
bow committed
334
  @Test def testPairBamDefault() = {
bow's avatar
bow committed
335
336
337
338
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320), // overlaps r01, second hit,
      new Interval("chrQ", 451, 480), // overlaps r04
      new Interval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
339
    )
bow's avatar
bow committed
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
    val filterNotFunc = makeFilterNotFunction(intervals, pBamFile1, bloomSize = bloomSize, bloomFp = bloomFp)
    filterNotFunc(pBamRecs1(0)) shouldBe false
    filterNotFunc(pBamRecs1(1)) shouldBe false
    filterNotFunc(pBamRecs1(2)) shouldBe true
    filterNotFunc(pBamRecs1(3)) shouldBe true
    filterNotFunc(pBamRecs1(4)) shouldBe true
    filterNotFunc(pBamRecs1(5)) shouldBe true
    filterNotFunc(pBamRecs1(6)) shouldBe true
    filterNotFunc(pBamRecs1(7)) shouldBe true
    filterNotFunc(pBamRecs1(8)) shouldBe false
    filterNotFunc(pBamRecs1(9)) shouldBe false
    filterNotFunc(pBamRecs1(10)) shouldBe false
    filterNotFunc(pBamRecs1(11)) shouldBe false
    filterNotFunc(pBamRecs1(12)) shouldBe false
    filterNotFunc(pBamRecs1(13)) shouldBe false
bow's avatar
bow committed
355
  }
356

bow's avatar
bow committed
357
  @Test def testPairBamPartialExonOverlap() = {
bow's avatar
bow committed
358
359
    val intervals: List[Interval] = List(
      new Interval("chrQ", 891, 1000)
360
    )
bow's avatar
bow committed
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
    val filterNotFunc = makeFilterNotFunction(intervals, pBamFile1, bloomSize = bloomSize, bloomFp = bloomFp)
    filterNotFunc(pBamRecs1(0)) shouldBe false
    filterNotFunc(pBamRecs1(1)) shouldBe false
    filterNotFunc(pBamRecs1(2)) shouldBe false
    filterNotFunc(pBamRecs1(3)) shouldBe false
    filterNotFunc(pBamRecs1(4)) shouldBe false
    filterNotFunc(pBamRecs1(5)) shouldBe false
    filterNotFunc(pBamRecs1(6)) shouldBe false
    filterNotFunc(pBamRecs1(7)) shouldBe false
    filterNotFunc(pBamRecs1(8)) shouldBe false
    filterNotFunc(pBamRecs1(9)) shouldBe false
    filterNotFunc(pBamRecs1(10)) shouldBe true
    filterNotFunc(pBamRecs1(11)) shouldBe true
    filterNotFunc(pBamRecs1(12)) shouldBe false
    filterNotFunc(pBamRecs1(13)) shouldBe false
376
377
  }

bow's avatar
bow committed
378
  @Test def testPairBamFilterOutMultiNotSet() = {
bow's avatar
bow committed
379
380
381
382
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320), // overlaps r01, second hit,
      new Interval("chrQ", 451, 480), // overlaps r04
      new Interval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
bow's avatar
bow committed
383
    )
bow's avatar
bow committed
384
    val filterNotFunc = makeFilterNotFunction(intervals, pBamFile1, bloomSize = bloomSize, bloomFp = bloomFp,
385
      filterOutMulti = false)
bow's avatar
bow committed
386
387
388
389
390
391
392
393
394
395
396
397
398
399
    filterNotFunc(pBamRecs1(0)) shouldBe false
    filterNotFunc(pBamRecs1(1)) shouldBe false
    filterNotFunc(pBamRecs1(2)) shouldBe false
    filterNotFunc(pBamRecs1(3)) shouldBe false
    filterNotFunc(pBamRecs1(4)) shouldBe true
    filterNotFunc(pBamRecs1(5)) shouldBe true
    filterNotFunc(pBamRecs1(6)) shouldBe true
    filterNotFunc(pBamRecs1(7)) shouldBe true
    filterNotFunc(pBamRecs1(8)) shouldBe false
    filterNotFunc(pBamRecs1(9)) shouldBe false
    filterNotFunc(pBamRecs1(10)) shouldBe false
    filterNotFunc(pBamRecs1(11)) shouldBe false
    filterNotFunc(pBamRecs1(12)) shouldBe false
    filterNotFunc(pBamRecs1(13)) shouldBe false
bow's avatar
bow committed
400
401
  }

bow's avatar
bow committed
402
  @Test def testPairBamFilterMinMapQ() = {
bow's avatar
bow committed
403
404
405
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320),
      new Interval("chrQ", 451, 480)
bow's avatar
bow committed
406
    )
bow's avatar
bow committed
407
    val filterNotFunc = makeFilterNotFunction(intervals, pBamFile2, bloomSize = bloomSize, bloomFp = bloomFp,
408
      minMapQ = 60)
bow's avatar
bow committed
409
    // r01 is not in since it is below the MAPQ threshold
bow's avatar
bow committed
410
411
412
413
414
415
416
417
418
419
    filterNotFunc(pBamRecs2(0)) shouldBe false
    filterNotFunc(pBamRecs2(1)) shouldBe false
    filterNotFunc(pBamRecs2(2)) shouldBe false
    filterNotFunc(pBamRecs2(3)) shouldBe false
    filterNotFunc(pBamRecs2(4)) shouldBe false
    filterNotFunc(pBamRecs2(5)) shouldBe false
    filterNotFunc(pBamRecs2(6)) shouldBe true
    filterNotFunc(pBamRecs2(7)) shouldBe true
    filterNotFunc(pBamRecs2(8)) shouldBe false
    filterNotFunc(pBamRecs2(9)) shouldBe false
bow's avatar
bow committed
420
421
  }

bow's avatar
bow committed
422
  @Test def testPairBamFilterReadGroupIDs() = {
bow's avatar
bow committed
423
424
425
    val intervals: List[Interval] = List(
      new Interval("chrQ", 291, 320),
      new Interval("chrQ", 451, 480)
bow's avatar
bow committed
426
    )
bow's avatar
bow committed
427
    val filterNotFunc = makeFilterNotFunction(intervals, pBamFile2, bloomSize = bloomSize, bloomFp = bloomFp,
bow's avatar
bow committed
428
      readGroupIds = Set("002", "003"))
bow's avatar
bow committed
429
    // only r01 is in the set since it is RG 002
bow's avatar
bow committed
430
431
432
433
434
435
436
437
438
439
    filterNotFunc(pBamRecs2(0)) shouldBe false
    filterNotFunc(pBamRecs2(1)) shouldBe false
    filterNotFunc(pBamRecs2(2)) shouldBe true
    filterNotFunc(pBamRecs2(3)) shouldBe true
    filterNotFunc(pBamRecs2(4)) shouldBe true
    filterNotFunc(pBamRecs2(5)) shouldBe true
    filterNotFunc(pBamRecs2(6)) shouldBe false
    filterNotFunc(pBamRecs2(7)) shouldBe false
    filterNotFunc(pBamRecs2(8)) shouldBe false
    filterNotFunc(pBamRecs2(9)) shouldBe false
bow's avatar
bow committed
440
  }
bow's avatar
bow committed
441
  @Test def testWriteSingleBamDefault() = {
442
    val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
443
    val outBam = mock[SAMFileWriter]
444
445
446

    val stdout = new java.io.ByteArrayOutputStream
    Console.withOut(stdout) {
447
      writeFilteredBam(mockFilterOutFunc, makeSamReader(sBamFile1), outBam)
448
    }
bow's avatar
bow committed
449
    stdout.toString should ===(
450
451
      "count_included\tcount_excluded\n%d\t%d\n"
        .format(4, 3)
452
453
    )

454
455
456
457
458
459
    val exp = makeSamReader(sBamFile3).asScala.toList
    verify(outBam, times(4)).addAlignment(anyObject.asInstanceOf[SAMRecord])
    val obs = inOrd(outBam)
    exp.foreach(x => {
      obs.verify(outBam).addAlignment(x)
    })
460
461
  }

bow's avatar
bow committed
462
  @Test def testWriteSingleBamAndFilteredBAM() = {
463
    val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
464
465
    val outBam = mock[SAMFileWriter]
    val filtBam = Some(mock[SAMFileWriter])
466
467
468

    val stdout = new java.io.ByteArrayOutputStream
    Console.withOut(stdout) {
469
      writeFilteredBam(mockFilterOutFunc, makeSamReader(sBamFile1), outBam, filteredOutBam = filtBam)
470
    }
bow's avatar
bow committed
471
    stdout.toString should ===(
472
473
      "count_included\tcount_excluded\n%d\t%d\n"
        .format(4, 3)
474
475
    )

bow's avatar
bow committed
476
    val exp = makeSamReader(sBamFile4).asScala
477
478
479
480
481
    verify(filtBam.get, times(3)).addAlignment(anyObject.asInstanceOf[SAMRecord])
    val obs = inOrd(filtBam.get)
    exp.foreach(x => {
      obs.verify(filtBam.get).addAlignment(x)
    })
482
483
  }

bow's avatar
bow committed
484
  @Test def testWritePairBamDefault() = {
485
    val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
486
    val outBam = mock[SAMFileWriter]
487
488
489

    val stdout = new java.io.ByteArrayOutputStream
    Console.withOut(stdout) {
490
      writeFilteredBam(mockFilterOutFunc, makeSamReader(pBamFile1), outBam)
491
    }
bow's avatar
bow committed
492
    stdout.toString should ===(
493
494
      "count_included\tcount_excluded\n%d\t%d\n"
        .format(8, 6)
495
    )
496
497
498
499
500
501
    val exp = makeSamReader(pBamFile3).asScala.toList
    verify(outBam, times(8)).addAlignment(anyObject.asInstanceOf[SAMRecord])
    val obs = inOrd(outBam)
    exp.foreach(x => {
      obs.verify(outBam).addAlignment(x)
    })
502
  }
bow's avatar
bow committed
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525

  @Test def testArgsMinimum() = {
    val parsed = parseArgs(Array(
      "-I", sBamFile1.getPath,
      "-r", BedFile1.getPath,
      "-o", "/tmp/wr.bam"
    ))
    parsed.inputBam shouldBe sBamFile1
    parsed.targetRegions shouldBe BedFile1
    parsed.outputBam shouldBe new File("/tmp/wr.bam")
  }

  @Test def testArgsMaximum() = {
    val parsed = parseArgs(Array(
      "-I", pBamFile1.getPath,
      "-r", BedFile1.getPath,
      "-o", "/tmp/wr.bam",
      "-f", "/tmp/wrf.bam",
      "-Q", "30",
      "-G", "001",
      "-G", "002",
      "--limit_removal",
      "--no_make_index",
bow's avatar
bow committed
526
      "--feature_type", "gene",
bow's avatar
bow committed
527
528
529
530
531
532
533
534
535
536
537
538
      "--bloom_size", "10000",
      "--false_positive", "1e-8"
    ))
    parsed.inputBam shouldBe pBamFile1
    parsed.targetRegions shouldBe BedFile1
    parsed.outputBam shouldBe new File("/tmp/wr.bam")
    parsed.filteredOutBam shouldBe Some(new File("/tmp/wrf.bam"))
    parsed.minMapQ shouldBe 30
    parsed.readGroupIds should contain("001")
    parsed.readGroupIds should contain("002")
    parsed.limitToRegion shouldBe true
    parsed.noMakeIndex shouldBe true
bow's avatar
bow committed
539
    parsed.featureType should ===("gene")
bow's avatar
bow committed
540
541
542
    parsed.bloomSize shouldBe 10000
    parsed.bloomFp shouldBe 1e-8
  }
543
}