Commit e6fbe498 authored by bow's avatar bow
Browse files

Add more unit tests for WipeReads

parent 5b340f1e
......@@ -3,13 +3,15 @@
@RG ID:001 DS:paired-end reads SM:WipeReadsTestCase
r02 99 chrQ 50 60 10M = 90 50 TACGTACGTA EEFFGGHHII RG:Z:001
r02 147 chrQ 90 60 10M = 50 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r01 163 chrQ 150 60 10M = 190 50 AAAAAGGGGG GGGGGGGGGG RG:Z:001
r01 83 chrQ 190 60 10M = 150 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r01 163 chrQ 250 60 10M = 290 50 AAAAAGGGGG GGGGGGGGGG RG:Z:001
r01 83 chrQ 290 60 10M = 250 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r04 99 chrQ 450 60 10M = 490 50 CGTACGTACG EEFFGGHHII RG:Z:001
r04 147 chrQ 490 60 10M = 450 -50 GCATGCATGC EEFFGGHHII RG:Z:001
r03 163 chrQ 650 60 10M = 690 50 TTTTTCCCCC HHHHHHHHHH RG:Z:001
r03 83 chrQ 690 60 10M = 650 -50 CCCCCTTTTT HHHHHHHHHH RG:Z:001
r02 99 chrQ 850 60 5M100N5M = 1290 50 TACGTACGTA EEFFGGHHII RG:Z:001
r02 147 chrQ 1290 60 5M100N5M = 1250 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r05 99 chrQ 850 60 5M100N5M = 1290 50 TACGTACGTA EEFFGGHHII RG:Z:001
r05 147 chrQ 1290 60 5M100N5M = 1250 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
r06 4 * 0 0 * * 0 0 GCGCGCGCGC HIHIHIHIHI RG:Z:001
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:10000
@RG ID:001 DS:paired-end reads SM:WipeReadsTestCase
@RG ID:002 DS:paired-end reads SM:WipeReadsTestCase
r02 99 chrQ 50 60 10M = 90 50 TACGTACGTA EEFFGGHHII RG:Z:001
r02 147 chrQ 90 60 10M = 50 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r01 163 chrQ 150 30 10M = 190 50 AAAAAGGGGG GGGGGGGGGG RG:Z:002
r01 83 chrQ 190 30 10M = 150 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r01 163 chrQ 250 30 10M = 290 50 AAAAAGGGGG GGGGGGGGGG RG:Z:002
r01 83 chrQ 290 30 10M = 250 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r04 99 chrQ 450 60 10M = 490 50 CGTACGTACG EEFFGGHHII RG:Z:001
r04 147 chrQ 490 60 10M = 450 -50 GCATGCATGC EEFFGGHHII RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
r08 4 * 0 0 * * 0 0 GCGCGCGCGC HIHIHIHIHI RG:Z:002
......@@ -3,8 +3,10 @@
@RG ID:001 DS:single-end reads SM:WipeReadsTestCase
@RG ID:002 DS:single-end reads SM:WipeReadsTestCase
r02 0 chrQ 50 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:001
r01 16 chrQ 190 30 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:002
r01 16 chrQ 190 30 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r01 16 chrQ 290 30 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r04 0 chrQ 450 60 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:001
r07 16 chrQ 460 60 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:001
r07 16 chrQ 860 30 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
r08 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:002
......@@ -21,6 +21,7 @@ class WipeReadsUnitTest extends Assertions {
// Fixture files resolved through resourcePath. Judging by the file names these are
// single-end (sbam*) and paired-end (pbam*) BAM inputs plus a BED file of target regions
// -- TODO confirm against the test resources.
val sbam01 = new File(resourcePath("/single01.bam"))
val sbam02 = new File(resourcePath("/single02.bam"))
val pbam01 = new File(resourcePath("/paired01.bam"))
val pbam02 = new File(resourcePath("/paired02.bam"))
val bed01 = new File(resourcePath("/rrna01.bed"))
// Smallest argument list handed to parseOption (see testOptMinimum): -I takes sbam01,
// -l takes bed01 and -o takes the placeholder name "mock.bam".
val minArgList = List("-I", sbam01.toString, "-l", bed01.toString, "-o", "mock.bam")
......@@ -85,12 +86,32 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 451, 480, "+")
)
val bf = makeBloomFilter(intervals, sbam02, bloomSize = 1000, bloomFp = 1e-10, minMapQ = 60)
// r01 is not in since it is below the MAPQ threshold
assert(!bf.contains("r01").isTrue)
assert(!bf.contains("r02").isTrue)
assert(!bf.contains("r06").isTrue)
assert(!bf.contains("r08").isTrue)
// only r04 is in the set since r01 is below the MAPQ threshold
assert(bf.contains("r04").isTrue)
assert(bf.contains("r07").isTrue)
}
/**
 * Builds the filter from `sbam02` with `minMapQ = 60` and `filterOutMulti = false`, then
 * queries it with complete tab-separated record lines instead of bare read names.
 */
@Test def testSingleBAMFilterMinMapQFilterOutMultiNotSet() = {
  val regions: Iterator[RawInterval] = List(
    RawInterval("chrQ", 291, 320, "+"),
    RawInterval("chrQ", 451, 480, "+")
  ).iterator
  val filter = makeBloomFilter(regions, sbam02, bloomSize = 1000, bloomFp = 1e-10,
    minMapQ = 60, filterOutMulti = false)

  // Records expected to be missing from the filter.
  val absentRecords = Seq(
    "r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001\n",
    "r01\t16\tchrQ\t190\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002\n",
    // r01 at 290 falls inside the first interval, but its MAPQ (30) is below minMapQ
    "r01\t16\tchrQ\t290\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002\n",
    // second r07 alignment; presumably left out because filterOutMulti is false -- TODO confirm
    "r07\t16\tchrQ\t860\t30\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001\n",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001\n",
    "r08\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:002\n"
  )
  // Records expected to be present: both start inside the 451-480 interval with MAPQ 60.
  val presentRecords = Seq(
    "r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001\n",
    "r07\t16\tchrQ\t460\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001\n"
  )

  absentRecords.foreach { record => assert(!filter.contains(record).isTrue) }
  presentRecords.foreach { record => assert(filter.contains(record).isTrue) }
}
@Test def testSingleBAMFilterReadGroupIDs() = {
......@@ -126,6 +147,59 @@ class WipeReadsUnitTest extends Assertions {
assert(bf.contains("r04").isTrue)
}
/**
 * Builds the filter from `pbam01` with `filterOutMulti = false` and queries it with
 * complete tab-separated record lines for both mates of each pair.
 */
@Test def testPairBAMFilterOutMultiNotSet() = {
  val regions: Iterator[RawInterval] = List(
    RawInterval("chrQ", 291, 320, "+"), // hits the second r01 alignment
    RawInterval("chrQ", 451, 480, "+"), // hits r04
    RawInterval("chrQ", 991, 1000, "+") // meant to hit nothing (see the r05 TODO below)
  ).iterator
  val filter = makeBloomFilter(regions, pbam01, bloomSize = 1000, bloomFp = 1e-10, filterOutMulti = false)

  // Records expected to be missing from the filter.
  val absentRecords = Seq(
    "r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001\n",
    "r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001\n",
    "r01\t163\tchrQ\t150\t60\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001\n",
    "r01\t83\tchrQ\t190\t60\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001\n",
    "r03\t163\tchrQ\t650\t60\t10M\t=\t690\t50\tTTTTTCCCCC\tHHHHHHHHHH\tRG:Z:001\n",
    "r03\t83\tchrQ\t690\t60\t10M\t=\t650\t-50\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001\n",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001\n",
    "r06\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:001\n"
  )
  // Records expected to be present: both mates of the r01 pair at 250/290 and of the r04 pair.
  val presentRecords = Seq(
    "r01\t163\tchrQ\t250\t60\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001\n",
    "r01\t83\tchrQ\t290\t60\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001\n",
    "r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001\n",
    "r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001\n"
  )

  absentRecords.foreach { record => assert(!filter.contains(record).isTrue) }
  presentRecords.foreach { record => assert(filter.contains(record).isTrue) }
  /* TODO: exclude r05 from set
  assert(!bf.contains("r05\t99\tchrQ\t850\t60\t5M100N5M\t=\t1290\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001\n").isTrue)
  assert(!bf.contains("r05\t147\tchrQ\t1290\t60\t5M100N5M\t=\t1250\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001\n").isTrue)
  */
}
/**
 * Builds the filter from `pbam02` with `minMapQ = 60` and checks membership by read name.
 */
@Test def testPairBAMFilterMinMapQ() = {
  val regions: Iterator[RawInterval] = List(
    RawInterval("chrQ", 291, 320, "+"),
    RawInterval("chrQ", 451, 480, "+")
  ).iterator
  val filter = makeBloomFilter(regions, pbam02, bloomSize = 1000, bloomFp = 1e-10, minMapQ = 60)

  // r01 comes first: it is expected to be dropped for being below the MAPQ threshold.
  val absentNames = Seq("r01", "r02", "r06", "r08")
  absentNames.foreach { readName => assert(!filter.contains(readName).isTrue) }

  assert(filter.contains("r04").isTrue)
}
/**
 * Builds the filter from `pbam02` restricted to read groups "002" and "003" and checks
 * membership by read name.
 */
@Test def testPairBAMFilterReadGroupIDs() = {
  val regions: Iterator[RawInterval] = List(
    RawInterval("chrQ", 291, 320, "+"),
    RawInterval("chrQ", 451, 480, "+")
  ).iterator
  val filter = makeBloomFilter(regions, pbam02, bloomSize = 1000, bloomFp = 1e-10, readGroupIDs = Set("002", "003"))

  val absentNames = Seq("r02", "r04", "r06", "r08")
  absentNames.foreach { readName => assert(!filter.contains(readName).isTrue) }

  // r01 is the only read expected in the set, since it belongs to read group 002
  assert(filter.contains("r01").isTrue)
}
@Test def testOptMinimum() = {
val opts = parseOption(Map(), minArgList)
assert(opts.contains("inputBAM"))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment