Commit e1444972 authored by bow
Browse files

Update returned filter function in WipeReads to use SAMRecords only

parent 2c5d1151
......@@ -149,7 +149,7 @@ object WipeReads extends MainCommand {
inBAM: File, inBAMIndex: File = null,
filterOutMulti: Boolean = true,
minMapQ: Int = 0, readGroupIDs: Set[String] = Set(),
bloomSize: Int = 100000000, bloomFp: Double = 1e-10): (Any => Boolean) = {
bloomSize: Int = 100000000, bloomFp: Double = 1e-10): (SAMRecord => Boolean) = {
// TODO: implement optional index creation
/** Function to check for BAM file index and return a SAMFileReader given a File */
......@@ -318,20 +318,14 @@ object WipeReads extends MainCommand {
.foldLeft(bfm.create())(_.++(_))
if (filterOutMulti)
(rec: Any) => rec match {
case rec: SAMRecord => filteredOutSet.contains(rec.getReadName).isTrue
case rec: String => filteredOutSet.contains(rec).isTrue
case _ => false
}
(rec: SAMRecord) => filteredOutSet.contains(rec.getReadName).isTrue
else
(rec: Any) => rec match {
case rec: SAMRecord if rec.getReadPairedFlag =>
(rec: SAMRecord) => {
if (rec.getReadPairedFlag)
filteredOutSet.contains(rec.getReadName + "_" + rec.getAlignmentStart).isTrue &&
filteredOutSet.contains(rec.getReadName + "_" + rec.getMateAlignmentStart).isTrue
case rec: SAMRecord if !rec.getReadPairedFlag =>
else
filteredOutSet.contains(rec.getReadName + "_" + rec.getAlignmentStart).isTrue
case rec: String => filteredOutSet.contains(rec).isTrue
case _ => false
}
}
......
......@@ -8,7 +8,7 @@ import java.nio.file.Paths
import java.io.{ File, IOException }
import scala.collection.JavaConverters._
import htsjdk.samtools.{ SAMFileReader, SAMRecord }
import htsjdk.samtools._
import org.scalatest.Assertions
import org.testng.annotations.Test
......@@ -22,24 +22,87 @@ class WipeReadsUnitTest extends Assertions {
/** Looks up the classpath resource named `p` via this class and returns its location as a filesystem path string. */
private def resourcePath(p: String): String = {
  val resourceUri = getClass.getResource(p).toURI
  Paths.get(resourceUri).toString
}
/**
 * Lazily built SAM line parser shared by the fixtures.
 *
 * Its header declares one sequence ("chrQ", length 10000) and the read groups "001" and "002".
 */
private lazy val samP: SAMLineParser = {
  val header = new SAMFileHeader
  header.addSequence(new SAMSequenceRecord("chrQ", 10000))
  // register the read groups in the same order as before: 001 first, then 002
  Seq("001", "002").foreach(id => header.addReadGroup(new SAMReadGroupRecord(id)))
  new SAMLineParser(header)
}
/** Parses every raw SAM text line with `samP` and returns the records in input order. */
private def makeSAMs(raws: String*): Seq[SAMRecord] =
  for (line <- raws) yield samP.parseLine(line)
/** Creates a new temporary file whose name starts with "WipeReads" and ends with a random UUID plus ".bam". */
private def makeTempBAM(): File = {
  val uniqueSuffix = s"${java.util.UUID.randomUUID}.bam"
  File.createTempFile("WipeReads", uniqueSuffix)
}
/**
 * Derives the index path for `bam`: its absolute path with a trailing ".bam" (if any) removed
 * and ".bai" appended. Only the File object is built; nothing is created on disk here.
 */
private def makeTempBAMIndex(bam: File): File = {
  val basePath = bam.getAbsolutePath.stripSuffix(".bam")
  new File(s"${basePath}.bai")
}
val sbam01 = new File(resourcePath("/single01.bam"))
val sbam02 = new File(resourcePath("/single02.bam"))
val sbam03 = new File(resourcePath("/single03.bam"))
val sbam04 = new File(resourcePath("/single04.bam"))
val pbam01 = new File(resourcePath("/paired01.bam"))
val pbam02 = new File(resourcePath("/paired02.bam"))
val pbam03 = new File(resourcePath("/paired03.bam"))
val bed01 = new File(resourcePath("/rrna01.bed"))
val minArgList = List("-I", sbam01.toString, "-l", bed01.toString, "-o", "mock.bam")
val sBAMFile1 = new File(resourcePath("/single01.bam"))
// Seven SAM records built from tab-separated SAM text lines via makeSAMs.
// Columns per line: read name, flag, reference, position, MAPQ, CIGAR, mate reference,
// mate position, template length, bases, qualities, read-group tag.
// Read name r01 appears twice (positions 190 and 290); r05 carries a spliced CIGAR (5M200N5M);
// r06 has flag 4 with no reference or position.
// NOTE(review): presumably mirrors the contents of single01.bam record-for-record — confirm.
val sBAMRecs1 = makeSAMs(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
"r01\t16\tchrQ\t190\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
"r01\t16\tchrQ\t290\t60\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
"r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
"r03\t16\tchrQ\t690\t60\t10M\t*\t0\t0\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001",
"r05\t0\tchrQ\t890\t60\t5M200N5M\t*\t0\t0\tGATACGATAC\tFEFEFEFEFE\tRG:Z:001",
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
)
val sBAMFile2 = new File(resourcePath("/single02.bam"))
// Eight SAM records built from tab-separated SAM text lines via makeSAMs.
// Both r01 records have MAPQ 30 and read group 002; r07 appears at 460 (MAPQ 60) and 860 (MAPQ 30);
// r06 and r08 have flag 4 with no reference or position, in read groups 001 and 002 respectively.
// NOTE(review): presumably mirrors the contents of single02.bam record-for-record — confirm.
val sBAMRecs2 = makeSAMs(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
"r01\t16\tchrQ\t190\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
"r01\t16\tchrQ\t290\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
"r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
"r07\t16\tchrQ\t460\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
"r07\t16\tchrQ\t860\t30\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
"r08\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:002"
)
val sBAMFile3 = new File(resourcePath("/single03.bam"))
val sBAMFile4 = new File(resourcePath("/single04.bam"))
val pBAMFile1 = new File(resourcePath("/paired01.bam"))
// Fourteen SAM records built from tab-separated SAM text lines via makeSAMs.
// Indices 0-11 are six mate pairs (each record's mate position equals its partner's position);
// read name r01 contributes two pairs (150/190 and 250/290), and the first r05 mate is spliced (5M200N5M).
// Indices 12-13 are two r06 records with flag 4 and no reference or position.
// NOTE(review): presumably mirrors the contents of paired01.bam record-for-record — confirm.
val pBAMRecs1 = makeSAMs(
"r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
"r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
"r01\t163\tchrQ\t150\t60\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001",
"r01\t83\tchrQ\t190\t60\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
"r01\t163\tchrQ\t250\t60\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001",
"r01\t83\tchrQ\t290\t60\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001",
"r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
"r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001",
"r03\t163\tchrQ\t650\t60\t10M\t=\t690\t50\tTTTTTCCCCC\tHHHHHHHHHH\tRG:Z:001",
"r03\t83\tchrQ\t690\t60\t10M\t=\t650\t-50\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001",
"r05\t99\tchrQ\t890\t60\t5M200N5M\t=\t1140\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
"r05\t147\tchrQ\t1140\t60\t10M\t=\t890\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:001"
)
val pBAMFile2 = new File(resourcePath("/paired02.bam"))
// Ten SAM records built from tab-separated SAM text lines via makeSAMs.
// Indices 0-7 are four mate pairs; the two r01 pairs (indices 2-5) have MAPQ 30 and read group 002,
// while the r02 and r04 pairs have MAPQ 60 and read group 001.
// Indices 8-9 (r06, r08) have flag 4 with no reference or position.
// NOTE(review): presumably mirrors the contents of paired02.bam record-for-record — confirm.
val pBAMRecs2 = makeSAMs(
"r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001",
"r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001",
"r01\t163\tchrQ\t150\t30\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002",
"r01\t83\tchrQ\t190\t30\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
"r01\t163\tchrQ\t250\t30\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002",
"r01\t83\tchrQ\t290\t30\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002",
"r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001",
"r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001",
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001",
"r08\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:002"
)
val pBAMFile3 = new File(resourcePath("/paired03.bam"))
val BEDFile1 = new File(resourcePath("/rrna01.bed"))
val minArgList = List("-I", sBAMFile1.toString, "-l", BEDFile1.toString, "-o", "mock.bam")
@Test def testMakeRawIntervalFromBED() = {
val intervals: Vector[RawInterval] = makeRawIntervalFromFile(bed01).toVector
val intervals: Vector[RawInterval] = makeRawIntervalFromFile(BEDFile1).toVector
assert(intervals.length == 3)
assert(intervals.last.chrom == "chrQ")
assert(intervals.last.start == 291)
......@@ -58,40 +121,45 @@ class WipeReadsUnitTest extends Assertions {
// NOTE: while it's possible to have our filter produce false positives
// it is highly unlikely in our test cases as we are setting a very low FP rate
// and only filling the filter with a few items
val isFilteredOut = makeFilterOutFunction(intervals, sbam01, bloomSize = 1000, bloomFp = 1e-10)
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
// by default, set elements are SAM record read names
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r03"))
assert(!isFilteredOut("r05"))
assert(!isFilteredOut("r06"))
assert(isFilteredOut("r01"))
assert(isFilteredOut("r04"))
assert(!isFilteredOut(sBAMRecs1(0)))
assert(isFilteredOut(sBAMRecs1(1)))
assert(isFilteredOut(sBAMRecs1(2)))
assert(isFilteredOut(sBAMRecs1(3)))
assert(!isFilteredOut(sBAMRecs1(4)))
assert(!isFilteredOut(sBAMRecs1(5)))
assert(!isFilteredOut(sBAMRecs1(6)))
}
// Builds the filter from a single interval on chrQ (881-1000) and expects only the r05 record
// to be reported as filtered out.
// NOTE(review): this span is a rendered diff without +/- markers. The lines using sbam01 and
// string arguments look like the removed side, and the lines using sBAMFile1 and sBAMRecs1 look
// like the added side — confirm before treating the span as compilable code.
@Test def testSingleBAMDefaultPartialExonOverlap() = {
val intervals: Iterator[RawInterval] = Iterator(
RawInterval("chrQ", 881, 1000) // overlaps first exon of r05
)
val isFilteredOut = makeFilterOutFunction(intervals, sbam01, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut("r01"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r03"))
assert(!isFilteredOut("r04"))
assert(!isFilteredOut("r06"))
assert(isFilteredOut("r05"))
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut(sBAMRecs1(0)))
assert(!isFilteredOut(sBAMRecs1(1)))
assert(!isFilteredOut(sBAMRecs1(2)))
assert(!isFilteredOut(sBAMRecs1(3)))
assert(!isFilteredOut(sBAMRecs1(4)))
// index 5 is the r05 record, the only one expected to be filtered out
assert(isFilteredOut(sBAMRecs1(5)))
assert(!isFilteredOut(sBAMRecs1(6)))
}
// Builds the filter from intervals that should match no record and expects every record
// to be reported as not filtered out.
// NOTE(review): this span is a rendered diff without +/- markers. The first RawInterval line
// (no trailing comma), the sbam01 call and the string-argument assertions look like the removed
// side; the remaining lines look like the added side — confirm before treating it as compilable code.
@Test def testSingleBAMDefaultNoExonOverlap() = {
val intervals: Iterator[RawInterval] = Iterator(
RawInterval("chrP", 881, 1000)
RawInterval("chrP", 881, 1000),
RawInterval("chrQ", 900, 920)
)
val isFilteredOut = makeFilterOutFunction(intervals, sbam01, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut("r01"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r03"))
assert(!isFilteredOut("r04"))
assert(!isFilteredOut("r06"))
assert(!isFilteredOut("r05"))
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut(sBAMRecs1(0)))
assert(!isFilteredOut(sBAMRecs1(1)))
assert(!isFilteredOut(sBAMRecs1(2)))
assert(!isFilteredOut(sBAMRecs1(3)))
assert(!isFilteredOut(sBAMRecs1(4)))
// NOTE(review): sBAMRecs1(5) is asserted twice in a row; the second assertion looks redundant
assert(!isFilteredOut(sBAMRecs1(5)))
assert(!isFilteredOut(sBAMRecs1(5)))
assert(!isFilteredOut(sBAMRecs1(6)))
}
@Test def testSingleBAMFilterOutMultiNotSet() = {
......@@ -100,15 +168,15 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 451, 480), // overlaps r04
RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
)
val isFilteredOut = makeFilterOutFunction(intervals, sbam01, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile1, bloomSize = 1000, bloomFp = 1e-10,
filterOutMulti = false)
assert(!isFilteredOut("r02_50"))
assert(!isFilteredOut("r01_190"))
assert(!isFilteredOut("r03_690"))
assert(!isFilteredOut("r06_0"))
assert(isFilteredOut("r01_290"))
assert(isFilteredOut("r04_450"))
assert(!isFilteredOut("r05_890"))
assert(!isFilteredOut(sBAMRecs1(0)))
assert(!isFilteredOut(sBAMRecs1(1)))
assert(isFilteredOut(sBAMRecs1(2)))
assert(isFilteredOut(sBAMRecs1(3)))
assert(!isFilteredOut(sBAMRecs1(4)))
assert(!isFilteredOut(sBAMRecs1(5)))
assert(!isFilteredOut(sBAMRecs1(6)))
}
@Test def testSingleBAMFilterMinMapQ() = {
......@@ -116,15 +184,17 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 291, 320),
RawInterval("chrQ", 451, 480)
)
val isFilteredOut = makeFilterOutFunction(intervals, sbam02, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
minMapQ = 60)
assert(!isFilteredOut(sBAMRecs2(0)))
// r01 is not in since it is below the MAPQ threshold
assert(!isFilteredOut("r01"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r06"))
assert(!isFilteredOut("r08"))
assert(isFilteredOut("r04"))
assert(isFilteredOut("r07"))
assert(!isFilteredOut(sBAMRecs2(1)))
assert(!isFilteredOut(sBAMRecs2(2)))
assert(isFilteredOut(sBAMRecs2(3)))
assert(isFilteredOut(sBAMRecs2(4)))
assert(isFilteredOut(sBAMRecs2(5)))
assert(!isFilteredOut(sBAMRecs2(6)))
assert(!isFilteredOut(sBAMRecs2(7)))
}
@Test def testSingleBAMFilterMinMapQFilterOutMultiNotSet() = {
......@@ -132,18 +202,18 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 291, 320),
RawInterval("chrQ", 451, 480)
)
val isFilteredOut = makeFilterOutFunction(intervals, sbam02, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
minMapQ = 60, filterOutMulti = false)
assert(!isFilteredOut("r02_50"))
assert(!isFilteredOut("r01_190"))
assert(!isFilteredOut(sBAMRecs2(0)))
assert(!isFilteredOut(sBAMRecs2(1)))
// this r01 is not in since it is below the MAPQ threshold
assert(!isFilteredOut("r01_290"))
assert(!isFilteredOut("r07_860"))
assert(!isFilteredOut("r06_0"))
assert(!isFilteredOut("r08_0"))
assert(isFilteredOut("r04_450"))
assert(!isFilteredOut(sBAMRecs2(2)))
assert(isFilteredOut(sBAMRecs2(3)))
assert(isFilteredOut(sBAMRecs2(4)))
// this r07 is not in since filterOutMulti is false
assert(isFilteredOut("r07_460"))
assert(!isFilteredOut(sBAMRecs2(5)))
assert(!isFilteredOut(sBAMRecs2(6)))
assert(!isFilteredOut(sBAMRecs2(7)))
}
@Test def testSingleBAMFilterReadGroupIDs() = {
......@@ -151,14 +221,17 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 291, 320),
RawInterval("chrQ", 451, 480)
)
val isFilteredOut = makeFilterOutFunction(intervals, sbam02, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, sBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
readGroupIDs = Set("002", "003"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r04"))
assert(!isFilteredOut("r06"))
assert(!isFilteredOut("r08"))
assert(!isFilteredOut(sBAMRecs2(0)))
// only r01 is in the set since it is RG 002
assert(isFilteredOut("r01"))
assert(isFilteredOut(sBAMRecs2(1)))
assert(isFilteredOut(sBAMRecs2(2)))
assert(!isFilteredOut(sBAMRecs2(3)))
assert(!isFilteredOut(sBAMRecs2(4)))
assert(!isFilteredOut(sBAMRecs2(5)))
assert(!isFilteredOut(sBAMRecs2(6)))
assert(!isFilteredOut(sBAMRecs2(7)))
}
@Test def testPairBAMDefault() = {
......@@ -167,26 +240,42 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 451, 480), // overlaps r04
RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
)
val isFilteredOut = makeFilterOutFunction(intervals, pbam01, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r03"))
assert(!isFilteredOut("r05"))
assert(!isFilteredOut("r06"))
assert(isFilteredOut("r01"))
assert(isFilteredOut("r04"))
val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut(pBAMRecs1(0)))
assert(!isFilteredOut(pBAMRecs1(1)))
assert(isFilteredOut(pBAMRecs1(2)))
assert(isFilteredOut(pBAMRecs1(3)))
assert(isFilteredOut(pBAMRecs1(4)))
assert(isFilteredOut(pBAMRecs1(5)))
assert(isFilteredOut(pBAMRecs1(6)))
assert(isFilteredOut(pBAMRecs1(7)))
assert(!isFilteredOut(pBAMRecs1(8)))
assert(!isFilteredOut(pBAMRecs1(9)))
assert(!isFilteredOut(pBAMRecs1(10)))
assert(!isFilteredOut(pBAMRecs1(11)))
assert(!isFilteredOut(pBAMRecs1(12)))
assert(!isFilteredOut(pBAMRecs1(13)))
}
// Builds the filter from a single interval on chrQ (891-1000) against the paired-end input and
// expects only the two r05 mates to be reported as filtered out.
// NOTE(review): this span is a rendered diff without +/- markers. The lines using pbam01 and
// string arguments look like the removed side, and the lines using pBAMFile1 and pBAMRecs1 look
// like the added side — confirm before treating the span as compilable code.
@Test def testPairBAMPartialExonOverlap() = {
val intervals: Iterator[RawInterval] = Iterator(
RawInterval("chrQ", 891, 1000)
)
val isFilteredOut = makeFilterOutFunction(intervals, pbam01, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut("r01"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r03"))
assert(!isFilteredOut("r04"))
assert(!isFilteredOut("r06"))
assert(isFilteredOut("r05"))
val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile1, bloomSize = 1000, bloomFp = 1e-10)
assert(!isFilteredOut(pBAMRecs1(0)))
assert(!isFilteredOut(pBAMRecs1(1)))
assert(!isFilteredOut(pBAMRecs1(2)))
assert(!isFilteredOut(pBAMRecs1(3)))
assert(!isFilteredOut(pBAMRecs1(4)))
assert(!isFilteredOut(pBAMRecs1(5)))
assert(!isFilteredOut(pBAMRecs1(6)))
assert(!isFilteredOut(pBAMRecs1(7)))
assert(!isFilteredOut(pBAMRecs1(8)))
assert(!isFilteredOut(pBAMRecs1(9)))
// indices 10 and 11 are the two r05 mates, the only records expected to be filtered out
assert(isFilteredOut(pBAMRecs1(10)))
assert(isFilteredOut(pBAMRecs1(11)))
assert(!isFilteredOut(pBAMRecs1(12)))
assert(!isFilteredOut(pBAMRecs1(13)))
}
@Test def testPairBAMFilterOutMultiNotSet() = {
......@@ -195,21 +284,22 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 451, 480), // overlaps r04
RawInterval("chrQ", 991, 1000) // overlaps nothing; lies in the spliced region of r05
)
val isFilteredOut = makeFilterOutFunction(intervals, pbam01, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile1, bloomSize = 1000, bloomFp = 1e-10,
filterOutMulti = false)
assert(!isFilteredOut("r02_50"))
assert(!isFilteredOut("r02_90"))
assert(!isFilteredOut("r01_150"))
assert(!isFilteredOut("r01_190"))
assert(!isFilteredOut("r03_650"))
assert(!isFilteredOut("r03_690"))
assert(!isFilteredOut("r06_0"))
assert(isFilteredOut("r01_250"))
assert(isFilteredOut("r01_290"))
assert(isFilteredOut("r04_450"))
assert(isFilteredOut("r04_490"))
assert(!isFilteredOut("r05_850"))
assert(!isFilteredOut("r05_1140"))
assert(!isFilteredOut(pBAMRecs1(0)))
assert(!isFilteredOut(pBAMRecs1(1)))
assert(!isFilteredOut(pBAMRecs1(2)))
assert(!isFilteredOut(pBAMRecs1(3)))
assert(isFilteredOut(pBAMRecs1(4)))
assert(isFilteredOut(pBAMRecs1(5)))
assert(isFilteredOut(pBAMRecs1(6)))
assert(isFilteredOut(pBAMRecs1(7)))
assert(!isFilteredOut(pBAMRecs1(8)))
assert(!isFilteredOut(pBAMRecs1(9)))
assert(!isFilteredOut(pBAMRecs1(10)))
assert(!isFilteredOut(pBAMRecs1(11)))
assert(!isFilteredOut(pBAMRecs1(12)))
assert(!isFilteredOut(pBAMRecs1(13)))
}
@Test def testPairBAMFilterMinMapQ() = {
......@@ -217,14 +307,19 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 291, 320),
RawInterval("chrQ", 451, 480)
)
val isFilteredOut = makeFilterOutFunction(intervals, pbam02, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
minMapQ = 60)
// r01 is not in since it is below the MAPQ threshold
assert(!isFilteredOut("r01"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r06"))
assert(!isFilteredOut("r08"))
assert(isFilteredOut("r04"))
assert(!isFilteredOut(pBAMRecs2(0)))
assert(!isFilteredOut(pBAMRecs2(1)))
assert(!isFilteredOut(pBAMRecs2(2)))
assert(!isFilteredOut(pBAMRecs2(3)))
assert(!isFilteredOut(pBAMRecs2(4)))
assert(!isFilteredOut(pBAMRecs2(5)))
assert(isFilteredOut(pBAMRecs2(6)))
assert(isFilteredOut(pBAMRecs2(7)))
assert(!isFilteredOut(pBAMRecs2(8)))
assert(!isFilteredOut(pBAMRecs2(9)))
}
@Test def testPairBAMFilterReadGroupIDs() = {
......@@ -232,14 +327,19 @@ class WipeReadsUnitTest extends Assertions {
RawInterval("chrQ", 291, 320),
RawInterval("chrQ", 451, 480)
)
val isFilteredOut = makeFilterOutFunction(intervals, pbam02, bloomSize = 1000, bloomFp = 1e-10,
val isFilteredOut = makeFilterOutFunction(intervals, pBAMFile2, bloomSize = 1000, bloomFp = 1e-10,
readGroupIDs = Set("002", "003"))
assert(!isFilteredOut("r02"))
assert(!isFilteredOut("r04"))
assert(!isFilteredOut("r06"))
assert(!isFilteredOut("r08"))
// only r01 is in the set since it is RG 002
assert(isFilteredOut("r01"))
assert(!isFilteredOut(pBAMRecs2(0)))
assert(!isFilteredOut(pBAMRecs2(1)))
assert(isFilteredOut(pBAMRecs2(2)))
assert(isFilteredOut(pBAMRecs2(3)))
assert(isFilteredOut(pBAMRecs2(4)))
assert(isFilteredOut(pBAMRecs2(5)))
assert(!isFilteredOut(pBAMRecs2(6)))
assert(!isFilteredOut(pBAMRecs2(7)))
assert(!isFilteredOut(pBAMRecs2(8)))
assert(!isFilteredOut(pBAMRecs2(9)))
}
@Test def testWriteSingleBAMDefault() = {
......@@ -248,8 +348,8 @@ class WipeReadsUnitTest extends Assertions {
val outBAMIndex = makeTempBAMIndex(outBAM)
outBAM.deleteOnExit()
outBAMIndex.deleteOnExit()
writeFilteredBAM(mockFilterOutFunc, sbam01, outBAM)
val exp = new SAMFileReader(sbam03).asScala
writeFilteredBAM(mockFilterOutFunc, sBAMFile1, outBAM)
val exp = new SAMFileReader(sBAMFile3).asScala
val obs = new SAMFileReader(outBAM).asScala
for ((e, o) <- exp.zip(obs))
assert(e.getSAMString === o.getSAMString)
......@@ -267,8 +367,8 @@ class WipeReadsUnitTest extends Assertions {
val filteredOutBAMIndex = makeTempBAMIndex(filteredOutBAM)
filteredOutBAM.deleteOnExit()
filteredOutBAMIndex.deleteOnExit()
writeFilteredBAM(mockFilterOutFunc, sbam01, outBAM, filteredOutBAM = filteredOutBAM)
val exp = new SAMFileReader(sbam04).asScala
writeFilteredBAM(mockFilterOutFunc, sBAMFile1, outBAM, filteredOutBAM = filteredOutBAM)
val exp = new SAMFileReader(sBAMFile4).asScala
val obs = new SAMFileReader(filteredOutBAM).asScala
for ((e, o) <- exp.zip(obs))
assert(e.getSAMString === o.getSAMString)
......@@ -284,8 +384,8 @@ class WipeReadsUnitTest extends Assertions {
val outBAMIndex = makeTempBAMIndex(outBAM)
outBAM.deleteOnExit()
outBAMIndex.deleteOnExit()
writeFilteredBAM(mockFilterOutFunc, pbam01, outBAM)
val exp = new SAMFileReader(pbam03).asScala
writeFilteredBAM(mockFilterOutFunc, pBAMFile1, outBAM)
val exp = new SAMFileReader(pBAMFile3).asScala
val obs = new SAMFileReader(outBAM).asScala
for ((e, o) <- exp.zip(obs))
assert(e.getSAMString === o.getSAMString)
......@@ -305,7 +405,7 @@ class WipeReadsUnitTest extends Assertions {
assert(pathBAM.exists)
val argList = List(
"--inputBAM", pathBAM.toPath.toString,
"--targetRegions", bed01.getPath,
"--targetRegions", BEDFile1.getPath,
"--outputBAM", "mock.bam")
val thrown = intercept[IOException] {
parseOption(Map(), argList)
......@@ -317,7 +417,7 @@ class WipeReadsUnitTest extends Assertions {
@Test def testOptMissingRegions() = {
val pathRegion = "/i/dont/exist.bed"
val argList = List(
"--inputBAM", sbam01.getPath,
"--inputBAM", sBAMFile1.getPath,
"--targetRegions", pathRegion,
"--outputBAM", "mock.bam"
)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment