Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
e1444972
Commit
e1444972
authored
Oct 09, 2014
by
bow
Browse files
Update returned filter function in WipeReads to use SAMRecords only
parent
2c5d1151
Changes
2
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/WipeReads.scala
View file @
e1444972
...
...
@@ -149,7 +149,7 @@ object WipeReads extends MainCommand {
inBAM
:
File
,
inBAMIndex
:
File
=
null
,
filterOutMulti
:
Boolean
=
true
,
minMapQ
:
Int
=
0
,
readGroupIDs
:
Set
[
String
]
=
Set
(),
bloomSize
:
Int
=
100000000
,
bloomFp
:
Double
=
1
e
-
10
)
:
(
Any
=>
Boolean
)
=
{
bloomSize
:
Int
=
100000000
,
bloomFp
:
Double
=
1
e
-
10
)
:
(
SAMRecord
=>
Boolean
)
=
{
// TODO: implement optional index creation
/** Function to check for BAM file index and return a SAMFileReader given a File */
...
...
@@ -318,20 +318,14 @@ object WipeReads extends MainCommand {
.
foldLeft
(
bfm
.
create
())(
_
.++(
_
))
if
(
filterOutMulti
)
(
rec
:
Any
)
=>
rec
match
{
case
rec
:
SAMRecord
=>
filteredOutSet
.
contains
(
rec
.
getReadName
).
isTrue
case
rec
:
String
=>
filteredOutSet
.
contains
(
rec
).
isTrue
case
_
=>
false
}
(
rec
:
SAMRecord
)
=>
filteredOutSet
.
contains
(
rec
.
getReadName
).
isTrue
else
(
rec
:
Any
)
=>
rec
match
{
case
rec
:
SAMRecord
if
rec.getReadPairedFlag
=>
(
rec
:
SAMRecord
)
=>
{
if
(
rec
.
getReadPairedFlag
)
filteredOutSet
.
contains
(
rec
.
getReadName
+
"_"
+
rec
.
getAlignmentStart
).
isTrue
&&
filteredOutSet
.
contains
(
rec
.
getReadName
+
"_"
+
rec
.
getMateAlignmentStart
).
isTrue
case
rec
:
SAMRecord
if
!rec.getReadPairedFlag
=>
filteredOutSet
.
contains
(
rec
.
getReadName
+
"_"
+
rec
.
getAlignmentStart
).
isTrue
case
rec
:
String
=>
filteredOutSet
.
contains
(
rec
).
isTrue
case
_
=>
false
else
filteredOutSet
.
contains
(
rec
.
getReadName
+
"_"
+
rec
.
getAlignmentStart
).
isTrue
}
}
...
...
biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/apps/WipeReadsUnitTest.scala
View file @
e1444972
...
...
@@ -8,7 +8,7 @@ import java.nio.file.Paths
import
java.io.
{
File
,
IOException
}
import
scala.collection.JavaConverters._
import
htsjdk.samtools.
{
SAMFileReader
,
SAMRecord
}
import
htsjdk.samtools.
_
import
org.scalatest.Assertions
import
org.testng.annotations.Test
...
...
@@ -22,24 +22,87 @@ class WipeReadsUnitTest extends Assertions {
/**
 * Resolves a classpath test resource to a filesystem path string.
 *
 * @param p resource name as passed to `Class#getResource` (e.g. "/single01.bam")
 * @return the resource location converted via its URI to a path string
 * @throws IllegalArgumentException if no resource named `p` can be found
 */
private def resourcePath(p: String): String =
  // getResource returns null for a missing resource; wrap it so the failure
  // names the resource instead of surfacing as a bare NullPointerException
  Option(getClass.getResource(p))
    .map(url => Paths.get(url.toURI).toString)
    .getOrElse(throw new IllegalArgumentException("Test resource not found on classpath: " + p))
/**
 * Lazily built SAM line parser for the in-memory test records.
 *
 * Its header declares one sequence, "chrQ" with length 10000, and the
 * read groups "001" and "002".
 */
private lazy val samP: SAMLineParser = {
  val header = new SAMFileHeader
  header.addSequence(new SAMSequenceRecord("chrQ", 10000))
  // register read groups in the same order as before: "001" then "002"
  Seq("001", "002").foreach(id => header.addReadGroup(new SAMReadGroupRecord(id)))
  new SAMLineParser(header)
}
/**
 * Parses raw SAM text lines into records using `samP`.
 *
 * @param raws SAM-formatted lines, one record per string
 * @return the parsed records, in the same order as the input lines
 */
private def makeSAMs(raws: String*): Seq[SAMRecord] =
  for (line <- raws) yield samP.parseLine(line)
/**
 * Creates a new temporary file for BAM output.
 *
 * The file name uses the prefix "WipeReads" and a suffix made of a random
 * UUID followed by ".bam".
 *
 * @return the newly created temporary file
 */
private def makeTempBAM(): File = {
  val suffix = java.util.UUID.randomUUID.toString + ".bam"
  File.createTempFile("WipeReads", suffix)
}
/**
 * Builds the index file handle that corresponds to a BAM file.
 *
 * A trailing ".bam" on the BAM file's absolute path is removed (if present)
 * and ".bai" is appended. Only the `File` object is constructed; nothing is
 * written to disk here.
 *
 * @param bam the BAM file whose index path is wanted
 * @return a `File` pointing at the derived ".bai" path
 */
private def makeTempBAMIndex(bam: File): File = {
  val basePath = bam.getAbsolutePath.stripSuffix(".bam")
  new File(basePath + ".bai")
}
val
sbam01
=
new
File
(
resourcePath
(
"/single01.bam"
))
val
sbam02
=
new
File
(
resourcePath
(
"/single02.bam"
))
val
sbam03
=
new
File
(
resourcePath
(
"/single03.bam"
))
val
sbam04
=
new
File
(
resourcePath
(
"/single04.bam"
))
val
pbam01
=
new
File
(
resourcePath
(
"/paired01.bam"
))
val
pbam02
=
new
File
(
resourcePath
(
"/paired02.bam"
))
val
pbam03
=
new
File
(
resourcePath
(
"/paired03.bam"
))
val
bed01
=
new
File
(
resourcePath
(
"/rrna01.bed"
))
val
minArgList
=
List
(
"-I"
,
sbam01
.
toString
,
"-l"
,
bed01
.
toString
,
"-o"
,
"mock.bam"
)
val
sBAMFile1
=
new
File
(
resourcePath
(
"/single01.bam"
))
val
sBAMRecs1
=
makeSAMs
(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
,
"r01\t16\tchrQ\t190\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
,
"r01\t16\tchrQ\t290\t60\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001"
,
"r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001"
,
"r03\t16\tchrQ\t690\t60\t10M\t*\t0\t0\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001"
,
"r05\t0\tchrQ\t890\t60\t5M200N5M\t*\t0\t0\tGATACGATAC\tFEFEFEFEFE\tRG:Z:001"
,
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
)
val
sBAMFile2
=
new
File
(
resourcePath
(
"/single02.bam"
))
val
sBAMRecs2
=
makeSAMs
(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
,
"r01\t16\tchrQ\t190\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002"
,
"r01\t16\tchrQ\t290\t30\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002"
,
"r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001"
,
"r07\t16\tchrQ\t460\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001"
,
"r07\t16\tchrQ\t860\t30\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001"
,
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
,
"r08\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:002"
)
val
sBAMFile3
=
new
File
(
resourcePath
(
"/single03.bam"
))
val
sBAMFile4
=
new
File
(
resourcePath
(
"/single04.bam"
))
val
pBAMFile1
=
new
File
(
resourcePath
(
"/paired01.bam"
))
val
pBAMRecs1
=
makeSAMs
(
"r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
,
"r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001"
,
"r01\t163\tchrQ\t150\t60\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001"
,
"r01\t83\tchrQ\t190\t60\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001"
,
"r01\t163\tchrQ\t250\t60\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:001"
,
"r01\t83\tchrQ\t290\t60\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001"
,
"r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001"
,
"r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001"
,
"r03\t163\tchrQ\t650\t60\t10M\t=\t690\t50\tTTTTTCCCCC\tHHHHHHHHHH\tRG:Z:001"
,
"r03\t83\tchrQ\t690\t60\t10M\t=\t650\t-50\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001"
,
"r05\t99\tchrQ\t890\t60\t5M200N5M\t=\t1140\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
,
"r05\t147\tchrQ\t1140\t60\t10M\t=\t890\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001"
,
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
,
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:001"
)
val
pBAMFile2
=
new
File
(
resourcePath
(
"/paired02.bam"
))
val
pBAMRecs2
=
makeSAMs
(
"r02\t99\tchrQ\t50\t60\t10M\t=\t90\t50\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001"
,
"r02\t147\tchrQ\t90\t60\t10M\t=\t50\t-50\tATGCATGCAT\tEEFFGGHHII\tRG:Z:001"
,
"r01\t163\tchrQ\t150\t30\t10M\t=\t190\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002"
,
"r01\t83\tchrQ\t190\t30\t10M\t=\t150\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002"
,
"r01\t163\tchrQ\t250\t30\t10M\t=\t290\t50\tAAAAAGGGGG\tGGGGGGGGGG\tRG:Z:002"
,
"r01\t83\tchrQ\t290\t30\t10M\t=\t250\t-50\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:002"
,
"r04\t99\tchrQ\t450\t60\t10M\t=\t490\t50\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001"
,
"r04\t147\tchrQ\t490\t60\t10M\t=\t450\t-50\tGCATGCATGC\tEEFFGGHHII\tRG:Z:001"
,
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001"
,
"r08\t4\t*\t0\t0\t*\t*\t0\t0\tGCGCGCGCGC\tHIHIHIHIHI\tRG:Z:002"
)
val
pBAMFile3
=
new
File
(
resourcePath
(
"/paired03.bam"
))
val
BEDFile1
=
new
File
(
resourcePath
(
"/rrna01.bed"
))
val
minArgList
=
List
(
"-I"
,
sBAMFile1
.
toString
,
"-l"
,
BEDFile1
.
toString
,
"-o"
,
"mock.bam"
)
@Test
def
testMakeRawIntervalFromBED
()
=
{
val
intervals
:
Vector
[
RawInterval
]
=
makeRawIntervalFromFile
(
bed0
1
).
toVector
val
intervals
:
Vector
[
RawInterval
]
=
makeRawIntervalFromFile
(
BEDFile
1
).
toVector
assert
(
intervals
.
length
==
3
)
assert
(
intervals
.
last
.
chrom
==
"chrQ"
)
assert
(
intervals
.
last
.
start
==
291
)
...
...
@@ -58,40 +121,45 @@ class WipeReadsUnitTest extends Assertions {
// NOTE: while it's possible to have our filter produce false positives
// it is highly unlikely in our test cases as we are setting a very low FP rate
// and only filling the filter with a few items
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
bam0
1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
BAMFile
1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
// by default, set elements are SAM record read names
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r03"
))
assert
(!
isFilteredOut
(
"r05"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(
isFilteredOut
(
"r01"
))
assert
(
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
sBAMRecs1
(
0
)))
assert
(
isFilteredOut
(
sBAMRecs1
(
1
)))
assert
(
isFilteredOut
(
sBAMRecs1
(
2
)))
assert
(
isFilteredOut
(
sBAMRecs1
(
3
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
4
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
6
)))
}
@Test
def
testSingleBAMDefaultPartialExonOverlap
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
881
,
1000
)
// overlaps first exon of r05
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
sbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
"r01"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r03"
))
assert
(!
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(
isFilteredOut
(
"r05"
))
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
sBAMFile1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
sBAMRecs1
(
0
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
1
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
2
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
3
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
4
)))
assert
(
isFilteredOut
(
sBAMRecs1
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
6
)))
}
@Test
def
testSingleBAMDefaultNoExonOverlap
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrP"
,
881
,
1000
)
RawInterval
(
"chrP"
,
881
,
1000
),
RawInterval
(
"chrQ"
,
900
,
920
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
sbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
"r01"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r03"
))
assert
(!
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(!
isFilteredOut
(
"r05"
))
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
sBAMFile1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
sBAMRecs1
(
0
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
1
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
2
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
3
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
4
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs1
(
6
)))
}
@Test
def
testSingleBAMFilterOutMultiNotSet
()
=
{
...
...
@@ -100,15 +168,15 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
451
,
480
),
// overlaps r04
RawInterval
(
"chrQ"
,
991
,
1000
)
// overlaps nothing; lies in the spliced region of r05
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
bam0
1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
BAMFile
1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
filterOutMulti
=
false
)
assert
(!
isFilteredOut
(
"r02_50"
))
assert
(!
isFilteredOut
(
"r01_190"
))
assert
(
!
isFilteredOut
(
"r03_690"
))
assert
(
!
isFilteredOut
(
"r06_0"
))
assert
(
isFilteredOut
(
"r01_290"
))
assert
(
isFilteredOut
(
"r04_450"
))
assert
(!
isFilteredOut
(
"r05_890"
))
assert
(!
isFilteredOut
(
sBAMRecs1
(
0
)
))
assert
(!
isFilteredOut
(
sBAMRecs1
(
1
)
))
assert
(
isFilteredOut
(
sBAMRecs1
(
2
)
))
assert
(
isFilteredOut
(
sBAMRecs1
(
3
)
))
assert
(
!
isFilteredOut
(
sBAMRecs1
(
4
)
))
assert
(
!
isFilteredOut
(
sBAMRecs1
(
5
)
))
assert
(!
isFilteredOut
(
sBAMRecs1
(
6
)
))
}
@Test
def
testSingleBAMFilterMinMapQ
()
=
{
...
...
@@ -116,15 +184,17 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
291
,
320
),
RawInterval
(
"chrQ"
,
451
,
480
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
bam0
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
BAMFile
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
minMapQ
=
60
)
assert
(!
isFilteredOut
(
sBAMRecs2
(
0
)))
// r01 is not in since it is below the MAPQ threshold
assert
(!
isFilteredOut
(
"r01"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(!
isFilteredOut
(
"r08"
))
assert
(
isFilteredOut
(
"r04"
))
assert
(
isFilteredOut
(
"r07"
))
assert
(!
isFilteredOut
(
sBAMRecs2
(
1
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
2
)))
assert
(
isFilteredOut
(
sBAMRecs2
(
3
)))
assert
(
isFilteredOut
(
sBAMRecs2
(
4
)))
assert
(
isFilteredOut
(
sBAMRecs2
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
6
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
7
)))
}
@Test
def
testSingleBAMFilterMinMapQFilterOutMultiNotSet
()
=
{
...
...
@@ -132,18 +202,18 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
291
,
320
),
RawInterval
(
"chrQ"
,
451
,
480
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
bam0
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
BAMFile
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
minMapQ
=
60
,
filterOutMulti
=
false
)
assert
(!
isFilteredOut
(
"r02_50"
))
assert
(!
isFilteredOut
(
"r01_190"
))
assert
(!
isFilteredOut
(
sBAMRecs2
(
0
)
))
assert
(!
isFilteredOut
(
sBAMRecs2
(
1
)
))
// this r01 is not in since it is below the MAPQ threshold
assert
(!
isFilteredOut
(
"r01_290"
))
assert
(!
isFilteredOut
(
"r07_860"
))
assert
(!
isFilteredOut
(
"r06_0"
))
assert
(!
isFilteredOut
(
"r08_0"
))
assert
(
isFilteredOut
(
"r04_450"
))
assert
(!
isFilteredOut
(
sBAMRecs2
(
2
)))
assert
(
isFilteredOut
(
sBAMRecs2
(
3
)))
assert
(
isFilteredOut
(
sBAMRecs2
(
4
)))
// this r07 is not in since filterOutMulti is false
assert
(
isFilteredOut
(
"r07_460"
))
assert
(!
isFilteredOut
(
sBAMRecs2
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
6
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
7
)))
}
@Test
def
testSingleBAMFilterReadGroupIDs
()
=
{
...
...
@@ -151,14 +221,17 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
291
,
320
),
RawInterval
(
"chrQ"
,
451
,
480
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
bam0
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
s
BAMFile
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
readGroupIDs
=
Set
(
"002"
,
"003"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(!
isFilteredOut
(
"r08"
))
assert
(!
isFilteredOut
(
sBAMRecs2
(
0
)))
// only r01 is in the set since it is RG 002
assert
(
isFilteredOut
(
"r01"
))
assert
(
isFilteredOut
(
sBAMRecs2
(
1
)))
assert
(
isFilteredOut
(
sBAMRecs2
(
2
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
3
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
4
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
5
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
6
)))
assert
(!
isFilteredOut
(
sBAMRecs2
(
7
)))
}
@Test
def
testPairBAMDefault
()
=
{
...
...
@@ -167,26 +240,42 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
451
,
480
),
// overlaps r04
RawInterval
(
"chrQ"
,
991
,
1000
)
// overlaps nothing; lies in the spliced region of r05
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
pbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r03"
))
assert
(!
isFilteredOut
(
"r05"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(
isFilteredOut
(
"r01"
))
assert
(
isFilteredOut
(
"r04"
))
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
pBAMFile1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
pBAMRecs1
(
0
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
1
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
2
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
3
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
4
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
5
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
6
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
7
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
8
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
9
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
10
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
11
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
12
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
13
)))
}
@Test
def
testPairBAMPartialExonOverlap
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
891
,
1000
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
pbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
"r01"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r03"
))
assert
(!
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(
isFilteredOut
(
"r05"
))
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
pBAMFile1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
assert
(!
isFilteredOut
(
pBAMRecs1
(
0
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
1
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
2
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
3
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
4
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
5
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
6
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
7
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
8
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
9
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
10
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
11
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
12
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
13
)))
}
@Test
def
testPairBAMFilterOutMultiNotSet
()
=
{
...
...
@@ -195,21 +284,22 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
451
,
480
),
// overlaps r04
RawInterval
(
"chrQ"
,
991
,
1000
)
// overlaps nothing; lies in the spliced region of r05
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
p
bam0
1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
p
BAMFile
1
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
filterOutMulti
=
false
)
assert
(!
isFilteredOut
(
"r02_50"
))
assert
(!
isFilteredOut
(
"r02_90"
))
assert
(!
isFilteredOut
(
"r01_150"
))
assert
(!
isFilteredOut
(
"r01_190"
))
assert
(!
isFilteredOut
(
"r03_650"
))
assert
(!
isFilteredOut
(
"r03_690"
))
assert
(!
isFilteredOut
(
"r06_0"
))
assert
(
isFilteredOut
(
"r01_250"
))
assert
(
isFilteredOut
(
"r01_290"
))
assert
(
isFilteredOut
(
"r04_450"
))
assert
(
isFilteredOut
(
"r04_490"
))
assert
(!
isFilteredOut
(
"r05_850"
))
assert
(!
isFilteredOut
(
"r05_1140"
))
assert
(!
isFilteredOut
(
pBAMRecs1
(
0
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
1
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
2
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
3
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
4
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
5
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
6
)))
assert
(
isFilteredOut
(
pBAMRecs1
(
7
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
8
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
9
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
10
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
11
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
12
)))
assert
(!
isFilteredOut
(
pBAMRecs1
(
13
)))
}
@Test
def
testPairBAMFilterMinMapQ
()
=
{
...
...
@@ -217,14 +307,19 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
291
,
320
),
RawInterval
(
"chrQ"
,
451
,
480
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
p
bam0
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
p
BAMFile
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
minMapQ
=
60
)
// r01 is not in since it is below the MAPQ threshold
assert
(!
isFilteredOut
(
"r01"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(!
isFilteredOut
(
"r08"
))
assert
(
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
pBAMRecs2
(
0
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
1
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
2
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
3
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
4
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
5
)))
assert
(
isFilteredOut
(
pBAMRecs2
(
6
)))
assert
(
isFilteredOut
(
pBAMRecs2
(
7
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
8
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
9
)))
}
@Test
def
testPairBAMFilterReadGroupIDs
()
=
{
...
...
@@ -232,14 +327,19 @@ class WipeReadsUnitTest extends Assertions {
RawInterval
(
"chrQ"
,
291
,
320
),
RawInterval
(
"chrQ"
,
451
,
480
)
)
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
p
bam0
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
val
isFilteredOut
=
makeFilterOutFunction
(
intervals
,
p
BAMFile
2
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
readGroupIDs
=
Set
(
"002"
,
"003"
))
assert
(!
isFilteredOut
(
"r02"
))
assert
(!
isFilteredOut
(
"r04"
))
assert
(!
isFilteredOut
(
"r06"
))
assert
(!
isFilteredOut
(
"r08"
))
// only r01 is in the set since it is RG 002
assert
(
isFilteredOut
(
"r01"
))
assert
(!
isFilteredOut
(
pBAMRecs2
(
0
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
1
)))
assert
(
isFilteredOut
(
pBAMRecs2
(
2
)))
assert
(
isFilteredOut
(
pBAMRecs2
(
3
)))
assert
(
isFilteredOut
(
pBAMRecs2
(
4
)))
assert
(
isFilteredOut
(
pBAMRecs2
(
5
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
6
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
7
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
8
)))
assert
(!
isFilteredOut
(
pBAMRecs2
(
9
)))
}
@Test
def
testWriteSingleBAMDefault
()
=
{
...
...
@@ -248,8 +348,8 @@ class WipeReadsUnitTest extends Assertions {
val
outBAMIndex
=
makeTempBAMIndex
(
outBAM
)
outBAM
.
deleteOnExit
()
outBAMIndex
.
deleteOnExit
()
writeFilteredBAM
(
mockFilterOutFunc
,
s
bam0
1
,
outBAM
)
val
exp
=
new
SAMFileReader
(
s
bam0
3
).
asScala
writeFilteredBAM
(
mockFilterOutFunc
,
s
BAMFile
1
,
outBAM
)
val
exp
=
new
SAMFileReader
(
s
BAMFile
3
).
asScala
val
obs
=
new
SAMFileReader
(
outBAM
).
asScala
for
((
e
,
o
)
<-
exp
.
zip
(
obs
))
assert
(
e
.
getSAMString
===
o
.
getSAMString
)
...
...
@@ -267,8 +367,8 @@ class WipeReadsUnitTest extends Assertions {
val
filteredOutBAMIndex
=
makeTempBAMIndex
(
filteredOutBAM
)
filteredOutBAM
.
deleteOnExit
()
filteredOutBAMIndex
.
deleteOnExit
()
writeFilteredBAM
(
mockFilterOutFunc
,
s
bam0
1
,
outBAM
,
filteredOutBAM
=
filteredOutBAM
)
val
exp
=
new
SAMFileReader
(
s
bam0
4
).
asScala
writeFilteredBAM
(
mockFilterOutFunc
,
s
BAMFile
1
,
outBAM
,
filteredOutBAM
=
filteredOutBAM
)
val
exp
=
new
SAMFileReader
(
s
BAMFile
4
).
asScala
val
obs
=
new
SAMFileReader
(
filteredOutBAM
).
asScala
for
((
e
,
o
)
<-
exp
.
zip
(
obs
))
assert
(
e
.
getSAMString
===
o
.
getSAMString
)
...
...
@@ -284,8 +384,8 @@ class WipeReadsUnitTest extends Assertions {
val
outBAMIndex
=
makeTempBAMIndex
(
outBAM
)
outBAM
.
deleteOnExit
()
outBAMIndex
.
deleteOnExit
()
writeFilteredBAM
(
mockFilterOutFunc
,
p
bam0
1
,
outBAM
)
val
exp
=
new
SAMFileReader
(
p
bam0
3
).
asScala
writeFilteredBAM
(
mockFilterOutFunc
,
p
BAMFile
1
,
outBAM
)
val
exp
=
new
SAMFileReader
(
p
BAMFile
3
).
asScala
val
obs
=
new
SAMFileReader
(
outBAM
).
asScala
for
((
e
,
o
)
<-
exp
.
zip
(
obs
))
assert
(
e
.
getSAMString
===
o
.
getSAMString
)
...
...
@@ -305,7 +405,7 @@ class WipeReadsUnitTest extends Assertions {
assert
(
pathBAM
.
exists
)
val
argList
=
List
(
"--inputBAM"
,
pathBAM
.
toPath
.
toString
,
"--targetRegions"
,
bed0
1
.
getPath
,
"--targetRegions"
,
BEDFile
1
.
getPath
,
"--outputBAM"
,
"mock.bam"
)
val
thrown
=
intercept
[
IOException
]
{
parseOption
(
Map
(),
argList
)
...
...
@@ -317,7 +417,7 @@ class WipeReadsUnitTest extends Assertions {
@Test
def
testOptMissingRegions
()
=
{
val
pathRegion
=
"/i/dont/exist.bed"
val
argList
=
List
(
"--inputBAM"
,
s
bam0
1
.
getPath
,
"--inputBAM"
,
s
BAMFile
1
.
getPath
,
"--targetRegions"
,
pathRegion
,
"--outputBAM"
,
"mock.bam"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment