Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
149f43d4
Commit
149f43d4
authored
Oct 04, 2014
by
bow
Browse files
Add tests for WipeReads overlap finders
parent
d502cf63
Changes
15
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/WipeReads.scala
View file @
149f43d4
...
...
@@ -232,6 +232,4 @@ object WipeReads {
|By default, if the removed reads are also mapped to other regions outside
|the given ones, they will also be removed.
"""
.
stripMargin
}
biopet-framework/src/test/resources/README.txt
0 → 100644
View file @
149f43d4
Test datasets for the Biopet Framework
======================================
* Please add an entry here when adding a new test dataset.
Filename Explanation
======== ===========
single01.sam single-end SAM file, used for testing WipeReads
single01.bam single01.sam compressed with samtools v0.1.18
single01.bam.bai index for single01.bam
single02.sam single-end SAM file, used for testing WipeReads
single02.bam single02.sam compressed with samtools v0.1.18
single02.bam.bai index for single02.bam
paired01.sam paired-end SAM file, used for testing WipeReads
paired01.bam paired01.sam compressed with samtools v0.1.18
paired01.bam.bai index for paired01.bam
biopet-framework/src/test/resources/paired01.bam
0 → 100644
View file @
149f43d4
File added
biopet-framework/src/test/resources/paired01.bam.bai
0 → 100644
View file @
149f43d4
File added
biopet-framework/src/test/resources/paired01.sam
0 → 100644
View file @
149f43d4
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:10000
@RG ID:001 DS:paired-end reads SM:WipeReadsTestCase
r02 99 chrQ 50 60 10M = 90 50 TACGTACGTA EEFFGGHHII RG:Z:001
r02 147 chrQ 90 60 10M = 50 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r01 163 chrQ 250 60 10M = 290 50 AAAAAGGGGG GGGGGGGGGG RG:Z:001
r01 83 chrQ 290 60 10M = 250 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r04 99 chrQ 450 60 10M = 490 50 CGTACGTACG EEFFGGHHII RG:Z:001
r04 147 chrQ 490 60 10M = 450 -50 GCATGCATGC EEFFGGHHII RG:Z:001
r03 163 chrQ 650 60 10M = 690 50 TTTTTCCCCC HHHHHHHHHH RG:Z:001
r03 83 chrQ 690 60 10M = 650 -50 CCCCCTTTTT HHHHHHHHHH RG:Z:001
r02 99 chrQ 850 60 5M100N5M = 1290 50 TACGTACGTA EEFFGGHHII RG:Z:001
r02 147 chrQ 1290 60 5M100N5M = 1250 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
r06 4 * 0 0 * * 0 0 GCGCGCGCGC HIHIHIHIHI RG:Z:001
biopet-framework/src/test/resources/single.bam
deleted
100644 → 0
View file @
d502cf63
File deleted
biopet-framework/src/test/resources/single.bam.bai
deleted
100644 → 0
View file @
d502cf63
File deleted
biopet-framework/src/test/resources/single.sam
deleted
100644 → 0
View file @
d502cf63
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:1000
@RG ID:002 DS:single-end reads SM:WipeReadsTestCase
r02 0 chrQ 50 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:002
r01 16 chrQ 290 60 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r04 0 chrQ 450 60 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:002
r03 16 chrQ 690 60 10M * 0 0 CCCCCTTTTT HHHHHHHHHH RG:Z:002
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:002
biopet-framework/src/test/resources/single01.bam
0 → 100644
View file @
149f43d4
File added
biopet-framework/src/test/resources/single01.bam.bai
0 → 100644
View file @
149f43d4
File added
biopet-framework/src/test/resources/single01.sam
0 → 100644
View file @
149f43d4
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:10000
@RG ID:001 DS:single-end reads SM:WipeReadsTestCase
r02 0 chrQ 50 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:001
r01 16 chrQ 190 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:001
r01 16 chrQ 290 60 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r04 0 chrQ 450 60 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:001
r03 16 chrQ 690 60 10M * 0 0 CCCCCTTTTT HHHHHHHHHH RG:Z:001
r05 0 chrQ 890 60 5M200N5M * 0 0 GATACGATAC FEFEFEFEFE RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
biopet-framework/src/test/resources/single02.bam
0 → 100644
View file @
149f43d4
File added
biopet-framework/src/test/resources/single02.bam.bai
0 → 100644
View file @
149f43d4
File added
biopet-framework/src/test/resources/single02.sam
0 → 100644
View file @
149f43d4
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:10000
@RG ID:001 DS:single-end reads SM:WipeReadsTestCase
@RG ID:002 DS:single-end reads SM:WipeReadsTestCase
r02 0 chrQ 50 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:001
r01 16 chrQ 190 30 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:002
r01 16 chrQ 290 30 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r04 0 chrQ 450 60 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
r08 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:002
biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/apps/WipeReadsUnitTest.scala
View file @
149f43d4
...
...
@@ -18,9 +18,113 @@ class WipeReadsUnitTest extends Assertions {
private
def
resourcePath
(
p
:
String
)
:
String
=
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
val
bam01
=
resourcePath
(
"/single.bam"
)
val
bed01
=
resourcePath
(
"/rrna01.bed"
)
val
minArgList
=
List
(
"-I"
,
bam01
.
toString
,
"-l"
,
bed01
.
toString
,
"-o"
,
"mock.bam"
)
val
sbam01
=
new
File
(
resourcePath
(
"/single01.bam"
))
val
sbam02
=
new
File
(
resourcePath
(
"/single02.bam"
))
val
pbam01
=
new
File
(
resourcePath
(
"/paired01.bam"
))
val
bed01
=
new
File
(
resourcePath
(
"/rrna01.bed"
))
val
minArgList
=
List
(
"-I"
,
sbam01
.
toString
,
"-l"
,
bed01
.
toString
,
"-o"
,
"mock.bam"
)
@Test
def
rawIntervalBED
()
=
{
val
intervals
:
Vector
[
RawInterval
]
=
makeRawIntervalFromFile
(
bed01
).
toVector
assert
(
intervals
.
length
==
3
)
assert
(
intervals
.
head
.
chrom
==
"chrQ"
)
assert
(
intervals
.
head
.
start
==
291
)
assert
(
intervals
.
head
.
end
==
320
)
assert
(
intervals
.
head
.
strand
==
"+"
)
assert
(
intervals
.
last
.
chrom
==
"chrQ"
)
assert
(
intervals
.
last
.
start
==
991
)
assert
(
intervals
.
last
.
end
==
1000
)
assert
(
intervals
.
last
.
strand
==
"+"
)
}
@Test
def
testSingleBAMDefault
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
291
,
320
,
"+"
),
// overlaps r01, second hit,
RawInterval
(
"chrQ"
,
451
,
480
,
"+"
),
// overlaps r04
RawInterval
(
"chrQ"
,
991
,
1000
,
"+"
)
// overlaps nothing; lies in the spliced region of r05
)
// NOTE: while it's possible to have our filter produce false positives
// it is highly unlikely in our test cases as we are setting a very low FP rate
// and only filling the filter with a few items
val
bf
=
makeBloomFilter
(
intervals
,
sbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
// by default, set elements are SAM record read names
assert
(!
bf
.
contains
(
"r02"
).
isTrue
)
assert
(!
bf
.
contains
(
"r03"
).
isTrue
)
/* TODO: exclude r05 from set since the mapped read does not overlap with our interval
this is a limitation of htsjdk, as it checks for overlaps with records instead of alignment blocks
assert(!bf.contains("r05").isTrue)
*/
assert
(!
bf
.
contains
(
"r06"
).
isTrue
)
assert
(
bf
.
contains
(
"r01"
).
isTrue
)
assert
(
bf
.
contains
(
"r04"
).
isTrue
)
}
@Test
def
testSingleBAMFilterOutMultiNotSet
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
291
,
320
,
"+"
),
// overlaps r01, second hit,
RawInterval
(
"chrQ"
,
451
,
480
,
"+"
),
// overlaps r04
RawInterval
(
"chrQ"
,
991
,
1000
,
"+"
)
// overlaps nothing; lies in the spliced region of r05
)
val
bf
=
makeBloomFilter
(
intervals
,
sbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
filterOutMulti
=
false
)
assert
(!
bf
.
contains
(
"r02\t0\tchrQ\t50\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001\n"
).
isTrue
)
assert
(!
bf
.
contains
(
"r01\t16\tchrQ\t190\t60\t10M\t*\t0\t0\tTACGTACGTA\tEEFFGGHHII\tRG:Z:001\n"
).
isTrue
)
assert
(!
bf
.
contains
(
"r03\t16\tchrQ\t690\t60\t10M\t*\t0\t0\tCCCCCTTTTT\tHHHHHHHHHH\tRG:Z:001\n"
).
isTrue
)
assert
(!
bf
.
contains
(
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001\n"
).
isTrue
)
assert
(!
bf
.
contains
(
"r06\t4\t*\t0\t0\t*\t*\t0\t0\tATATATATAT\tHIHIHIHIHI\tRG:Z:001\n"
).
isTrue
)
assert
(
bf
.
contains
(
"r01\t16\tchrQ\t290\t60\t10M\t*\t0\t0\tGGGGGAAAAA\tGGGGGGGGGG\tRG:Z:001\n"
).
isTrue
)
assert
(
bf
.
contains
(
"r04\t0\tchrQ\t450\t60\t10M\t*\t0\t0\tCGTACGTACG\tEEFFGGHHII\tRG:Z:001\n"
).
isTrue
)
/* TODO: exclude r05 from set since the mapped read does not overlap with our interval
this is a limitation of htsjdk, as it checks for overlaps with records instead of alignment blocks
assert(!bf.contains("r05\t0\tchrQ\t890\t60\t5M200N5M\t*\t0\t0\tGATACGATAC\tFEFEFEFEFE\tRG:Z:001\n").isTrue)
*/
}
@Test
def
testSingleBAMFilterMinMapQ
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
291
,
320
,
"+"
),
RawInterval
(
"chrQ"
,
451
,
480
,
"+"
)
)
val
bf
=
makeBloomFilter
(
intervals
,
sbam02
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
minMapQ
=
60
)
assert
(!
bf
.
contains
(
"r01"
).
isTrue
)
assert
(!
bf
.
contains
(
"r02"
).
isTrue
)
assert
(!
bf
.
contains
(
"r06"
).
isTrue
)
assert
(!
bf
.
contains
(
"r08"
).
isTrue
)
// only r04 is in the set since r01 is below the MAPQ threshold
assert
(
bf
.
contains
(
"r04"
).
isTrue
)
}
@Test
def
testSingleBAMFilterReadGroupIDs
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
291
,
320
,
"+"
),
RawInterval
(
"chrQ"
,
451
,
480
,
"+"
)
)
val
bf
=
makeBloomFilter
(
intervals
,
sbam02
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
,
readGroupIDs
=
Set
(
"002"
,
"003"
))
assert
(!
bf
.
contains
(
"r02"
).
isTrue
)
assert
(!
bf
.
contains
(
"r04"
).
isTrue
)
assert
(!
bf
.
contains
(
"r06"
).
isTrue
)
assert
(!
bf
.
contains
(
"r08"
).
isTrue
)
// only r01 is in the set since it is RG 002
assert
(
bf
.
contains
(
"r01"
).
isTrue
)
}
@Test
def
testPairBAMDefault
()
=
{
val
intervals
:
Iterator
[
RawInterval
]
=
Iterator
(
RawInterval
(
"chrQ"
,
291
,
320
,
"+"
),
// overlaps r01, second hit,
RawInterval
(
"chrQ"
,
451
,
480
,
"+"
),
// overlaps r04
RawInterval
(
"chrQ"
,
991
,
1000
,
"+"
)
// overlaps nothing; lies in the spliced region of r05
)
val
bf
=
makeBloomFilter
(
intervals
,
pbam01
,
bloomSize
=
1000
,
bloomFp
=
1
e
-
10
)
// by default, set elements are SAM record read names
assert
(!
bf
.
contains
(
"r02"
).
isTrue
)
assert
(!
bf
.
contains
(
"r03"
).
isTrue
)
/* TODO: exclude r05 from set since the mapped read does not overlap with our interval
this is a limitation of htsjdk, as it checks for overlaps with records instead of alignment blocks
assert(!bf.contains("r05").isTrue)
*/
assert
(!
bf
.
contains
(
"r06"
).
isTrue
)
assert
(
bf
.
contains
(
"r01"
).
isTrue
)
assert
(
bf
.
contains
(
"r04"
).
isTrue
)
}
@Test
def
testOptMinimum
()
=
{
val
opts
=
parseOption
(
Map
(),
minArgList
)
...
...
@@ -34,7 +138,7 @@ class WipeReadsUnitTest extends Assertions {
assert
(
pathBAM
.
exists
)
val
argList
=
List
(
"--inputBAM"
,
pathBAM
.
toPath
.
toString
,
"--targetRegions"
,
bed01
.
toString
,
"--targetRegions"
,
bed01
.
getPath
,
"--outputBAM"
,
"mock.bam"
)
val
thrown
=
intercept
[
IOException
]
{
parseOption
(
Map
(),
argList
)
...
...
@@ -46,7 +150,7 @@ class WipeReadsUnitTest extends Assertions {
@Test
def
testOptMissingRegions
()
=
{
val
pathRegion
=
"/i/dont/exist.bed"
val
argList
=
List
(
"--inputBAM"
,
bam01
,
"--inputBAM"
,
s
bam01
.
getPath
,
"--targetRegions"
,
pathRegion
,
"--outputBAM"
,
"mock.bam"
)
...
...
@@ -118,7 +222,3 @@ class WipeReadsUnitTest extends Assertions {
assert
(
opts
(
"readGroup"
)
==
Vector
(
"g1"
,
"g2"
))
}
}
/*
TODO: missing whole argument
*/
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment