Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
46cfaf0e
Commit
46cfaf0e
authored
Oct 06, 2014
by
bow
Browse files
Create interval tree map lazily in WipeReads
parent
d5e6eb8c
Changes
2
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/WipeReads.scala
View file @
46cfaf0e
...
...
@@ -170,6 +170,18 @@ object WipeReads {
}
}
/** function to make IntervalTree from our RawInterval objects
*
* @param ri iterable over RawInterval objects
* @return IntervalTree objects, filled with intervals from RawInterval
*/
def
makeIntervalTree
(
ri
:
Iterable
[
RawInterval
])
:
IntervalTree
=
{
val
ivt
=
new
IntervalTree
for
(
iv
:
RawInterval
<-
ri
)
ivt
.
insert
(
new
Interval
(
iv
.
start
,
iv
.
end
))
ivt
}
/**
* Function to ensure that a SAMRecord overlaps our target regions
*
...
...
@@ -187,7 +199,7 @@ object WipeReads {
if
(
rec
.
getSAMString
.
split
(
"\t"
)(
5
).
contains
(
"N"
))
{
for
(
ab
:
AlignmentBlock
<-
rec.getAlignmentBlocks.asScala
)
{
if
(!
ivtm
(
rec
.
getReferenceName
).
findOverlapping
(
new
Interval
(
ab
.
getReferenceStart
,
ab
.
getReferenceStart
+
ab
.
getLength
-
1
,
null
)).
isEmpty
)
new
Interval
(
ab
.
getReferenceStart
,
ab
.
getReferenceStart
+
ab
.
getLength
-
1
)).
isEmpty
)
return
true
}
false
...
...
@@ -212,25 +224,28 @@ object WipeReads {
val
secondBAM
=
prepIndexedInputBAM
()
val
bfm
=
BloomFilter
(
bloomSize
,
bloomFp
,
13
)
val
intervals
=
iv
.
toList
val
queryIntervals
=
intervals
.
flatMap
(
x
=>
monadicMakeQueryInterval
(
firstBAM
,
x
)).
toArray
/** interval tree for ensuring that split reads do overlap our target regions */
val
ivtm
=
scala
.
collection
.
mutable
.
Map
.
empty
[
String
,
IntervalTree
]
for
(
iv
:
RawInterval
<-
intervals
)
{
ivtm
.
getOrElseUpdate
(
iv
.
chrom
,
new
IntervalTree
).
insert
(
new
Interval
(
iv
.
start
,
iv
.
end
))
}
/* NOTE: the interval vector here should be bypass-able if we can make
the BAM query intervals with Interval objects. This is not possible
at the moment since we can not retrieve star and end coordinates
of an Interval, so we resort to our own RawInterval vector
*/
lazy
val
intervals
=
iv
.
toVector
lazy
val
intervalTreeMap
:
Map
[
String
,
IntervalTree
]
=
intervals
.
groupBy
(
x
=>
x
.
chrom
)
.
map
({
case
(
key
,
value
)
=>
(
key
,
makeIntervalTree
(
value
))
})
lazy
val
queryIntervals
=
intervals
.
flatMap
(
x
=>
monadicMakeQueryInterval
(
firstBAM
,
x
)).
toArray
val
filteredOutSet
:
BF
=
firstBAM
.
queryOverlapping
(
queryIntervals
).
asScala
// ensure spliced reads have at least one block overlapping target region
.
filter
(
x
=>
alignmentBlockOverlaps
(
x
,
i
vtm
.
to
Map
))
.
filter
(
x
=>
alignmentBlockOverlaps
(
x
,
i
ntervalTree
Map
))
// filter for MAPQ on target region reads
.
filter
(
x
=>
x
.
getMappingQuality
>=
minMapQ
)
// filter on specific read group IDs
.
filter
(
x
=>
rgFilter
(
x
))
// TODO: how to directly get SAMRecord and its pairs without multiple flattens?
.
flatMap
(
x
=>
Vector
(
Some
(
x
),
monadicMateQuery
(
secondBAM
,
x
)).
flatten
)
// transf
r
om SAMRecord to string
// transfo
r
m SAMRecord to string
.
map
(
x
=>
SAMToElem
(
x
))
// build bloom filter using fold to prevent loading all strings to memory
.
foldLeft
(
bfm
.
create
())(
_
.+(
_
))
...
...
biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/apps/WipeReadsUnitTest.scala
View file @
46cfaf0e
...
...
@@ -35,7 +35,7 @@ class WipeReadsUnitTest extends Assertions {
val
bed01
=
new
File
(
resourcePath
(
"/rrna01.bed"
))
val
minArgList
=
List
(
"-I"
,
sbam01
.
toString
,
"-l"
,
bed01
.
toString
,
"-o"
,
"mock.bam"
)
@Test
def
r
awIntervalBED
()
=
{
@Test
def
testMakeR
awInterval
From
BED
()
=
{
val
intervals
:
Vector
[
RawInterval
]
=
makeRawIntervalFromFile
(
bed01
).
toVector
assert
(
intervals
.
length
==
3
)
assert
(
intervals
.
head
.
chrom
==
"chrQ"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment