Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
d502cf63
Commit
d502cf63
authored
Oct 03, 2014
by
bow
Browse files
Refactor query interval builder for fewer BAM objects
parent
a748e6fa
Changes
1
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/WipeReads.scala
View file @
d502cf63
...
...
@@ -36,15 +36,15 @@ object WipeReads {
/** Alias for a string-keyed map holding values of any type (presumably parsed options; confirm against callers). */
type OptionMap = Map[String, Any]
/**
 * An interval as read from an input file, before it is resolved against a BAM file.
 *
 * @param chrom  chromosome / sequence name as written in the interval file
 * @param start  interval start (NOTE(review): coordinate convention is set by the parser; confirm)
 * @param end    interval end (NOTE(review): coordinate convention is set by the parser; confirm)
 * @param strand strand of the interval, kept as a raw string (encoding not visible here; confirm)
 */
case class RawInterval(
  chrom: String,
  start: Int,
  end: Int,
  strand: String
)
/**
 * Enumeration of strand options.
 *
 * Members are declared in the order Identical, Opposite, Both, so their ids
 * are 0, 1 and 2 respectively.
 * NOTE(review): how each option is interpreted is decided by the code that
 * consumes it, which is not visible here.
 */
object Strand extends Enumeration {
  type Strand = Value

  val Identical = Value
  val Opposite = Value
  val Both = Value
}
// TODO: check that interval chrom is in the BAM file (optionally, when prepended with 'chr' too)
def
makeQueryIntervalFromFile
(
inFile
:
File
,
inBAM
:
SAMFileReader
)
:
Iterator
[
QueryInterval
]
=
{
case
class
RawInterval
(
chrom
:
String
,
start
:
Int
,
end
:
Int
,
strand
:
String
)
def
makeRawIntervalFromFile
(
inFile
:
File
)
:
Iterator
[
RawInterval
]
=
{
def
makeRawIntervalFromBED
(
inFile
:
File
)
:
Iterator
[
RawInterval
]
=
// BED file coordinates are 0-based, half open so we need to do some conversion
...
...
@@ -73,12 +73,10 @@ object WipeReads {
throw
new
IllegalArgumentException
(
"Unexpected interval file type: "
+
inFile
.
getPath
)
iterFunc
(
inFile
)
.
filter
(
x
=>
inBAM
.
getFileHeader
.
getSequenceIndex
(
x
.
chrom
)
>
-
1
)
.
map
(
x
=>
inBAM
.
makeQueryInterval
(
x
.
chrom
,
x
.
start
,
x
.
end
))
}
// TODO: set minimum fraction for overlap
def
makeBloomFilter
(
iv
:
Iterator
[
Query
Interval
],
def
makeBloomFilter
(
iv
:
Iterator
[
Raw
Interval
],
inBAM
:
File
,
inBAMIndex
:
File
=
null
,
filterOutMulti
:
Boolean
=
true
,
minMapQ
:
Int
=
0
,
readGroupIDs
:
Set
[
String
]
=
Set
(),
...
...
@@ -96,6 +94,20 @@ object WipeReads {
sfr
}
// create objects for querying intervals, allowing for
// chromosome names with or without a "chr" prefix
/**
 * Builds a query interval for `ri` against the sequences declared in the header of `inBAM`.
 *
 * The chromosome name is looked up as given first. If that is not found, one
 * alternative spelling is tried: the name without its leading "chr" when it
 * has one, or the name with "chr" prepended when it does not.
 *
 * @param inBAM reader whose file header is used to look up sequence names
 * @param ri    raw interval to convert
 * @return `Some` query interval using the first name found in the header,
 *         or `None` when neither spelling is present
 */
def monadicMakeQueryInterval(inBAM: SAMFileReader, ri: RawInterval): Option[QueryInterval] = {
  val header = inBAM.getFileHeader
  val name = ri.chrom

  // the single alternative spelling to fall back on when the name as given is unknown
  val alternative =
    if (name.startsWith("chr")) name.substring(3)
    else "chr" + name

  // order matters: the name as written in the interval file takes precedence
  List(name, alternative)
    .find(candidate => header.getSequenceIndex(candidate) > -1)
    .map(candidate => inBAM.makeQueryInterval(candidate, ri.start, ri.end))
}
// TODO: can we accumulate errors / exceptions instead of ignoring them?
def
monadicMateQuery
(
inBAM
:
SAMFileReader
,
rec
:
SAMRecord
)
:
Option
[
SAMRecord
]
=
try
{
...
...
@@ -121,8 +133,9 @@ object WipeReads {
val
firstBAM
=
prepIndexedInputBAM
()
val
secondBAM
=
prepIndexedInputBAM
()
val
bfm
=
BloomFilter
(
bloomSize
,
bloomFp
,
13
)
val
intervals
=
iv
.
flatMap
(
x
=>
monadicMakeQueryInterval
(
firstBAM
,
x
)).
toArray
firstBAM
.
queryOverlapping
(
i
v
.
toArray
).
asScala
firstBAM
.
queryOverlapping
(
i
ntervals
).
asScala
// filter for MAPQ on target region reads
.
filter
(
x
=>
x
.
getMappingQuality
>=
minMapQ
)
// filter on specific read group IDs
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment