Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
1e0a70bb
Commit
1e0a70bb
authored
Oct 02, 2014
by
bow
Browse files
Add function for set filtering in WipeReads
parent
2fe0577e
Changes
1
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/WipeReads.scala
View file @
1e0a70bb
...
...
@@ -7,6 +7,7 @@ package nl.lumc.sasc.biopet.core.apps
import
java.io.
{
File
,
IOException
}
import
scala.io.Source
import
com.twitter.algebird.
{
BF
,
BloomFilter
}
import
htsjdk.samtools.SAMFileReader
import
htsjdk.samtools.SAMFileReader.QueryInterval
import
htsjdk.samtools.SAMRecord
...
...
@@ -75,42 +76,57 @@ object WipeReads {
.
map
(
x
=>
inBAM
.
makeQueryInterval
(
x
.
chrom
,
x
.
start
,
x
.
end
))
}
// TODO: implement optional index creation
private
def
prepIndexedInputBAM
(
inFile
:
File
,
inFileIndex
:
File
=
null
)
:
SAMFileReader
=
if
(
inFileIndex
!=
null
)
new
SAMFileReader
(
inFile
,
inFileIndex
)
// TODO: set minimum fraction for overlap
def
makeBloomFilter
(
iv
:
Iterator
[
QueryInterval
],
inBAM
:
File
,
inBAMIndex
:
File
=
null
,
filterOutMulti
:
Boolean
=
true
,
minMapQ
:
Int
=
0
,
readGroupIDs
:
Set
[
String
]
=
Set
())
:
BF
=
{
// TODO: implement optional index creation
def
prepIndexedInputBAM
()
:
SAMFileReader
=
if
(
inBAMIndex
!=
null
)
new
SAMFileReader
(
inBAM
,
inBAMIndex
)
else
{
val
sfr
=
new
SAMFileReader
(
in
File
)
val
sfr
=
new
SAMFileReader
(
in
BAM
)
if
(!
sfr
.
hasIndex
)
throw
new
IllegalStateException
(
"Input BAM file must be indexed"
)
else
sfr
}
def
queryTargetRecords
(
iv
:
Iterator
[
QueryInterval
],
reader
:
SAMFileReader
,
minMapQ
:
Int
=
0
)
:
Set
[
SAMRecord
]
=
???
// TODO: set minimum fraction for overlap
// TODO: RG filtering
// query BAM files for SAM records overlapping target region
// optional: filter for MapQ value
// conditional: get mates (if records are paired)
def
queryMateRecords
(
records
:
Vector
[
SAMRecord
])
:
Set
[
SAMRecord
]
=
???
// query mates
private
def
writeWipedBAM
(
inBAM
:
SAMFileReader
,
targetNames
:
Set
[
SAMRecord
])
:
Unit
=
???
private
def
checkInputFile
(
inFile
:
File
)
:
File
=
if
(
inFile
.
exists
)
inFile
else
throw
new
IOException
(
"Input file "
+
inFile
.
getPath
+
" not found"
)
// TODO: can we accumulate errors / exceptions instead of ignoring them?
def
monadicMateQuery
(
inBAM
:
SAMFileReader
,
rec
:
SAMRecord
)
:
Option
[
SAMRecord
]
=
try
{
Some
(
inBAM
.
queryMate
(
rec
))
}
catch
{
case
e
:
Exception
=>
None
}
private
def
checkInputBAM
(
inBAM
:
File
)
:
File
=
{
// input BAM must have a .bam.bai index
if
(
new
File
(
inBAM
.
getPath
+
".bai"
).
exists
)
checkInputFile
(
inBAM
)
else
throw
new
IOException
(
"Index for input BAM file "
+
inBAM
.
getPath
+
" not found"
)
val
bloomSize
:
Int
=
100000000
val
bloomFp
:
Double
=
1
e
-
10
val
firstBAM
=
prepIndexedInputBAM
()
val
secondBAM
=
prepIndexedInputBAM
()
val
bfm
=
BloomFilter
(
bloomSize
,
bloomFp
,
13
)
// TODO: how to chain initTargets to filteredTargets directly?
val
initTargets
:
Iterator
[
Vector
[
SAMRecord
]]
=
for
{
x
<-
firstBAM
.
queryOverlapping
(
iv
.
toArray
);
// TODO: can we do without Vector here? It's only so we can flatten it later
}
yield
Vector
(
x
,
monadicMateQuery
(
secondBAM
,
x
)).
flatten
val
filteredTargets
:
Iterator
[
SAMRecord
]
=
initTargets
.
flatten
// filter on minimum MAPQ value
.
filter
(
x
=>
x
.
getMappingQuality
>=
minMapQ
)
// filter on specific read group IDs
.
filter
(
x
=>
if
(
readGroupIDs
.
size
==
0
)
true
else
readGroupIDs
.
contains
(
x
.
getReadGroup
.
getReadGroupId
))
// if filtering out multi-mapped reads, our elements is the read name
// otherwise it's the entire SAM string
filteredTargets
.
map
(
x
=>
if
(
filterOutMulti
)
x
.
getReadName
else
x
.
getSAMString
)
.
foldLeft
(
bfm
.
create
())(
_
.+(
_
))
}
def
parseOption
(
opts
:
OptionMap
,
list
:
List
[
String
])
:
OptionMap
=
...
...
@@ -151,6 +167,20 @@ object WipeReads {
throw
new
IllegalArgumentException
(
"Target regions file not supplied"
)
}
def
checkInputFile
(
inFile
:
File
)
:
File
=
if
(
inFile
.
exists
)
inFile
else
throw
new
IOException
(
"Input file "
+
inFile
.
getPath
+
" not found"
)
def
checkInputBAM
(
inBAM
:
File
)
:
File
=
{
// input BAM must have a .bam.bai index
if
(
new
File
(
inBAM
.
getPath
+
".bai"
).
exists
)
checkInputFile
(
inBAM
)
else
throw
new
IOException
(
"Index for input BAM file "
+
inBAM
.
getPath
+
" not found"
)
}
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
if
(
args
.
length
==
0
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment