Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
cbf4d93c
Commit
cbf4d93c
authored
Oct 02, 2014
by
bow
Browse files
Add initial functions to parse interval from file
parent
ec703d07
Changes
1
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/WipeReads.scala
View file @
cbf4d93c
...
...
@@ -5,7 +5,12 @@
package
nl.lumc.sasc.biopet.core.apps
import
java.io.
{
File
,
IOException
}
import
scala.io.Source
import
htsjdk.samtools.SAMFileReader
import
htsjdk.samtools.SAMFileReader.QueryInterval
import
htsjdk.samtools.SAMRecord
import
org.apache.commons.io.FilenameUtils.getExtension
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
...
...
@@ -28,13 +33,71 @@ class WipeReads(val root: Configurable) extends BiopetJavaCommandLineFunction {
object
WipeReads
{
/** Alias for a string-keyed map whose values may be of any type. */
type OptionMap = Map[String, Any]
/**
 * A genomic interval as read from an interval file.
 *
 * @param chrom  name of the chromosome / reference sequence
 * @param start  start coordinate (the BED parser in this file stores the BED start plus one)
 * @param end    end coordinate
 * @param strand strand as written in the interval file; the BED parser in this file
 *               uses an empty string when no strand column is present
 */
case class RawInterval(
  chrom: String,
  start: Int,
  end: Int,
  strand: String
)
/** Enumeration of strand values: `Plus`, `Minus` and `Ignore`. */
object Strand extends Enumeration {
  type Strand = Value

  // declared one per line; ids are still assigned in declaration order (0, 1, 2)
  val Plus = Value
  val Minus = Value
  val Ignore = Value
}
def
checkInputFile
(
inFile
:
File
)
:
File
=
/**
 * Parses a BED file into an iterator of [[RawInterval]]s.
 *
 * BED coordinates are 0-based, half-open, so the start column is shifted by one
 * while the end column is kept as-is. Blank lines are skipped, as are leading
 * `track` / `browser` header lines. Lines with 3 to 5 columns produce an interval
 * with an empty strand; lines with 6 or more columns take the strand from the
 * sixth column.
 *
 * The returned iterator is lazy: lines are read and parsed as it is consumed, so
 * parse errors surface at consumption time. The underlying `Source` is not closed
 * by this method.
 *
 * @param inFile BED file to parse
 * @return iterator over the parsed intervals
 * @throws IllegalArgumentException if a data line has fewer than three tab-separated columns
 * @throws NumberFormatException    if a start or end column is not an integer
 */
private def makeRawIntervalFromBED(inFile: File): Iterator[RawInterval] =
  Source.fromFile(inFile)
    .getLines()
    .map(_.trim)
    .filterNot(_.isEmpty)
    // String.matches must match the *entire* line, so the pattern has to consume the
    // rest of the header line as well (the previous pattern only matched "track ")
    .dropWhile(_.matches("(track|browser)(\\s.*)?"))
    .map { line =>
      line.split("\t") match {
        // six or more columns: the sixth one carries the strand
        case Array(chrom, start, end, _, _, strand, _*) =>
          RawInterval(chrom, start.toInt + 1, end.toInt, strand)
        // three to five columns: no strand information available
        case Array(chrom, start, end, _*) =>
          RawInterval(chrom, start.toInt + 1, end.toInt, "")
        case _ =>
          throw new IllegalArgumentException(s"Invalid BED line in ${inFile.getPath}: $line")
      }
    }
/**
 * Parses a refFlat file into an iterator of [[RawInterval]]s.
 *
 * Not implemented yet: calling this throws `scala.NotImplementedError`.
 *
 * Planned steps:
 *  - convert coordinates to 1-based, fully closed
 *  - parse chrom, start blocks, end blocks, strands
 *
 * @param inFile refFlat file to parse
 * @return iterator over the parsed intervals
 */
private def makeRawIntervalFromRefFlat(inFile: File): Iterator[RawInterval] = ???
/**
 * Parses a GTF file into an iterator of [[RawInterval]]s.
 *
 * Not implemented yet: calling this throws `scala.NotImplementedError`.
 *
 * Planned steps:
 *  - convert coordinates to 1-based, fully closed
 *  - parse chrom, start blocks, end blocks, strands
 *
 * @param inFile GTF file to parse
 * @return iterator over the parsed intervals
 */
private def makeRawIntervalFromGTF(inFile: File): Iterator[RawInterval] = ???
// TODO: check that interval chrom is in the BAM file (optionally, when prepended with 'chr' too)
/**
 * Builds BAM query intervals from an interval file.
 *
 * The parser is chosen from the lower-cased file extension; only `bed` is
 * recognised. Intervals whose chromosome has no sequence index in the BAM
 * header are dropped, the rest are converted with `inBAM.makeQueryInterval`.
 *
 * @param inFile interval file
 * @param inBAM  reader whose header is used to look up sequence indices
 * @return lazily evaluated iterator over the query intervals
 * @throws IllegalArgumentException if the file extension is not `bed`
 */
def makeQueryIntervalFromFile(inFile: File, inBAM: SAMFileReader): Iterator[QueryInterval] = {
  // detect interval file format from extension
  val parseIntervals: File => Iterator[RawInterval] =
    getExtension(inFile.toString.toLowerCase) match {
      case "bed" => makeRawIntervalFromBED
      case _     => throw new IllegalArgumentException("Unexpected interval file type: " + inFile.getPath)
    }

  for {
    interval <- parseIntervals(inFile)
    // keep only intervals whose chromosome is known to the BAM header
    if inBAM.getFileHeader.getSequenceIndex(interval.chrom) > -1
  } yield inBAM.makeQueryInterval(interval.chrom, interval.start, interval.end)
}
// TODO: implement optional index creation
/**
 * Opens a BAM file for indexed access.
 *
 * When `inFileIndex` is given, the reader is created from the BAM file and that
 * index file. Otherwise the reader is created from the BAM file alone and must
 * report an index via `hasIndex`.
 *
 * @param inFile      BAM file to open
 * @param inFileIndex index file to use, or `null` (the default) to rely on the
 *                    index the reader finds itself
 * @return reader for the BAM file
 * @throws IllegalStateException if no index file is given and the reader has no index
 */
private def prepIndexedInputBAM(inFile: File, inFileIndex: File = null): SAMFileReader =
  // Option(...) maps the null default to None
  Option(inFileIndex) match {
    case Some(index) =>
      new SAMFileReader(inFile, index)
    case None =>
      val sfr = new SAMFileReader(inFile)
      if (!sfr.hasIndex) {
        // release the underlying file handle before failing, the reader is unusable to callers
        sfr.close()
        throw new IllegalStateException("Input BAM file must be indexed")
      }
      sfr
  }
/**
 * Queries a BAM file for the SAM records overlapping the target regions.
 *
 * Not implemented yet: calling this throws `scala.NotImplementedError`.
 *
 * Planned behaviour:
 *  - query BAM files for SAM records overlapping target region
 *  - optional: filter for MapQ value
 *  - conditional: get mates (if records are paired)
 *
 * TODO: set minimum fraction for overlap
 * TODO: RG filtering
 *
 * @param iv      query intervals describing the target regions
 * @param reader  reader of the BAM file to query
 * @param minMapQ presumably the threshold for the planned MapQ filter (defaults to 0) -- TODO confirm
 * @return set of matching records
 */
def queryTargetRecords(
  iv: Iterator[QueryInterval],
  reader: SAMFileReader,
  minMapQ: Int = 0
): Set[SAMRecord] = ???
/**
 * Queries the mates of the given records.
 *
 * Not implemented yet: calling this throws `scala.NotImplementedError`.
 *
 * @param records records whose mates should be looked up
 * @return set of mate records
 */
def queryMateRecords(records: Vector[SAMRecord]): Set[SAMRecord] = ???
/**
 * Not implemented yet: calling this throws `scala.NotImplementedError`.
 *
 * NOTE(review): judging by the name, this is meant to write a BAM file with the
 * target records removed -- confirm once implemented.
 *
 * @param inBAM       reader of the input BAM file
 * @param targetNames set of target records
 */
private def writeWipedBAM(inBAM: SAMFileReader, targetNames: Set[SAMRecord]): Unit = ???
private
def
checkInputFile
(
inFile
:
File
)
:
File
=
if
(
inFile
.
exists
)
inFile
else
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment