Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
a5b9de0f
Commit
a5b9de0f
authored
Feb 19, 2015
by
Sander Bollen
Browse files
Explode and standard in one function, no longer custom exceptions, CSQ tag by default removed.
parent
89fd555e
Changes
2
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
View file @
a5b9de0f
...
...
@@ -6,10 +6,9 @@ import htsjdk.tribble.TribbleException
import
scala.collection.JavaConversions._
import
nl.lumc.sasc.biopet.core.
{
BiopetJavaCommandLineFunction
,
ToolCommand
}
import
collection.mutable.
{
Map
=>
MMap
}
import
collection.JavaConverters._
import
htsjdk.variant.vcf._
import
htsjdk.variant.variantcontext.
{
VariantContextBuilder
,
VariantContext
}
import
htsjdk.variant.variantcontext.writer.
{
VariantContextWriter
,
VariantContextWriterBuilder
}
import
htsjdk.variant.variantcontext.writer.
{
AsyncVariantContextWriter
,
VariantContextWriter
,
VariantContextWriterBuilder
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
...
...
@@ -53,14 +52,11 @@ object VEPNormalizer extends ToolCommand {
logger
.
info
(
s
"""Input VCF is $input"""
)
logger
.
info
(
s
"""Output VCF is $output"""
)
var
reader
:
VCFFileReader
=
null
// this can give a codec error if malformed VCF
//
try
{
reader
=
new
VCFFileReader
(
input
,
false
)
val
reader
=
try
{
new
VCFFileReader
(
input
,
false
)
}
catch
{
case
e
:
TribbleException.MalformedFeatureFile
=>
logger
.
error
(
"Malformed VCF file!
VCFv3 not supported!
"
)
logger
.
error
(
"Malformed VCF file!
Are you sure this isn't a VCFv3 file?
"
)
throw
e
}
...
...
@@ -82,80 +78,41 @@ object VEPNormalizer extends ToolCommand {
logger
.
debug
(
"Header parsing done"
)
logger
.
debug
(
"Writing header to file"
)
val
writerBuilder
=
new
VariantContextWriterBuilder
()
writerBuilder
.
val
writer
=
new
AsyncVariantContextWriter
(
new
VariantContextWriterBuilder
().
setOutputFile
(
output
).
setOutputFileType
(
VariantContextWriterBuilder
.
OutputType
.
BLOCK_COMPRESSED_VCF
).
setReferenceDictionary
(
seqDict
)
val
writer
=
writerBuilder
.
build
()
build
())
writer
.
writeHeader
(
header
)
logger
.
debug
(
"Wrote header to file"
)
if
(
commandArgs
.
mode
==
"explode"
)
{
logger
.
info
(
"You have selected explode mode"
)
explode
(
reader
,
writer
,
new_infos
)
}
else
if
(
commandArgs
.
mode
==
"standard"
)
{
logger
.
info
(
"You have selected standard mode"
)
standard
(
reader
,
writer
,
new_infos
)
}
else
{
// this should be impossible, but should nevertheless be checked
logger
.
error
(
"impossibru!"
,
new
IllegalArgumentException
)
}
}
/**
* Wrapper for mode explode
* @param reader input VCF VCFFileReader
* @param writer output VCF VariantContextWriter
* @param new_infos array of string containing names of new info fields
*/
def
explode
(
reader
:
VCFFileReader
,
writer
:
VariantContextWriter
,
new_infos
:
Array
[
String
])
=
{
logger
.
info
(
"Start processing records"
)
var
nprocessed_records
:
Int
=
0
var
nwritten_records
:
Int
=
0
for
((
record
,
i
)
<-
reader
.
iterator
().
zipWithIndex
)
{
nprocessed_records
+=
1
if
(
i
%
1000
==
0
)
{
logger
.
info
(
s
"""Read $i records \t Wrote $nwritten_records records"""
)
}
val
new_records
=
explodeTranscripts
(
record
,
new_infos
)
for
(
vc
<-
new_records
)
{
writer
.
add
(
vc
)
}
nwritten_records
+=
new_records
.
length
}
logger
.
info
(
s
"""Processed $nprocessed_records records"""
)
logger
.
info
(
"Done. Goodbye"
)
normalize
(
reader
,
writer
,
new_infos
,
commandArgs
.
mode
,
commandArgs
.
remove_CSQ
)
writer
.
close
()
logger
.
debug
(
"Closed writer"
)
reader
.
close
()
logger
.
debug
(
"Closed reader"
)
logger
.
info
(
"Done. Goodbye"
)
}
/**
*
Wrapper for mode standard
*
Normalizer
* @param reader input VCF VCFFileReader
* @param writer output VCF VariantContextWriter
* @param writer output VCF
Async
VariantContextWriter
* @param new_infos array of string containing names of new info fields
* @param mode normalizer mode (explode or standard)
* @param remove_csq remove csq tag (Boolean)
* @return
*/
def
standard
(
reader
:
VCFFileReader
,
writer
:
VariantContextWriter
,
new_infos
:
Array
[
String
])
=
{
def
normalize
(
reader
:
VCFFileReader
,
writer
:
AsyncVariantContextWriter
,
new_infos
:
Array
[
String
],
mode
:
String
,
remove_csq
:
Boolean
)
=
{
logger
.
info
(
s
"""You have selected mode $mode"""
)
logger
.
info
(
"Start processing records"
)
var
nprocessed_records
:
Int
=
0
var
nwritten_records
:
Int
=
0
for
((
record
,
i
)
<-
reader
.
iterator
().
zipWithIndex
)
{
nprocessed_records
+=
1
if
(
i
%
1000
==
0
)
{
logger
.
info
(
s
"""Read $i records \t Wrote $nwritten_records records""
"
)
for
(
record
<-
reader
)
{
mode
match
{
case
"explode"
=>
explodeTranscripts
(
record
,
new_infos
,
remove_csq
).
foreach
(
vc
=>
writer
.
add
(
vc
))
case
"standard"
=>
writer
.
add
(
standardTranscripts
(
record
,
new_infos
,
remove_csq
))
case
_
=>
throw
new
IllegalArgumentException
(
"Something odd happened!
"
)
}
writer
.
add
(
standardTranscripts
(
record
,
new_infos
))
nwritten_records
+=
1
}
logger
.
info
(
s
"""Processed $nprocessed_records records"""
)
logger
.
info
(
"Done. Goodbye"
)
writer
.
close
()
logger
.
debug
(
"Closed writer"
)
reader
.
close
()
logger
.
debug
(
"Closed reader"
)
}
/**
...
...
@@ -165,7 +122,7 @@ object VEPNormalizer extends ToolCommand {
def
csqCheck
(
header
:
VCFHeader
)
=
{
if
(!
header
.
hasInfoLine
(
"CSQ"
))
{
//logger.error("No CSQ info tag found! Is this file VEP-annotated?")
throw
new
VEP
Exception
(
"No CSQ info tag found! Is this file VEP-annotated?"
)
throw
new
IllegalArgument
Exception
(
"No CSQ info tag found! Is this file VEP-annotated?"
)
}
}
...
...
@@ -186,7 +143,7 @@ object VEPNormalizer extends ToolCommand {
val
version
=
VCFHeaderVersion
.
toHeaderVersion
(
format
)
if
(!
version
.
isAtLeastAsRecentAs
(
VCFHeaderVersion
.
VCF4_0
))
{
//logger.error(s"""version $version is not supported""")
throw
new
VEP
Exception
(
s
"""version $version is not supported"""
)
throw
new
IllegalArgument
Exception
(
s
"""version $version is not supported"""
)
}
}
...
...
@@ -197,7 +154,7 @@ object VEPNormalizer extends ToolCommand {
*/
def
parseCsq
(
header
:
VCFHeader
)
:
Array
[
String
]
=
{
header
.
getInfoHeaderLine
(
"CSQ"
).
getDescription
.
split
(
':'
)(
1
).
trim
.
split
(
'|'
)
split
(
':'
)(
1
).
trim
.
split
(
'|'
)
.
map
(
"VEP_"
+
_
)
}
/**
...
...
@@ -207,21 +164,26 @@ object VEPNormalizer extends ToolCommand {
* @param csq_infos An array with names of new info tags
* @return An array with the new records
*/
def
explodeTranscripts
(
record
:
VariantContext
,
csq_infos
:
Array
[
String
])
:
Array
[
VariantContext
]
=
{
def
explodeTranscripts
(
record
:
VariantContext
,
csq_infos
:
Array
[
String
]
,
remove_CSQ
:
Boolean
)
:
Array
[
VariantContext
]
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
record
.
getAttributes
.
toMap
val
attributes
=
if
(
remove_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
csq
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
).
split
(
","
).
map
(
x
=>
attributes
++
csq_infos
.
zip
(
x
.
split
(
"""\|"""
,
-
1
))).
map
(
x
=>
new
VariantContextBuilder
(
record
).
attributes
(
x
).
make
())
map
(
x
=>
{
if
(
remove_CSQ
)
new
VariantContextBuilder
(
record
)
.
attributes
(
x
)
.
make
()
else
new
VariantContextBuilder
(
record
).
attributes
(
x
).
make
()
})
}
def
standardTranscripts
(
record
:
VariantContext
,
csqInfos
:
Array
[
String
])
:
VariantContext
=
{
def
standardTranscripts
(
record
:
VariantContext
,
csqInfos
:
Array
[
String
]
,
remove_CSQ
:
Boolean
)
:
VariantContext
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
record
.
getAttributes
.
toMap
val
attributes
=
if
(
remove_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
val
new_attrs
=
attributes
++
csqInfos
.
zip
(
csq
.
stripPrefix
(
"["
).
...
...
@@ -236,23 +198,13 @@ object VEPNormalizer extends ToolCommand {
).
map
(
x
=>
x
.
mkString
(
","
)))
new
VariantContextBuilder
(
record
).
attributes
(
new_attrs
).
make
()
}
/**
* This one-line class defines a new VEP-specific exception
* @param msg The error message
*/
class
VEPException
(
msg
:
String
)
extends
RuntimeException
(
msg
)
/**
* This one-line class defines a version exception
* @param msg The error message
*/
class
VCFVersionException
(
msg
:
String
)
extends
RuntimeException
(
msg
)
case
class
Args
(
inputVCF
:
File
=
null
,
outputVCF
:
File
=
null
,
mode
:
String
=
null
)
extends
AbstractArgs
mode
:
String
=
null
,
remove_CSQ
:
Boolean
=
true
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
...
...
@@ -267,7 +219,8 @@ object VEPNormalizer extends ToolCommand {
opt
[
File
](
'O'
,
"OutputFile"
)
required
()
valueName
"<vcf>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputVCF
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
success
x
=>
if
(!
x
.
getName
.
endsWith
(
".vcf"
)
&&
(!
x
.
getName
.
endsWith
(
".vcf.gz"
))
&&(!
x
.
getName
.
endsWith
(
".bcf"
)))
failure
(
"Unsupported output file type"
)
else
success
}
text
"Output VCF file"
opt
[
String
](
'm'
,
"mode"
)
required
()
valueName
"<mode>"
action
{
(
x
,
c
)
=>
...
...
@@ -275,6 +228,9 @@ object VEPNormalizer extends ToolCommand {
}
validate
{
x
=>
if
(
x
==
"explode"
)
success
else
if
(
x
==
"standard"
)
success
else
failure
(
"Unsupported mode"
)
}
text
"Mode"
opt
[
Unit
](
"do-not-remove"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
remove_CSQ
=
false
)}
text
"Do not remove CSQ tag"
}
}
public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VEPNormalizerTest.scala
View file @
a5b9de0f
...
...
@@ -36,21 +36,21 @@ class VEPNormalizerTest extends TestNGSuite with MockitoSugar with Matchers {
val
reader
=
new
VCFFileReader
(
vepped
,
false
)
val
header
=
reader
.
getFileHeader
val
new_infos
=
parseCsq
(
header
)
explodeTranscripts
(
reader
.
iterator
().
next
(),
new_infos
).
length
should
be
(
11
)
explodeTranscripts
(
reader
.
iterator
().
next
(),
new_infos
,
true
).
length
should
be
(
11
)
}
@Test
def
testStandardVEPLength
()
=
{
val
reader
=
new
VCFFileReader
(
vepped
,
false
)
val
header
=
reader
.
getFileHeader
val
new_infos
=
parseCsq
(
header
)
Array
(
standardTranscripts
(
reader
.
iterator
().
next
(),
new_infos
)).
length
should
be
(
1
)
Array
(
standardTranscripts
(
reader
.
iterator
().
next
(),
new_infos
,
true
)).
length
should
be
(
1
)
}
@Test
def
testStandardVEPAttributeLength
()
=
{
val
reader
=
new
VCFFileReader
(
vepped
,
false
)
val
header
=
reader
.
getFileHeader
val
new_infos
=
parseCsq
(
header
)
val
record
=
standardTranscripts
(
reader
.
iterator
().
next
(),
new_infos
)
val
record
=
standardTranscripts
(
reader
.
iterator
().
next
(),
new_infos
,
true
)
def
checkItems
(
items
:
Array
[
String
])
=
{
items
.
foreach
{
check
}
}
...
...
@@ -73,7 +73,7 @@ class VEPNormalizerTest extends TestNGSuite with MockitoSugar with Matchers {
val
reader
=
new
VCFFileReader
(
vcf3
,
false
)
}
@Test
(
expectedExceptions
=
Array
(
classOf
[
VEP
Exception
]))
@Test
(
expectedExceptions
=
Array
(
classOf
[
IllegalArgument
Exception
]))
def
testNoCSQTagException
{
csqCheck
(
new
VCFFileReader
(
unvepped
,
false
).
getFileHeader
)
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment