Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
3a8de40e
Commit
3a8de40e
authored
Feb 19, 2015
by
Sander Bollen
Browse files
refactors
parent
cb6f9998
Changes
1
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
View file @
3a8de40e
package
nl.lumc.sasc.biopet.tools
import
java.io.
{
File
,
IOException
}
import
htsjdk.tribble.TribbleException
import
scala.collection.JavaConversions._
import
nl.lumc.sasc.biopet.core.
{
BiopetJavaCommandLineFunction
,
ToolCommand
}
import
collection.mutable.
{
Map
=>
MMap
}
import
htsjdk.
variant.vcf._
import
scala.collection.mutable.
{
Map
=>
MMap
}
import
htsjdk.
tribble.TribbleException
import
htsjdk.variant.variantcontext.
{
VariantContextBuilder
,
VariantContext
}
import
htsjdk.variant.variantcontext.writer.
{
AsyncVariantContextWriter
,
VariantContextWriter
,
VariantContextWriterBuilder
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
htsjdk.variant.vcf._
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
import
nl.lumc.sasc.biopet.core.
{
BiopetJavaCommandLineFunction
,
ToolCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
* This tool parses a VEP annotated VCF into a standard VCF file.
* The VEP puts all its annotations for each variant in an CSQ string, where annotations per transcript are comma-separated
...
...
@@ -26,10 +27,10 @@ class VEPNormalizer(val root: Configurable) extends BiopetJavaCommandLineFunctio
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input VCF, may be indexed"
,
shortName
=
"InputFile"
,
required
=
true
)
var
inputVCF
:
File
=
_
var
inputVCF
:
File
=
null
@Output
(
doc
=
"Output VCF"
,
shortName
=
"OutputFile"
,
required
=
true
)
var
outputVCF
:
File
=
_
var
outputVCF
:
File
=
null
var
mode
:
String
=
config
(
"mode"
,
default
=
"explode"
)
...
...
@@ -67,7 +68,6 @@ object VEPNormalizer extends ToolCommand {
logger
.
debug
(
"Checkion VCF version"
)
versionCheck
(
header
)
logger
.
debug
(
"VCF version OK"
)
val
seqDict
=
header
.
getSequenceDictionary
logger
.
debug
(
"Parsing header"
)
val
new_infos
=
parseCsq
(
header
)
header
.
setWriteCommandLine
(
true
)
...
...
@@ -84,7 +84,7 @@ object VEPNormalizer extends ToolCommand {
writer
.
writeHeader
(
header
)
logger
.
debug
(
"Wrote header to file"
)
normalize
(
reader
,
writer
,
new_infos
,
commandArgs
.
mode
,
commandArgs
.
remove
_
CSQ
)
normalize
(
reader
,
writer
,
new_infos
,
commandArgs
.
mode
,
commandArgs
.
removeCSQ
)
writer
.
close
()
logger
.
debug
(
"Closed writer"
)
reader
.
close
()
...
...
@@ -96,20 +96,20 @@ object VEPNormalizer extends ToolCommand {
* Normalizer
* @param reader input VCF VCFFileReader
* @param writer output VCF AsyncVariantContextWriter
* @param new
_i
nfos array of string containing names of new info fields
* @param new
I
nfos array of string containing names of new info fields
* @param mode normalizer mode (explode or standard)
* @param remove
_c
sq remove csq tag (Boolean)
* @param remove
C
sq remove csq tag (Boolean)
* @return
*/
def
normalize
(
reader
:
VCFFileReader
,
writer
:
AsyncVariantContextWriter
,
new
_i
nfos
:
Array
[
String
],
mode
:
String
,
remove
_c
sq
:
Boolean
)
=
{
new
I
nfos
:
Array
[
String
],
mode
:
String
,
remove
C
sq
:
Boolean
)
=
{
logger
.
info
(
s
"""You have selected mode $mode"""
)
logger
.
info
(
"Start processing records"
)
for
(
record
<-
reader
)
{
mode
match
{
case
"explode"
=>
explodeTranscripts
(
record
,
new
_i
nfos
,
remove
_c
sq
).
foreach
(
vc
=>
writer
.
add
(
vc
))
case
"standard"
=>
writer
.
add
(
standardTranscripts
(
record
,
new
_i
nfos
,
remove
_c
sq
))
case
"explode"
=>
explodeTranscripts
(
record
,
new
I
nfos
,
remove
C
sq
).
foreach
(
vc
=>
writer
.
add
(
vc
))
case
"standard"
=>
writer
.
add
(
standardTranscripts
(
record
,
new
I
nfos
,
remove
C
sq
))
case
_
=>
throw
new
IllegalArgumentException
(
"Something odd happened!"
)
}
}
...
...
@@ -142,7 +142,6 @@ object VEPNormalizer extends ToolCommand {
}
val
version
=
VCFHeaderVersion
.
toHeaderVersion
(
format
)
if
(!
version
.
isAtLeastAsRecentAs
(
VCFHeaderVersion
.
VCF4_0
))
{
//logger.error(s"""version $version is not supported""")
throw
new
IllegalArgumentException
(
s
"""version $version is not supported"""
)
}
}
...
...
@@ -154,7 +153,7 @@ object VEPNormalizer extends ToolCommand {
*/
def
parseCsq
(
header
:
VCFHeader
)
:
Array
[
String
]
=
{
header
.
getInfoHeaderLine
(
"CSQ"
).
getDescription
.
split
(
':'
)(
1
).
trim
.
split
(
'|'
).
map
(
"VEP_"
+
_
)
split
(
':'
)(
1
).
trim
.
split
(
'|'
).
map
(
"VEP_"
+
_
)
}
/**
...
...
@@ -166,7 +165,7 @@ object VEPNormalizer extends ToolCommand {
*/
def
explodeTranscripts
(
record
:
VariantContext
,
csq_infos
:
Array
[
String
],
remove_CSQ
:
Boolean
)
:
Array
[
VariantContext
]
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
if
(
remove_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
val
attributes
=
if
(
remove_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
csq
.
stripPrefix
(
"["
).
...
...
@@ -174,18 +173,18 @@ object VEPNormalizer extends ToolCommand {
split
(
","
).
map
(
x
=>
attributes
++
csq_infos
.
zip
(
x
.
split
(
"""\|"""
,
-
1
))).
map
(
x
=>
{
if
(
remove_CSQ
)
new
VariantContextBuilder
(
record
)
.
attributes
(
x
)
.
make
()
else
new
VariantContextBuilder
(
record
).
attributes
(
x
).
make
()
})
if
(
remove_CSQ
)
new
VariantContextBuilder
(
record
)
.
attributes
(
x
)
.
make
()
else
new
VariantContextBuilder
(
record
).
attributes
(
x
).
make
()
})
}
def
standardTranscripts
(
record
:
VariantContext
,
csqInfos
:
Array
[
String
],
remove
_CSQ
:
Boolean
)
:
VariantContext
=
{
def
standardTranscripts
(
record
:
VariantContext
,
csqInfos
:
Array
[
String
],
remove
Csq
:
Boolean
)
:
VariantContext
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
if
(
remove
_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
val
attributes
=
if
(
remove
Csq
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
val
new
_a
ttrs
=
attributes
++
csqInfos
.
zip
(
csq
.
val
new
A
ttrs
=
attributes
++
csqInfos
.
zip
(
csq
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
).
split
(
","
).
...
...
@@ -198,13 +197,13 @@ object VEPNormalizer extends ToolCommand {
).
map
(
x
=>
x
.
mkString
(
","
)))
new
VariantContextBuilder
(
record
).
attributes
(
new_attrs
).
make
()
new
VariantContextBuilder
(
record
).
attributes
(
newAttrs
).
make
()
}
case
class
Args
(
inputVCF
:
File
=
null
,
outputVCF
:
File
=
null
,
mode
:
String
=
null
,
remove
_
CSQ
:
Boolean
=
true
)
extends
AbstractArgs
removeCSQ
:
Boolean
=
true
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
...
...
@@ -219,8 +218,9 @@ object VEPNormalizer extends ToolCommand {
opt
[
File
](
'O'
,
"OutputFile"
)
required
()
valueName
"<vcf>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputVCF
=
x
)
}
validate
{
x
=>
if
(!
x
.
getName
.
endsWith
(
".vcf"
)
&&
(!
x
.
getName
.
endsWith
(
".vcf.gz"
))
&&(!
x
.
getName
.
endsWith
(
".bcf"
)))
failure
(
"Unsupported output file type"
)
else
success
x
=>
if
(!
x
.
getName
.
endsWith
(
".vcf"
)
&&
(!
x
.
getName
.
endsWith
(
".vcf.gz"
))
&&
(!
x
.
getName
.
endsWith
(
".bcf"
)))
failure
(
"Unsupported output file type"
)
else
success
}
text
"Output VCF file"
opt
[
String
](
'm'
,
"mode"
)
required
()
valueName
"<mode>"
action
{
(
x
,
c
)
=>
...
...
@@ -230,7 +230,8 @@ object VEPNormalizer extends ToolCommand {
}
text
"Mode"
opt
[
Unit
](
"do-not-remove"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
remove_CSQ
=
false
)}
text
"Do not remove CSQ tag"
c
.
copy
(
removeCSQ
=
false
)
}
text
"Do not remove CSQ tag"
}
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment