Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
7dafdb1b
Commit
7dafdb1b
authored
Mar 11, 2015
by
Peter van 't Hof
Browse files
Fix output of normalizer
parent
3ec60672
Changes
2
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
View file @
7dafdb1b
...
...
@@ -180,44 +180,55 @@ object VEPNormalizer extends ToolCommand {
* Explode a single VEP-annotated record to multiple normal records
* Based on the number of annotated transcripts in the CSQ tag
* @param record the record as a VariantContext object
* @param csq_infos An array with names of new info tags
* @param csqInfos An array with names of new info tags
* @return An array with the new records
*/
def
explodeTranscripts
(
record
:
VariantContext
,
csq
_i
nfos
:
Array
[
String
],
remove
_CSQ
:
Boolean
)
:
Array
[
VariantContext
]
=
{
def
explodeTranscripts
(
record
:
VariantContext
,
csq
I
nfos
:
Array
[
String
],
remove
Csq
:
Boolean
)
:
Array
[
VariantContext
]
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
if
(
remove_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
csq
.
val
builder
=
{
if
(
removeCsq
)
new
VariantContextBuilder
(
record
).
rmAttribute
(
"CSQ"
)
else
new
VariantContextBuilder
(
record
)
}
// attributes for each transcript (transcript)(csq field index)
val
arti
=
csq
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
).
split
(
","
).
map
(
x
=>
attributes
++
csq_infos
.
zip
(
x
.
split
(
"""\|"""
,
-
1
))).
map
(
x
=>
{
if
(
remove_CSQ
)
new
VariantContextBuilder
(
record
)
.
attributes
(
x
)
.
make
()
else
new
VariantContextBuilder
(
record
).
attributes
(
x
).
make
()
})
split
(
","
).
map
(
_
.
split
(
"""\|"""
))
for
(
transcript
<-
arti
)
yield
{
(
for
(
fieldId
<-
0
until
csqInfos
.
size
if
transcript
.
isDefinedAt
(
fieldId
)
&&
!
transcript
(
fieldId
).
isEmpty
)
yield
csqInfos
(
fieldId
)
->
transcript
(
fieldId
).
trim
)
.
filterNot
(
_
.
_2
.
isEmpty
)
.
foldLeft
(
builder
)((
builder
,
artibute
)
=>
builder
.
attribute
(
artibute
.
_1
,
artibute
.
_2
))
.
make
()
}
}
def
standardTranscripts
(
record
:
VariantContext
,
csqInfos
:
Array
[
String
],
removeCsq
:
Boolean
)
:
VariantContext
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
if
(
removeCsq
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
val
newAttrs
=
attributes
++
csqInfos
.
zip
(
csq
.
val
builder
=
{
if
(
removeCsq
)
new
VariantContextBuilder
(
record
).
rmAttribute
(
"CSQ"
)
else
new
VariantContextBuilder
(
record
)
}
// attributes for each transcript (transcript)(csq field index)
val
arti
=
csq
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
).
split
(
","
).
// This makes a list of lists with each annotation for every transcript in a top-level list element
foldLeft
(
List
.
fill
(
csqInfos
.
length
)
{
List
.
empty
[
String
]
})(
(
acc
,
x
)
=>
{
val
broken
=
x
.
split
(
"""\|"""
,
-
1
)
acc
.
zip
(
broken
).
map
(
x
=>
x
.
_2
::
x
.
_1
)
}
).
map
(
x
=>
x
.
mkString
(
","
)))
new
VariantContextBuilder
(
record
).
attributes
(
newAttrs
).
make
()
split
(
","
).
map
(
_
.
split
(
"""\|"""
))
(
for
(
fieldId
<-
0
until
csqInfos
.
size
)
yield
csqInfos
(
fieldId
)
->
{
for
(
transcript
<-
arti
if
transcript
.
isDefinedAt
(
fieldId
)
&&
!
transcript
(
fieldId
).
isEmpty
)
yield
transcript
(
fieldId
).
trim
})
.
filterNot
(
_
.
_2
.
isEmpty
)
.
foldLeft
(
builder
)((
builder
,
artibute
)
=>
builder
.
attribute
(
artibute
.
_1
,
artibute
.
_2
))
.
make
()
}
case
class
Args
(
inputVCF
:
File
=
null
,
...
...
public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VEPNormalizerTest.scala
View file @
7dafdb1b
...
...
@@ -71,7 +71,10 @@ class VEPNormalizerTest extends TestNGSuite with MockitoSugar with Matchers {
}
def
check
(
item
:
String
)
=
{
record
.
getAttribute
(
item
).
toString
.
split
(
""","""
,
-
1
).
length
should
be
(
11
)
record
.
getAttribute
(
item
)
match
{
case
l
:
List
[
_
]
=>
l
.
length
should
be
(
11
)
case
_
=>
}
}
val
items
=
Array
(
"AA_MAF"
,
"AFR_MAF"
,
"ALLELE_NUM"
,
"AMR_MAF"
,
"ASN_MAF"
,
"Allele"
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment