Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
ce4d4d49
Commit
ce4d4d49
authored
Feb 19, 2015
by
bow
Browse files
Merge branch 'feature-toucan' into 'develop'
Feature toucan See merge request !70
parents
8ca96bb4
cde855df
Changes
17
Expand all
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala
View file @
ce4d4d49
...
...
@@ -146,4 +146,4 @@ object BiopetExecutable {
val
stream
=
getClass
.
getClassLoader
.
getResourceAsStream
(
"nl/lumc/sasc/biopet/License.txt"
)
Source
.
fromInputStream
(
stream
).
getLines
().
mkString
(
"\n"
,
"\n"
,
"\n"
)
}
}
\ No newline at end of file
}
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
0 → 100644
View file @
ce4d4d49
package
nl.lumc.sasc.biopet.extensions
import
java.io.File
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
/**
* Created by ahbbollen on 15-1-15.
*/
class
VariantEffectPredictor
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
{
executable
=
config
(
"exe"
,
submodule
=
"perl"
,
default
=
"perl"
)
var
vep_script
:
String
=
config
(
"vep_script"
)
@Input
(
doc
=
"input VCF"
,
required
=
true
)
var
input
:
File
=
null
@Output
(
doc
=
"output file"
,
required
=
true
)
var
output
:
File
=
null
override
val
versionRegex
=
"""version (\d*)"""
.
r
override
def
versionCommand
=
executable
+
" "
+
vep_script
+
" --help"
//Boolean vars
var
v
:
Boolean
=
config
(
"v"
,
default
=
true
)
var
q
:
Boolean
=
config
(
"q"
,
default
=
false
)
var
offline
:
Boolean
=
config
(
"offline"
,
default
=
false
)
var
no_progress
:
Boolean
=
config
(
"no_progress"
,
default
=
false
)
var
everything
:
Boolean
=
config
(
"everything"
,
default
=
true
)
var
force
:
Boolean
=
config
(
"force"
,
default
=
false
)
var
no_stats
:
Boolean
=
config
(
"no_stats"
,
default
=
false
)
var
stats_text
:
Boolean
=
config
(
"stats_text"
,
default
=
false
)
var
html
:
Boolean
=
config
(
"html"
,
default
=
false
)
var
cache
:
Boolean
=
config
(
"cache"
,
default
=
false
)
var
humdiv
:
Boolean
=
config
(
"humdiv"
,
default
=
false
)
var
regulatory
:
Boolean
=
config
(
"regulatory"
,
default
=
false
)
var
cell_type
:
Boolean
=
config
(
"cell_type"
,
default
=
false
)
var
phased
:
Boolean
=
config
(
"phased"
,
default
=
false
)
var
allele_number
:
Boolean
=
config
(
"allele_number"
,
default
=
false
)
var
numbers
:
Boolean
=
config
(
"numbers"
,
default
=
false
)
var
domains
:
Boolean
=
config
(
"domains"
,
default
=
false
)
var
no_escape
:
Boolean
=
config
(
"no_escape"
,
default
=
false
)
var
hgvs
:
Boolean
=
config
(
"hgvs"
,
default
=
false
)
var
protein
:
Boolean
=
config
(
"protein"
,
default
=
false
)
var
symbol
:
Boolean
=
config
(
"symbol"
,
default
=
false
)
var
ccds
:
Boolean
=
config
(
"ccds"
,
default
=
false
)
var
uniprot
:
Boolean
=
config
(
"uniprot"
,
default
=
false
)
var
tsl
:
Boolean
=
config
(
"tsl"
,
default
=
false
)
var
canonical
:
Boolean
=
config
(
"canonical"
,
default
=
false
)
var
biotype
:
Boolean
=
config
(
"biotype"
,
default
=
false
)
var
xref_refseq
:
Boolean
=
config
(
"xref_refseq"
,
default
=
false
)
var
check_existing
:
Boolean
=
config
(
"check_existing"
,
default
=
false
)
var
check_alleles
:
Boolean
=
config
(
"check_alleles"
,
default
=
false
)
var
check_svs
:
Boolean
=
config
(
"svs"
,
default
=
false
)
var
gmaf
:
Boolean
=
config
(
"gmaf"
,
default
=
false
)
var
maf_1kg
:
Boolean
=
config
(
"maf_1kg"
,
default
=
false
)
var
maf_esp
:
Boolean
=
config
(
"maf_esp"
,
default
=
false
)
var
old_map
:
Boolean
=
config
(
"old_maf"
,
default
=
false
)
var
pubmed
:
Boolean
=
config
(
"pubmed"
,
default
=
false
)
var
failed
:
Boolean
=
config
(
"failed"
,
default
=
false
)
var
vcf
:
Boolean
=
config
(
"vcf"
,
default
=
true
)
var
json
:
Boolean
=
config
(
"json"
,
default
=
false
)
var
gvf
:
Boolean
=
config
(
"gvf"
,
default
=
false
)
var
check_ref
:
Boolean
=
config
(
"check_ref"
,
default
=
false
)
var
coding_only
:
Boolean
=
config
(
"coding_only"
,
default
=
false
)
var
no_intergenic
:
Boolean
=
config
(
"no_intergenic"
,
default
=
false
)
var
pick
:
Boolean
=
config
(
"pick"
,
default
=
false
)
var
pick_allele
:
Boolean
=
config
(
"pick_allele"
,
default
=
false
)
var
flag_pick
:
Boolean
=
config
(
"flag_pick"
,
default
=
false
)
var
flag_pick_allele
:
Boolean
=
config
(
"flag_pick_allele"
,
default
=
false
)
var
per_gene
:
Boolean
=
config
(
"per_gene"
,
default
=
false
)
var
most_severe
:
Boolean
=
config
(
"most_severe"
,
default
=
false
)
var
summary
:
Boolean
=
config
(
"summary"
,
default
=
false
)
var
filter_common
:
Boolean
=
config
(
"filter_common"
,
default
=
false
)
var
check_frequency
:
Boolean
=
config
(
"check_frequency"
,
default
=
false
)
var
allow_non_variant
:
Boolean
=
config
(
"allow_non_variant"
,
default
=
false
)
var
database
:
Boolean
=
config
(
"database"
,
default
=
false
)
var
genomes
:
Boolean
=
config
(
"genomes"
,
default
=
false
)
var
gencode_basic
:
Boolean
=
config
(
"gencode_basic"
,
default
=
false
)
var
refseq
:
Boolean
=
config
(
"refseq"
,
default
=
false
)
var
merged
:
Boolean
=
config
(
"merged"
,
default
=
false
)
var
all_refseq
:
Boolean
=
config
(
"all_refseq"
,
default
=
false
)
var
lrg
:
Boolean
=
config
(
"lrg"
,
default
=
false
)
var
no_whole_genome
:
Boolean
=
config
(
"no_whole_genome"
,
default
=
false
)
var
skip_db_check
:
Boolean
=
config
(
"skip_db_check"
,
default
=
false
)
// Textual args
var
vep_config
:
Option
[
String
]
=
config
(
"config"
)
var
species
:
Option
[
String
]
=
config
(
"species"
)
var
assembly
:
Option
[
String
]
=
config
(
"assembly"
)
var
format
:
Option
[
String
]
=
config
(
"format"
)
var
dir
:
Option
[
String
]
=
config
(
"dir"
)
var
dir_cache
:
Option
[
String
]
=
config
(
"dir_cache"
)
var
dir_plugins
:
Option
[
String
]
=
config
(
"dir_plugins"
)
var
fasta
:
Option
[
String
]
=
config
(
"fasta"
)
var
sift
:
Option
[
String
]
=
config
(
"sift"
)
var
polyphen
:
Option
[
String
]
=
config
(
"polyphen"
)
var
custom
:
Option
[
String
]
=
config
(
"custom"
)
var
plugin
:
List
[
String
]
=
config
(
"plugin"
,
default
=
Nil
)
var
individual
:
Option
[
String
]
=
config
(
"individual"
)
var
fields
:
Option
[
String
]
=
config
(
"fields"
)
var
convert
:
Option
[
String
]
=
config
(
"convert"
)
var
terms
:
Option
[
String
]
=
config
(
"terms"
)
var
chr
:
Option
[
String
]
=
config
(
"chr"
)
var
pick_order
:
Option
[
String
]
=
config
(
"pick_order"
)
var
freq_pop
:
Option
[
String
]
=
config
(
"check_pop"
)
var
freq_gt_lt
:
Option
[
String
]
=
config
(
"freq_gt_lt"
)
var
freq_filter
:
Option
[
String
]
=
config
(
"freq_filter"
)
var
filter
:
Option
[
String
]
=
config
(
"filter"
)
var
host
:
Option
[
String
]
=
config
(
"host"
)
var
user
:
Option
[
String
]
=
config
(
"user"
)
var
password
:
Option
[
String
]
=
config
(
"password"
)
var
registry
:
Option
[
String
]
=
config
(
"registry"
)
var
build
:
Option
[
String
]
=
config
(
"build"
)
var
compress
:
Option
[
String
]
=
config
(
"compress"
)
var
cache_region_size
:
Option
[
String
]
=
config
(
"cache_region_size"
)
// Numeric args
var
fork
:
Option
[
Int
]
=
config
(
"fork"
)
var
cache_version
:
Option
[
Int
]
=
config
(
"cache_version"
)
var
freq_freq
:
Option
[
Float
]
=
config
(
"freq_freq"
)
var
port
:
Option
[
Int
]
=
config
(
"port"
)
var
db_version
:
Option
[
Int
]
=
config
(
"db_version"
)
var
buffer_size
:
Option
[
Int
]
=
config
(
"buffer_size"
)
override
def
beforeGraph
:
Unit
=
{
super
.
beforeGraph
if
(!
cache
&&
!
database
)
{
throw
new
IllegalArgumentException
(
"Must supply either cache or database"
)
}
else
if
(
cache
&&
dir
.
isEmpty
)
{
throw
new
IllegalArgumentException
(
"Must supply dir to cache"
)
}
}
def
cmdLine
=
required
(
executable
)
+
required
(
vep_script
)
+
required
(
"-i"
,
input
)
+
required
(
"-o"
,
output
)
+
conditional
(
v
,
"-v"
)
+
conditional
(
q
,
"-q"
)
+
conditional
(
offline
,
"--offline"
)
+
conditional
(
no_progress
,
"--no_progress"
)
+
conditional
(
everything
,
"--everything"
)
+
conditional
(
force
,
"--force_overwrite"
)
+
conditional
(
no_stats
,
"--no_stats"
)
+
conditional
(
stats_text
,
"--stats_text"
)
+
conditional
(
html
,
"--html"
)
+
conditional
(
cache
,
"--cache"
)
+
conditional
(
humdiv
,
"--humdiv"
)
+
conditional
(
regulatory
,
"--regulatory"
)
+
conditional
(
cell_type
,
"--cel_type"
)
+
conditional
(
phased
,
"--phased"
)
+
conditional
(
allele_number
,
"--allele_number"
)
+
conditional
(
numbers
,
"--numbers"
)
+
conditional
(
domains
,
"--domains"
)
+
conditional
(
no_escape
,
"--no_escape"
)
+
conditional
(
hgvs
,
"--hgvs"
)
+
conditional
(
protein
,
"--protein"
)
+
conditional
(
symbol
,
"--symbol"
)
+
conditional
(
ccds
,
"--ccds"
)
+
conditional
(
uniprot
,
"--uniprot"
)
+
conditional
(
tsl
,
"--tsl"
)
+
conditional
(
canonical
,
"--canonical"
)
+
conditional
(
biotype
,
"--biotype"
)
+
conditional
(
xref_refseq
,
"--xref_refseq"
)
+
conditional
(
check_existing
,
"--check_existing"
)
+
conditional
(
check_alleles
,
"--check_alleles"
)
+
conditional
(
check_svs
,
"--check_svs"
)
+
conditional
(
gmaf
,
"--gmaf"
)
+
conditional
(
maf_1kg
,
"--maf_1kg"
)
+
conditional
(
maf_esp
,
"--maf_esp"
)
+
conditional
(
pubmed
,
"--pubmed"
)
+
conditional
(
failed
,
"--failed"
)
+
conditional
(
vcf
,
"--vcf"
)
+
conditional
(
json
,
"--json"
)
+
conditional
(
gvf
,
"--gvf"
)
+
conditional
(
check_ref
,
"--check_ref"
)
+
conditional
(
coding_only
,
"--coding_only"
)
+
conditional
(
no_intergenic
,
"--no_intergenic"
)
+
conditional
(
pick
,
"--pick"
)
+
conditional
(
pick_allele
,
"--pick_allele"
)
+
conditional
(
flag_pick
,
"--flag_pick"
)
+
conditional
(
flag_pick_allele
,
"--flag_pick_allele"
)
+
conditional
(
per_gene
,
"--per_gene"
)
+
conditional
(
most_severe
,
"--most_severe"
)
+
conditional
(
summary
,
"--summary"
)
+
conditional
(
filter_common
,
"--filter_common"
)
+
conditional
(
check_frequency
,
"--check_frequency"
)
+
conditional
(
allow_non_variant
,
"--allow_non_variant"
)
+
conditional
(
database
,
"--database"
)
+
conditional
(
genomes
,
"--genomes"
)
+
conditional
(
gencode_basic
,
"--gencode_basic"
)
+
conditional
(
refseq
,
"--refseq"
)
+
conditional
(
merged
,
"--merged"
)
+
conditional
(
all_refseq
,
"--all_refseq"
)
+
conditional
(
lrg
,
"--lrg"
)
+
conditional
(
no_whole_genome
,
"--no_whole_genome"
)
+
conditional
(
skip_db_check
,
"--skip_db_check"
)
+
optional
(
"--config"
,
vep_config
)
+
optional
(
"--species"
,
species
)
+
optional
(
"--assembly"
,
assembly
)
+
optional
(
"--format"
,
format
)
+
optional
(
"--dir"
,
dir
)
+
optional
(
"--dir_cache"
,
dir_cache
)
+
optional
(
"--dir_plugins"
,
dir_plugins
)
+
optional
(
"--fasta"
,
fasta
)
+
optional
(
"--sift"
,
sift
)
+
optional
(
"--polyphen"
,
polyphen
)
+
optional
(
"--custom"
,
custom
)
+
repeat
(
"--plugin"
,
plugin
)
+
optional
(
"--individual"
,
individual
)
+
optional
(
"--fields"
,
fields
)
+
optional
(
"--convert"
,
convert
)
+
optional
(
"--terms"
,
terms
)
+
optional
(
"--chr"
,
chr
)
+
optional
(
"--pick_order"
,
pick_order
)
+
optional
(
"--freq_pop"
,
freq_pop
)
+
optional
(
"--freq_gt_lt"
,
freq_gt_lt
)
+
optional
(
"--freq_filter"
,
freq_filter
)
+
optional
(
"--filter"
,
filter
)
+
optional
(
"--host"
,
host
)
+
optional
(
"--user"
,
user
)
+
optional
(
"--password"
,
password
)
+
optional
(
"--registry"
,
registry
)
+
optional
(
"--build"
,
build
)
+
optional
(
"--compress"
,
compress
)
+
optional
(
"--cache_region_size"
,
cache_region_size
)
+
optional
(
"--fork"
,
fork
)
+
optional
(
"--cache_version"
,
cache_version
)
+
optional
(
"--freq_freq"
,
freq_freq
)
+
optional
(
"--port"
,
port
)
+
optional
(
"--db_version"
,
db_version
)
+
optional
(
"--buffer_size"
,
buffer_size
)
}
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
0 → 100644
View file @
ce4d4d49
package
nl.lumc.sasc.biopet.tools
import
java.io.
{
File
,
IOException
}
import
scala.collection.JavaConversions._
import
scala.collection.mutable.
{
Map
=>
MMap
}
import
htsjdk.tribble.TribbleException
import
htsjdk.variant.variantcontext.
{
VariantContextBuilder
,
VariantContext
}
import
htsjdk.variant.variantcontext.writer.
{
AsyncVariantContextWriter
,
VariantContextWriter
,
VariantContextWriterBuilder
}
import
htsjdk.variant.vcf._
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
import
nl.lumc.sasc.biopet.core.
{
BiopetJavaCommandLineFunction
,
ToolCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
* This tool parses a VEP annotated VCF into a standard VCF file.
* The VEP puts all its annotations for each variant in an CSQ string, where annotations per transcript are comma-separated
* Annotations are then furthermore pipe-separated.
* This tool has two modes:
* 1) explode - explodes all transcripts such that each is on a unique line
* 2) standard - parse as a standard VCF, where multiple transcripts occur in the same line
* Created by ahbbollen on 10/27/14.
*/
class
VEPNormalizer
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input VCF, may be indexed"
,
shortName
=
"InputFile"
,
required
=
true
)
var
inputVCF
:
File
=
null
@Output
(
doc
=
"Output VCF"
,
shortName
=
"OutputFile"
,
required
=
true
)
var
outputVCF
:
File
=
null
var
mode
:
String
=
config
(
"mode"
,
default
=
"explode"
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-I"
,
inputVCF
)
+
required
(
"-O"
,
outputVCF
)
+
required
(
"-m"
,
mode
)
}
object
VEPNormalizer
extends
ToolCommand
{
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
commandArgs
:
Args
=
new
OptParser
()
.
parse
(
args
,
Args
())
.
getOrElse
(
sys
.
exit
(
1
))
val
input
=
commandArgs
.
inputVCF
val
output
=
commandArgs
.
outputVCF
logger
.
info
(
s
"""Input VCF is $input"""
)
logger
.
info
(
s
"""Output VCF is $output"""
)
val
reader
=
try
{
new
VCFFileReader
(
input
,
false
)
}
catch
{
case
e
:
TribbleException.MalformedFeatureFile
=>
logger
.
error
(
"Malformed VCF file! Are you sure this isn't a VCFv3 file?"
)
throw
e
}
val
header
=
reader
.
getFileHeader
logger
.
debug
(
"Checking for CSQ tag"
)
csqCheck
(
header
)
logger
.
debug
(
"CSQ tag OK"
)
logger
.
debug
(
"Checkion VCF version"
)
versionCheck
(
header
)
logger
.
debug
(
"VCF version OK"
)
logger
.
debug
(
"Parsing header"
)
val
new_infos
=
parseCsq
(
header
)
header
.
setWriteCommandLine
(
true
)
for
(
info
<-
new_infos
)
{
val
tmpheaderline
=
new
VCFInfoHeaderLine
(
info
,
VCFHeaderLineCount
.
UNBOUNDED
,
VCFHeaderLineType
.
String
,
"A VEP annotation"
)
header
.
addMetaDataLine
(
tmpheaderline
)
}
logger
.
debug
(
"Header parsing done"
)
logger
.
debug
(
"Writing header to file"
)
val
writer
=
new
AsyncVariantContextWriter
(
new
VariantContextWriterBuilder
().
setOutputFile
(
output
).
build
())
writer
.
writeHeader
(
header
)
logger
.
debug
(
"Wrote header to file"
)
normalize
(
reader
,
writer
,
new_infos
,
commandArgs
.
mode
,
commandArgs
.
removeCSQ
)
writer
.
close
()
logger
.
debug
(
"Closed writer"
)
reader
.
close
()
logger
.
debug
(
"Closed reader"
)
logger
.
info
(
"Done. Goodbye"
)
}
/**
* Normalizer
* @param reader input VCF VCFFileReader
* @param writer output VCF AsyncVariantContextWriter
* @param newInfos array of string containing names of new info fields
* @param mode normalizer mode (explode or standard)
* @param removeCsq remove csq tag (Boolean)
* @return
*/
def
normalize
(
reader
:
VCFFileReader
,
writer
:
AsyncVariantContextWriter
,
newInfos
:
Array
[
String
],
mode
:
String
,
removeCsq
:
Boolean
)
=
{
logger
.
info
(
s
"""You have selected mode $mode"""
)
logger
.
info
(
"Start processing records"
)
for
(
record
<-
reader
)
{
mode
match
{
case
"explode"
=>
explodeTranscripts
(
record
,
newInfos
,
removeCsq
).
foreach
(
vc
=>
writer
.
add
(
vc
))
case
"standard"
=>
writer
.
add
(
standardTranscripts
(
record
,
newInfos
,
removeCsq
))
case
_
=>
throw
new
IllegalArgumentException
(
"Something odd happened!"
)
}
}
}
/**
* Checks whether header has a CSQ tag
* @param header VCF header
*/
def
csqCheck
(
header
:
VCFHeader
)
=
{
if
(!
header
.
hasInfoLine
(
"CSQ"
))
{
//logger.error("No CSQ info tag found! Is this file VEP-annotated?")
throw
new
IllegalArgumentException
(
"No CSQ info tag found! Is this file VEP-annotated?"
)
}
}
/**
* Checks whether version of input VCF is at least 4.0
* VEP is known to cause issues below 4.0
* Throws exception if not
* @param header VCFHeader of input VCF
*/
def
versionCheck
(
header
:
VCFHeader
)
=
{
var
format
=
""
//HACK: getMetaDataLine does not work for fileformat
for
(
line
<-
header
.
getMetaDataInInputOrder
)
{
if
(
line
.
getKey
==
"fileformat"
||
line
.
getKey
==
"format"
)
{
format
=
line
.
getValue
}
}
val
version
=
VCFHeaderVersion
.
toHeaderVersion
(
format
)
if
(!
version
.
isAtLeastAsRecentAs
(
VCFHeaderVersion
.
VCF4_0
))
{
throw
new
IllegalArgumentException
(
s
"""version $version is not supported"""
)
}
}
/**
* Parses the CSQ tag in the header
* @param header the VCF header
* @return list of strings with new info fields
*/
def
parseCsq
(
header
:
VCFHeader
)
:
Array
[
String
]
=
{
header
.
getInfoHeaderLine
(
"CSQ"
).
getDescription
.
split
(
':'
)(
1
).
trim
.
split
(
'|'
).
map
(
"VEP_"
+
_
)
}
/**
* Explode a single VEP-annotated record to multiple normal records
* Based on the number of annotated transcripts in the CSQ tag
* @param record the record as a VariantContext object
* @param csq_infos An array with names of new info tags
* @return An array with the new records
*/
def
explodeTranscripts
(
record
:
VariantContext
,
csq_infos
:
Array
[
String
],
remove_CSQ
:
Boolean
)
:
Array
[
VariantContext
]
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
if
(
remove_CSQ
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
csq
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
).
split
(
","
).
map
(
x
=>
attributes
++
csq_infos
.
zip
(
x
.
split
(
"""\|"""
,
-
1
))).
map
(
x
=>
{
if
(
remove_CSQ
)
new
VariantContextBuilder
(
record
)
.
attributes
(
x
)
.
make
()
else
new
VariantContextBuilder
(
record
).
attributes
(
x
).
make
()
})
}
def
standardTranscripts
(
record
:
VariantContext
,
csqInfos
:
Array
[
String
],
removeCsq
:
Boolean
)
:
VariantContext
=
{
val
csq
=
record
.
getAttributeAsString
(
"CSQ"
,
"unknown"
)
val
attributes
=
if
(
removeCsq
)
record
.
getAttributes
.
toMap
-
"CSQ"
else
record
.
getAttributes
.
toMap
val
newAttrs
=
attributes
++
csqInfos
.
zip
(
csq
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
).
split
(
","
).
// This makes a list of lists with each annotation for every transcript in a top-level list element
foldLeft
(
List
.
fill
(
csqInfos
.
length
)
{
List
.
empty
[
String
]
})(
(
acc
,
x
)
=>
{
val
broken
=
x
.
split
(
"""\|"""
,
-
1
)
acc
.
zip
(
broken
).
map
(
x
=>
x
.
_2
::
x
.
_1
)
}
).
map
(
x
=>
x
.
mkString
(
","
)))
new
VariantContextBuilder
(
record
).
attributes
(
newAttrs
).
make
()
}
case
class
Args
(
inputVCF
:
File
=
null
,
outputVCF
:
File
=
null
,
mode
:
String
=
null
,
removeCSQ
:
Boolean
=
true
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
head
(
s
"""|$commandName - Parse VEP-annotated VCF to standard VCF format """
)
opt
[
File
](
'I'
,
"InputFile"
)
required
()
valueName
"<vcf>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputVCF
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"Input VCF not found"
)
}
text
"Input VCF file"
opt
[
File
](
'O'
,
"OutputFile"
)
required
()
valueName
"<vcf>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputVCF
=
x
)
}
validate
{
x
=>
if
(!
x
.
getName
.
endsWith
(
".vcf"
)
&&
(!
x
.
getName
.
endsWith
(
".vcf.gz"
))
&&
(!
x
.
getName
.
endsWith
(
".bcf"
)))
failure
(
"Unsupported output file type"
)
else
success
}
text
"Output VCF file"
opt
[
String
](
'm'
,
"mode"
)
required
()
valueName
"<mode>"
action
{
(
x
,
c
)
=>
c
.
copy
(
mode
=
x
)
}
validate
{
x
=>
if
(
x
==
"explode"
)
success
else
if
(
x
==
"standard"
)
success
else
failure
(
"Unsupported mode"
)
}
text
"Mode"
opt
[
Unit
](
"do-not-remove"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
removeCSQ
=
false
)
}
text
"Do not remove CSQ tag"
}
}
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala
View file @
ce4d4d49
...
...
@@ -454,4 +454,4 @@ object WipeReads extends ToolCommand {
commandArgs
.
filteredOutBam
.
map
(
x
=>
prepOutBam
(
x
,
commandArgs
.
inputBam
,
writeIndex
=
!
commandArgs
.
noMakeIndex
))
)
}
}
\ No newline at end of file
}
public/biopet-framework/src/test/resources/VCFv3.vcf
0 → 100644
View file @
ce4d4d49
##fileformat=VCFv3.3
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=1000GenomesPilot-NCBI36
##phasing=partial
##INFO=NS,1,Integer,"Number of Samples With Data"
##INFO=DP,1,Integer,"Total Depth"
##INFO=AF,-1,Float,"Allele Frequency"
##INFO=AA,1,String,"Ancestral Allele"
##INFO=DB,0,Flag,"dbSNP membership, build 129"
##INFO=H2,0,Flag,"HapMap2 membership"
##FILTER=q10,"Quality below 10"
##FILTER=s50,"Less than 50% of samples have data"
##FORMAT=GT,1,String,"Genotype"
##FORMAT=GQ,1,Integer,"Genotype Quality"
##FORMAT=DP,1,Integer,"Read Depth"
##FORMAT=HQ,2,Integer,"Haplotype Quality"
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
20 14370 rs6054257 G A 29 0 NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:-1,-1
20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:-1,-1
20 1110696 rs6040355 A G,T 67 0 NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:-1,-1
20 1230237 . T . 47 0 NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2:-1,-1
20 1234567 microsat1 G D4,IGA 50 0 NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
public/biopet-framework/src/test/resources/VEP_oneline.vcf
0 → 100644
View file @
ce4d4d49
This diff is collapsed.
Click to expand it.
public/biopet-framework/src/test/resources/unvepped.vcf
0 → 100644
View file @
ce4d4d49
This diff is collapsed.
Click to expand it.
public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VEPNormalizerTest.scala
0 → 100644
View file @
ce4d4d49
package
nl.lumc.sasc.biopet.tools
import
org.scalatest.testng.TestNGSuite
import
org.scalatest.mock.MockitoSugar
import
org.scalatest.Matchers
import
java.io.File
import
java.nio.file.Paths
import
org.testng.annotations.Test
import
htsjdk.variant.vcf.VCFFileReader
import
htsjdk.tribble.TribbleException
import
scala.collection.JavaConversions._
import
htsjdk.variant.variantcontext.VariantContext
/**
* This class tests the VEPNormalizer
* Created by ahbbollen on 11/24/14.
*/
class
VEPNormalizerTest
extends
TestNGSuite
with
MockitoSugar
with
Matchers
{
import
VEPNormalizer._
private
def
resourcePath
(
p
:
String
)
:
String
=
{
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
}
val
vcf3
=
new
File
(
resourcePath
(
"/VCFv3.vcf"
))
val
vepped
=
new
File
(
resourcePath
(
"/VEP_oneline.vcf"
))
val
unvepped
=
new
File
(
resourcePath
(
"/unvepped.vcf"
))