Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
d84fc9cc
Commit
d84fc9cc
authored
May 12, 2016
by
bow
Browse files
Merge branch 'feature-scatter_annotation' into 'develop'
Added scattering on annotation Fixes #305 and #324 See merge request !374
parents
bffd796d
35dca269
Changes
14
Hide whitespace changes
Inline
Side-by-side
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
View file @
d84fc9cc
...
...
@@ -18,10 +18,9 @@ package nl.lumc.sasc.biopet.extensions
import
java.io.File
import
nl.lumc.sasc.biopet.core.summary.Summarizable
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.
{
Logging
,
VcfUtils
,
tryToParseNumber
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.
{
Version
,
BiopetCommandLineFunction
,
Reference
}
import
nl.lumc.sasc.biopet.utils.tryToParseNumber
import
nl.lumc.sasc.biopet.core.
{
BiopetCommandLineFunction
,
Reference
,
Version
}
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
scala.io.Source
...
...
@@ -164,147 +163,149 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
}
/** Returns command to execute */
def
cmdLine
=
required
(
executable
)
+
required
(
vepScript
)
+
required
(
"-i"
,
input
)
+
required
(
"-o"
,
output
)
+
conditional
(
v
,
"-v"
)
+
conditional
(
q
,
"-q"
)
+
conditional
(
offline
,
"--offline"
)
+
conditional
(
noProgress
,
"--no_progress"
)
+
conditional
(
everything
,
"--everything"
)
+
conditional
(
force
,
"--force_overwrite"
)
+
conditional
(
noStats
,
"--no_stats"
)
+
conditional
(
statsText
,
"--stats_text"
)
+
conditional
(
html
,
"--html"
)
+
conditional
(
cache
,
"--cache"
)
+
conditional
(
humdiv
,
"--humdiv"
)
+
conditional
(
regulatory
,
"--regulatory"
)
+
conditional
(
cellType
,
"--cel_type"
)
+
conditional
(
phased
,
"--phased"
)
+
conditional
(
alleleNumber
,
"--allele_number"
)
+
conditional
(
numbers
,
"--numbers"
)
+
conditional
(
domains
,
"--domains"
)
+
conditional
(
noEscape
,
"--no_escape"
)
+
conditional
(
hgvs
,
"--hgvs"
)
+
conditional
(
protein
,
"--protein"
)
+
conditional
(
symbol
,
"--symbol"
)
+
conditional
(
ccds
,
"--ccds"
)
+
conditional
(
uniprot
,
"--uniprot"
)
+
conditional
(
tsl
,
"--tsl"
)
+
conditional
(
canonical
,
"--canonical"
)
+
conditional
(
biotype
,
"--biotype"
)
+
conditional
(
xrefRefseq
,
"--xref_refseq"
)
+
conditional
(
checkExisting
,
"--check_existing"
)
+
conditional
(
checkAlleles
,
"--check_alleles"
)
+
conditional
(
checkSvs
,
"--check_svs"
)
+
conditional
(
gmaf
,
"--gmaf"
)
+
conditional
(
maf1kg
,
"--maf_1kg"
)
+
conditional
(
mafEsp
,
"--maf_esp"
)
+
conditional
(
pubmed
,
"--pubmed"
)
+
conditional
(
vcf
,
"--vcf"
)
+
conditional
(
json
,
"--json"
)
+
conditional
(
gvf
,
"--gvf"
)
+
conditional
(
checkRef
,
"--check_ref"
)
+
conditional
(
codingOnly
,
"--coding_only"
)
+
conditional
(
noIntergenic
,
"--no_intergenic"
)
+
conditional
(
pick
,
"--pick"
)
+
conditional
(
pickAllele
,
"--pick_allele"
)
+
conditional
(
flagPick
,
"--flag_pick"
)
+
conditional
(
flagPickAllele
,
"--flag_pick_allele"
)
+
conditional
(
perGene
,
"--per_gene"
)
+
conditional
(
mostSevere
,
"--most_severe"
)
+
conditional
(
summary
,
"--summary"
)
+
conditional
(
filterCommon
,
"--filter_common"
)
+
conditional
(
checkFrequency
,
"--check_frequency"
)
+
conditional
(
allowNonVariant
,
"--allow_non_variant"
)
+
conditional
(
database
,
"--database"
)
+
conditional
(
genomes
,
"--genomes"
)
+
conditional
(
gencodeBasic
,
"--gencode_basic"
)
+
conditional
(
refseq
,
"--refseq"
)
+
conditional
(
merged
,
"--merged"
)
+
conditional
(
allRefseq
,
"--all_refseq"
)
+
conditional
(
lrg
,
"--lrg"
)
+
conditional
(
noWholeGenome
,
"--no_whole_genome"
)
+
conditional
(
skibDbCheck
,
"--skip_db_check"
)
+
optional
(
"--config"
,
vepConfig
)
+
optional
(
"--species"
,
species
)
+
optional
(
"--assembly"
,
assembly
)
+
optional
(
"--format"
,
format
)
+
optional
(
"--dir"
,
dir
)
+
optional
(
"--dir_cache"
,
dirCache
)
+
optional
(
"--dir_plugins"
,
dirPlugins
)
+
optional
(
"--fasta"
,
fasta
)
+
optional
(
"--sift"
,
sift
)
+
optional
(
"--polyphen"
,
polyphen
)
+
repeat
(
"--custom"
,
custom
)
+
repeat
(
"--plugin"
,
plugin
)
+
optional
(
"--individual"
,
individual
)
+
optional
(
"--fields"
,
fields
)
+
optional
(
"--convert"
,
convert
)
+
optional
(
"--terms"
,
terms
)
+
optional
(
"--chr"
,
chr
)
+
optional
(
"--pick_order"
,
pickOrder
)
+
optional
(
"--freq_pop"
,
freqPop
)
+
optional
(
"--freq_gt_lt"
,
freqGtLt
)
+
optional
(
"--freq_filter"
,
freqFilter
)
+
optional
(
"--filter"
,
filter
)
+
optional
(
"--host"
,
host
)
+
optional
(
"--user"
,
user
)
+
optional
(
"--password"
,
password
)
+
optional
(
"--registry"
,
registry
)
+
optional
(
"--build"
,
build
)
+
optional
(
"--compress"
,
compress
)
+
optional
(
"--cache_region_size"
,
cacheRegionSize
)
+
optional
(
"--fork"
,
threads
)
+
optional
(
"--cache_version"
,
cacheVersion
)
+
optional
(
"--freq_freq"
,
freqFreq
)
+
optional
(
"--port"
,
port
)
+
optional
(
"--db_version"
,
dbVersion
)
+
optional
(
"--buffer_size"
,
bufferSize
)
+
optional
(
"--failed"
,
failed
)
def
cmdLine
=
{
if
(
input
.
exists
()
&&
VcfUtils
.
vcfFileIsEmpty
(
input
))
{
val
zcat
=
Zcat
(
this
,
input
,
output
)
zcat
.
cmdLine
}
else
required
(
executable
)
+
required
(
vepScript
)
+
required
(
"-i"
,
input
)
+
required
(
"-o"
,
output
)
+
conditional
(
v
,
"-v"
)
+
conditional
(
q
,
"-q"
)
+
conditional
(
offline
,
"--offline"
)
+
conditional
(
noProgress
,
"--no_progress"
)
+
conditional
(
everything
,
"--everything"
)
+
conditional
(
force
,
"--force_overwrite"
)
+
conditional
(
noStats
,
"--no_stats"
)
+
conditional
(
statsText
,
"--stats_text"
)
+
conditional
(
html
,
"--html"
)
+
conditional
(
cache
,
"--cache"
)
+
conditional
(
humdiv
,
"--humdiv"
)
+
conditional
(
regulatory
,
"--regulatory"
)
+
conditional
(
cellType
,
"--cel_type"
)
+
conditional
(
phased
,
"--phased"
)
+
conditional
(
alleleNumber
,
"--allele_number"
)
+
conditional
(
numbers
,
"--numbers"
)
+
conditional
(
domains
,
"--domains"
)
+
conditional
(
noEscape
,
"--no_escape"
)
+
conditional
(
hgvs
,
"--hgvs"
)
+
conditional
(
protein
,
"--protein"
)
+
conditional
(
symbol
,
"--symbol"
)
+
conditional
(
ccds
,
"--ccds"
)
+
conditional
(
uniprot
,
"--uniprot"
)
+
conditional
(
tsl
,
"--tsl"
)
+
conditional
(
canonical
,
"--canonical"
)
+
conditional
(
biotype
,
"--biotype"
)
+
conditional
(
xrefRefseq
,
"--xref_refseq"
)
+
conditional
(
checkExisting
,
"--check_existing"
)
+
conditional
(
checkAlleles
,
"--check_alleles"
)
+
conditional
(
checkSvs
,
"--check_svs"
)
+
conditional
(
gmaf
,
"--gmaf"
)
+
conditional
(
maf1kg
,
"--maf_1kg"
)
+
conditional
(
mafEsp
,
"--maf_esp"
)
+
conditional
(
pubmed
,
"--pubmed"
)
+
conditional
(
vcf
,
"--vcf"
)
+
conditional
(
json
,
"--json"
)
+
conditional
(
gvf
,
"--gvf"
)
+
conditional
(
checkRef
,
"--check_ref"
)
+
conditional
(
codingOnly
,
"--coding_only"
)
+
conditional
(
noIntergenic
,
"--no_intergenic"
)
+
conditional
(
pick
,
"--pick"
)
+
conditional
(
pickAllele
,
"--pick_allele"
)
+
conditional
(
flagPick
,
"--flag_pick"
)
+
conditional
(
flagPickAllele
,
"--flag_pick_allele"
)
+
conditional
(
perGene
,
"--per_gene"
)
+
conditional
(
mostSevere
,
"--most_severe"
)
+
conditional
(
summary
,
"--summary"
)
+
conditional
(
filterCommon
,
"--filter_common"
)
+
conditional
(
checkFrequency
,
"--check_frequency"
)
+
conditional
(
allowNonVariant
,
"--allow_non_variant"
)
+
conditional
(
database
,
"--database"
)
+
conditional
(
genomes
,
"--genomes"
)
+
conditional
(
gencodeBasic
,
"--gencode_basic"
)
+
conditional
(
refseq
,
"--refseq"
)
+
conditional
(
merged
,
"--merged"
)
+
conditional
(
allRefseq
,
"--all_refseq"
)
+
conditional
(
lrg
,
"--lrg"
)
+
conditional
(
noWholeGenome
,
"--no_whole_genome"
)
+
conditional
(
skibDbCheck
,
"--skip_db_check"
)
+
optional
(
"--config"
,
vepConfig
)
+
optional
(
"--species"
,
species
)
+
optional
(
"--assembly"
,
assembly
)
+
optional
(
"--format"
,
format
)
+
optional
(
"--dir"
,
dir
)
+
optional
(
"--dir_cache"
,
dirCache
)
+
optional
(
"--dir_plugins"
,
dirPlugins
)
+
optional
(
"--fasta"
,
fasta
)
+
optional
(
"--sift"
,
sift
)
+
optional
(
"--polyphen"
,
polyphen
)
+
repeat
(
"--custom"
,
custom
)
+
repeat
(
"--plugin"
,
plugin
)
+
optional
(
"--individual"
,
individual
)
+
optional
(
"--fields"
,
fields
)
+
optional
(
"--convert"
,
convert
)
+
optional
(
"--terms"
,
terms
)
+
optional
(
"--chr"
,
chr
)
+
optional
(
"--pick_order"
,
pickOrder
)
+
optional
(
"--freq_pop"
,
freqPop
)
+
optional
(
"--freq_gt_lt"
,
freqGtLt
)
+
optional
(
"--freq_filter"
,
freqFilter
)
+
optional
(
"--filter"
,
filter
)
+
optional
(
"--host"
,
host
)
+
optional
(
"--user"
,
user
)
+
optional
(
"--password"
,
password
)
+
optional
(
"--registry"
,
registry
)
+
optional
(
"--build"
,
build
)
+
optional
(
"--compress"
,
compress
)
+
optional
(
"--cache_region_size"
,
cacheRegionSize
)
+
optional
(
"--fork"
,
threads
)
+
optional
(
"--cache_version"
,
cacheVersion
)
+
optional
(
"--freq_freq"
,
freqFreq
)
+
optional
(
"--port"
,
port
)
+
optional
(
"--db_version"
,
dbVersion
)
+
optional
(
"--buffer_size"
,
bufferSize
)
+
optional
(
"--failed"
,
failed
)
}
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
def
summaryStats
:
Map
[
String
,
Any
]
=
{
if
(
statsText
)
{
val
statsFile
:
File
=
new
File
(
output
.
getAbsolutePath
+
"_summary.txt"
)
parseStatsFile
(
statsFile
)
}
else
{
Map
()
}
val
statsFile
=
new
File
(
output
.
getAbsolutePath
+
"_summary.txt"
)
if
(
statsText
&&
statsFile
.
exists
())
parseStatsFile
(
statsFile
)
else
Map
()
}
def
parseStatsFile
(
file
:
File
)
:
Map
[
String
,
Any
]
=
{
val
contents
=
Source
.
fromFile
(
file
).
getLines
().
toList
val
headers
=
getHeadersFromStatsFile
(
contents
)
headers
.
foldLeft
(
Map
.
empty
[
String
,
Any
])((
acc
,
x
)
=>
acc
+
(
x
.
replace
(
" "
,
"_"
)
->
getBlockFromStatsFile
(
contents
,
x
)))
}
protected
val
removeOnConflict
=
Set
(
"Output_file"
,
"Command_line_options"
,
"Run_time"
,
"Start_time"
,
"End_time"
,
"Input_file_(format)"
,
"Novel_/_existing_variants"
)
protected
val
nonNumber
=
Set
(
"VEP_version_(API)"
,
"Cache/Database"
,
"Species"
)
def
getBlockFromStatsFile
(
contents
:
List
[
String
],
header
:
String
)
:
Map
[
String
,
Any
]
=
{
var
inBlock
=
false
var
theMap
:
Map
[
String
,
Any
]
=
Map
()
for
(
x
<-
contents
)
{
val
stripped
=
x
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
)
if
(
stripped
==
header
)
{
inBlock
=
true
}
else
{
if
(
inBlock
)
{
val
key
=
stripped
.
split
(
'\t'
).
head
.
replace
(
" "
,
"_"
)
val
value
=
stripped
.
split
(
'\t'
).
last
theMap
++=
Map
(
key
->
tryToParseNumber
(
value
,
fallBack
=
true
).
getOrElse
(
value
))
}
}
if
(
stripped
==
""
)
{
inBlock
=
false
override
def
resolveSummaryConflict
(
v1
:
Any
,
v2
:
Any
,
key
:
String
)
:
Any
=
{
if
(
removeOnConflict
.
contains
(
key
))
None
else
if
(
nonNumber
.
contains
(
key
))
v1
else
{
(
v1
,
v2
)
match
{
case
(
x1
:
Int
,
x2
:
Int
)
=>
x1
+
x2
case
_
=>
throw
new
IllegalStateException
(
s
"Value are not Int's, unable to sum them up, key: $key, v1: $v1, v2: $v2"
)
}
}
theMap
}
def
getHeadersFromStatsFile
(
contents
:
List
[
String
])
:
List
[
String
]
=
{
// block headers are of format '[block]'
contents
.
filter
(
_
.
startsWith
(
"["
)).
filter
(
_
.
endsWith
(
"]"
)).
map
(
_
.
stripPrefix
(
"["
)).
map
(
_
.
stripSuffix
(
"]"
))
}
def
parseStatsFile
(
file
:
File
)
:
Map
[
String
,
Any
]
=
{
val
reader
=
Source
.
fromFile
(
file
)
val
contents
=
reader
.
getLines
().
filter
(
_
!=
""
).
toArray
reader
.
close
()
def
isHeader
(
line
:
String
)
=
line
.
startsWith
(
"["
)
&&
line
.
endsWith
(
"]"
)
val
headers
=
contents
.
zipWithIndex
.
filter
(
x
=>
x
.
_1
.
startsWith
(
"["
)
&&
x
.
_1
.
endsWith
(
"]"
))
(
for
((
header
,
headerIndex
)
<-
headers
)
yield
{
val
name
=
header
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
)
name
.
replaceAll
(
" "
,
"_"
)
->
(
contents
.
drop
(
headerIndex
+
1
).
takeWhile
(!
isHeader
(
_
)).
map
{
line
=>
val
values
=
line
.
split
(
"\t"
,
2
)
values
.
head
.
replaceAll
(
" "
,
"_"
)
->
tryToParseNumber
(
values
.
last
).
getOrElse
(
0
)
}.
toMap
)
}).
toMap
}
}
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala
View file @
d84fc9cc
...
...
@@ -2,11 +2,11 @@ package nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.
{
BiopetJavaCommandLineFunction
,
Reference
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
,
Gather
,
Input
,
Output
}
class
CatVariants
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
class
CatVariants
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
with
Reference
{
analysisName
=
"CatVariants"
javaMainClass
=
"org.broadinstitute.gatk.tools.CatVariants"
...
...
@@ -44,6 +44,11 @@ class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction
@Gather
(
classOf
[
org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction
])
var
log_to_file
:
File
=
_
override
def
beforeGraph
()
=
{
super
.
beforeGraph
()
if
(
reference
==
null
)
reference
=
referenceFasta
()
}
override
def
cmdLine
=
super
.
cmdLine
+
required
(
"-R"
,
reference
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-V"
,
variant
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala
View file @
d84fc9cc
...
...
@@ -24,6 +24,7 @@ import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, Variant
import
htsjdk.variant.vcf._
import
nl.lumc.sasc.biopet.utils.ToolCommand
import
nl.lumc.sasc.biopet.utils.VcfUtils.scalaListToJavaObjectArrayList
import
nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
import
scala.collection.JavaConversions._
...
...
@@ -89,14 +90,14 @@ object VcfWithVcf extends ToolCommand {
val
header
=
reader
.
getFileHeader
val
vcfDict
=
header
.
getSequenceDictionary
match
{
case
r
if
r
!=
null
=>
r
.
assertSameDictionary
(
referenceDict
)
r
.
assertSameDictionary
(
referenceDict
,
true
)
r
case
_
=>
referenceDict
}
val
secondHeader
=
secondaryReader
.
getFileHeader
secondHeader
.
getSequenceDictionary
match
{
case
r
if
r
!=
null
=>
r
.
assertSameDictionary
(
referenceDict
)
case
r
if
r
!=
null
=>
r
.
assertSameDictionary
(
referenceDict
,
true
)
case
_
=>
}
...
...
@@ -123,6 +124,7 @@ object VcfWithVcf extends ToolCommand {
var
counter
=
0
for
(
record
<-
reader
)
{
require
(
vcfDict
.
getSequence
(
record
.
getContig
)
!=
null
,
s
"Contig ${record.getContig} does not exist on reference"
)
val
secondaryRecords
=
getSecondaryRecords
(
secondaryReader
,
record
,
commandArgs
.
matchAllele
)
val
fieldMap
=
createFieldMap
(
commandArgs
.
fields
,
secondaryRecords
)
...
...
@@ -201,7 +203,6 @@ object VcfWithVcf extends ToolCommand {
}
case
FieldMethod
.
unique
=>
scalaListToJavaObjectArrayList
(
attribute
.
_2
.
distinct
)
case
_
=>
{
print
(
attribute
.
_2
.
getClass
.
toString
)
scalaListToJavaObjectArrayList
(
attribute
.
_2
)
}
})
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala
View file @
d84fc9cc
...
...
@@ -24,7 +24,6 @@ import htsjdk.variant.vcf._
import
nl.lumc.sasc.biopet.utils.ToolCommand
import
scala.collection.JavaConversions._
import
scala.collection.mutable.
{
Map
=>
MMap
}
/**
* This tool parses a VEP annotated VCF into a standard VCF file.
...
...
@@ -57,31 +56,37 @@ object VepNormalizer extends ToolCommand {
}
val
header
=
reader
.
getFileHeader
logger
.
debug
(
"Checking for CSQ tag"
)
csqCheck
(
header
)
logger
.
debug
(
"CSQ tag OK"
)
logger
.
debug
(
"Checkion VCF version"
)
versionCheck
(
header
)
logger
.
debug
(
"VCF version OK"
)
logger
.
debug
(
"Parsing header"
)
val
newInfos
=
parseCsq
(
header
)
header
.
setWriteCommandLine
(
true
)
val
writer
=
new
AsyncVariantContextWriter
(
new
VariantContextWriterBuilder
().
setOutputFile
(
output
).
setReferenceDictionary
(
header
.
getSequenceDictionary
)
build
())
for
(
info
<-
newInfos
)
{
val
tmpheaderline
=
new
VCFInfoHeaderLine
(
info
,
VCFHeaderLineCount
.
UNBOUNDED
,
VCFHeaderLineType
.
String
,
"A VEP annotation"
)
header
.
addMetaDataLine
(
tmpheaderline
)
}
logger
.
debug
(
"Header parsing done"
)
if
(
reader
.
iterator
().
hasNext
)
{
logger
.
debug
(
"Checking for CSQ tag"
)
csqCheck
(
header
)
logger
.
debug
(
"CSQ tag OK"
)
logger
.
debug
(
"Checkion VCF version"
)
versionCheck
(
header
)
logger
.
debug
(
"VCF version OK"
)
logger
.
debug
(
"Parsing header"
)
val
newInfos
=
parseCsq
(
header
)
header
.
setWriteCommandLine
(
true
)
for
(
info
<-
newInfos
)
{
val
tmpheaderline
=
new
VCFInfoHeaderLine
(
info
,
VCFHeaderLineCount
.
UNBOUNDED
,
VCFHeaderLineType
.
String
,
"A VEP annotation"
)
header
.
addMetaDataLine
(
tmpheaderline
)
}
logger
.
debug
(
"Header parsing done"
)
logger
.
debug
(
"Writing header to file"
)
logger
.
debug
(
"Writing header to file"
)
writer
.
writeHeader
(
header
)
logger
.
debug
(
"Wrote header to file"
)
writer
.
writeHeader
(
header
)
logger
.
debug
(
"Wrote header to file"
)
normalize
(
reader
,
writer
,
newInfos
,
commandArgs
.
mode
,
commandArgs
.
removeCSQ
)
normalize
(
reader
,
writer
,
newInfos
,
commandArgs
.
mode
,
commandArgs
.
removeCSQ
)
}
else
{
logger
.
debug
(
"No variants found, skipping normalize step"
)
writer
.
writeHeader
(
header
)
}
writer
.
close
()
logger
.
debug
(
"Closed writer"
)
reader
.
close
()
...
...
@@ -91,6 +96,7 @@ object VepNormalizer extends ToolCommand {
/**
* Normalizer
*
* @param reader input VCF VCFFileReader
* @param writer output VCF AsyncVariantContextWriter
* @param newInfos array of string containing names of new info fields
...
...
@@ -118,6 +124,7 @@ object VepNormalizer extends ToolCommand {
/**
* Checks whether header has a CSQ tag
*
* @param header VCF header
*/
def
csqCheck
(
header
:
VCFHeader
)
=
{
...
...
@@ -131,6 +138,7 @@ object VepNormalizer extends ToolCommand {
* Checks whether version of input VCF is at least 4.0
* VEP is known to cause issues below 4.0
* Throws exception if not
*
* @param header VCFHeader of input VCF
*/
def
versionCheck
(
header
:
VCFHeader
)
=
{
...
...
@@ -149,6 +157,7 @@ object VepNormalizer extends ToolCommand {
/**
* Parses the CSQ tag in the header
*
* @param header the VCF header
* @return list of strings with new info fields
*/
...
...
@@ -160,6 +169,7 @@ object VepNormalizer extends ToolCommand {
/**
* Explode a single VEP-annotated record to multiple normal records
* Based on the number of annotated transcripts in the CSQ tag
*
* @param record the record as a VariantContext object
* @param csqInfos An array with names of new info tags
* @return An array with the new records
...
...
biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala
View file @
d84fc9cc
...
...
@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.utils
import
java.io.File
import
htsjdk.samtools.
{
SamReader
,
SamReaderFactory
}
import
htsjdk.samtools.
{
SAMSequenceDictionary
,
SamReader
,
SamReaderFactory
}
import
nl.lumc.sasc.biopet.utils.intervals.
{
BedRecord
,
BedRecordList
}
import
scala.collection.JavaConversions._
...
...
@@ -129,11 +129,8 @@ object BamUtils {
val
counts
=
bamInsertSizes
.
flatMap
(
x
=>
x
)
// avoid division by zero
if
(
counts
.
size
!=
0
)
{
counts
.
sum
/
counts
.
size
}
else
{
0
}
if
(
counts
.
size
!=
0
)
counts
.
sum
/
counts
.
size
else
0
}
/**
...
...
@@ -146,4 +143,21 @@ object BamUtils {
bamFile
->
sampleBamInsertSize
(
bamFile
,
samplingSize
,
binSize
)
}.
toMap
/** This class will add functionality to [[SAMSequenceDictionary]] */
implicit
class
SamDictCheck
(
samDics
:
SAMSequenceDictionary
)
{
/**
* This method will check if all contig and sizes are the same without looking at the order of the contigs
*
* @throws AssertionError
* @param that Dict to compare to
* @param ignoreOrder When true the order of the contig does not matter
*/
def
assertSameDictionary
(
that
:
SAMSequenceDictionary
,
ignoreOrder
:
Boolean
)
:
Unit
=
{
if
(
ignoreOrder
)
{
assert
(
samDics
.
getReferenceLength
==
that
.
getReferenceLength
)
val
thisContigNames
=
samDics
.
getSequences
.
map
(
x
=>
(
x
.
getSequenceName
,
x
.
getSequenceLength
)).
sorted
.
toSet
assert
(
thisContigNames
==
that
.
getSequences
.
map
(
x
=>
(
x
.
getSequenceName
,
x
.
getSequenceLength
)).
sorted
.
toSet
)
}
else
samDics
.
assertSameDictionary
(
that
)
}
}
}
biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala
View file @
d84fc9cc
...
...
@@ -134,4 +134,11 @@ object VcfUtils {
else
if
(
name
.
endsWith
(
".vcf.gz"
))
new
File
(
name
+
".tbi"
)
else
throw
new
IllegalArgumentException
(
s
"File given is no vcf file: $vcfFile"
)
}
def
vcfFileIsEmpty
(
file
:
File
)
:
Boolean
=
{
val
reader
=
new
VCFFileReader
(
file
,
false
)
val
hasNext
=
reader
.
iterator
().
hasNext
reader
.
close
()
!
hasNext
}
}
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala
View file @
d84fc9cc
...
...
@@ -178,7 +178,7 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
annotation
.
foreach
{
toucan
=>
toucan
.
outputDir
=
new
File
(
outputDir
,
"annotation"
)
toucan
.
inputV
CF
=
vc
.
finalFile
toucan
.
inputV
cf
=
vc
.
finalFile
add
(
toucan
)
}
})
...
...
toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
View file @
d84fc9cc
...
...
@@ -15,15 +15,20 @@
*/
package
nl.lumc.sasc.biopet.pipelines.toucan
import
java.io.File
import
htsjdk.samtools.reference.FastaSequenceFile
import
nl.lumc.sasc.biopet.core._
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
import
nl.lumc.sasc.biopet.extensions.bedtools.
{
BedtoolsIntersect
,
BedtoolsMerge
}
import
nl.lumc.sasc.biopet.extensions.gatk.
{
CatVariants
,
SelectVariants
}
import
nl.lumc.sasc.biopet.extensions.manwe.
{
ManweAnnotateVcf
,
ManweSamplesImport
}
import
nl.lumc.sasc.biopet.extensions.tools.
{
GvcfToBed
,
VcfWithVcf
,
VepNormalizer
}
import
nl.lumc.sasc.biopet.extensions.
{
Bgzip
,
Ln
,
VariantEffectPredictor
}
import
nl.lumc.sasc.biopet.utils.VcfUtils
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import
org.broadinstitute.gatk.queue.QScript
/**
...
...
@@ -35,22 +40,43 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
def
this
()
=
this
(
null
)
@Input
(
doc
=
"Input VCF file"
,
shortName
=
"Input"
,
required
=
true
)
var
inputV
CF
:
File
=
_
var
inputV
cf
:
File
=
_
@Input
(
doc
=
"Input GVCF file"
,
shortName
=
"gvcf"
,
required
=
false
)
var
inputGvcf
:
Option
[
File
]
=
None
var
outputVcf
:
Option
[
File
]
=
None