Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
54aeb54f
Commit
54aeb54f
authored
Mar 10, 2016
by
Peter van 't Hof
Browse files
Merge branch 'feature-toucan_summary' into 'develop'
Feature toucan summary See merge request !338
parents
00034b55
354d418f
Changes
5
Hide whitespace changes
Inline
Side-by-side
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala
View file @
54aeb54f
...
...
@@ -93,12 +93,12 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
if
(
outputDir
.
getParentFile
.
canWrite
||
(
outputDir
.
exists
&&
outputDir
.
canWrite
))
globalConfig
.
writeReport
(
qSettings
.
runName
,
new
File
(
outputDir
,
".log/"
+
qSettings
.
runName
))
else
Logging
.
addError
(
"Parent of output dir: '"
+
outputDir
.
getParent
+
"' is not writeable, outputdir can
not be created"
)
else
Logging
.
addError
(
"Parent of output dir: '"
+
outputDir
.
getParent
+
"' is not writeable, output
dir
ectory
cannot be created"
)
inputFiles
.
foreach
{
i
=>
if
(!
i
.
file
.
exists
())
Logging
.
addError
(
s
"Input file does not exist: ${i.file}"
)
if
(!
i
.
file
.
canRead
)
Logging
.
addError
(
s
"Input file can not be read: ${i.file}"
)
if
(!
i
.
file
.
isAbsolute
)
Logging
.
addError
(
s
"Input file should be an abs
u
lute path: ${i.file}"
)
if
(!
i
.
file
.
isAbsolute
)
Logging
.
addError
(
s
"Input file should be an abs
o
lute path: ${i.file}"
)
}
functions
.
filter
(
_
.
jobOutputFile
==
null
).
foreach
(
f
=>
{
...
...
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
View file @
54aeb54f
...
...
@@ -17,16 +17,20 @@ package nl.lumc.sasc.biopet.extensions
import
java.io.File
import
nl.lumc.sasc.biopet.core.summary.Summarizable
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.
{
Version
,
BiopetCommandLineFunction
,
Reference
}
import
nl.lumc.sasc.biopet.utils.tryToParseNumber
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
scala.io.Source
/**
* Extension for VariantEffectPredictor
* Created by ahbbollen on 15-1-15.
*/
class
VariantEffectPredictor
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
with
Reference
with
Version
{
class
VariantEffectPredictor
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
with
Reference
with
Version
with
Summarizable
{
executable
=
config
(
"exe"
,
submodule
=
"perl"
,
default
=
"perl"
)
var
vepScript
:
String
=
config
(
"vep_script"
)
...
...
@@ -48,7 +52,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
var
everything
:
Boolean
=
config
(
"everything"
,
default
=
false
)
var
force
:
Boolean
=
config
(
"force"
,
default
=
false
)
var
no_stats
:
Boolean
=
config
(
"no_stats"
,
default
=
false
)
var
stats_text
:
Boolean
=
config
(
"stats_text"
,
default
=
fals
e
)
var
stats_text
:
Boolean
=
config
(
"stats_text"
,
default
=
tru
e
)
var
html
:
Boolean
=
config
(
"html"
,
default
=
false
)
var
cache
:
Boolean
=
config
(
"cache"
,
default
=
false
)
var
humdiv
:
Boolean
=
config
(
"humdiv"
,
default
=
false
)
...
...
@@ -254,4 +258,47 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
optional
(
"--buffer_size"
,
buffer_size
)
+
optional
(
"--failed"
,
failed
)
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
def
summaryStats
:
Map
[
String
,
Any
]
=
{
if
(
stats_text
)
{
val
stats_file
:
File
=
new
File
(
output
.
getAbsolutePath
+
"_summary.txt"
)
parseStatsFile
(
stats_file
)
}
else
{
Map
()
}
}
def
parseStatsFile
(
file
:
File
)
:
Map
[
String
,
Any
]
=
{
val
contents
=
Source
.
fromFile
(
file
).
getLines
().
toList
val
headers
=
getHeadersFromStatsFile
(
contents
)
headers
.
foldLeft
(
Map
.
empty
[
String
,
Any
])((
acc
,
x
)
=>
acc
+
(
x
.
replace
(
" "
,
"_"
)
->
getBlockFromStatsFile
(
contents
,
x
)))
}
def
getBlockFromStatsFile
(
contents
:
List
[
String
],
header
:
String
)
:
Map
[
String
,
Any
]
=
{
var
inBlock
=
false
var
theMap
:
Map
[
String
,
Any
]
=
Map
()
for
(
x
<-
contents
)
{
val
stripped
=
x
.
stripPrefix
(
"["
).
stripSuffix
(
"]"
)
if
(
stripped
==
header
)
{
inBlock
=
true
}
else
{
if
(
inBlock
)
{
val
key
=
stripped
.
split
(
'\t'
).
head
.
replace
(
" "
,
"_"
)
val
value
=
stripped
.
split
(
'\t'
).
last
theMap
++=
Map
(
key
->
tryToParseNumber
(
value
,
fallBack
=
true
).
getOrElse
(
value
))
}
}
if
(
stripped
==
""
)
{
inBlock
=
false
}
}
theMap
}
def
getHeadersFromStatsFile
(
contents
:
List
[
String
])
:
List
[
String
]
=
{
// block headers are of format '[block]'
contents
.
filter
(
_
.
startsWith
(
"["
)).
filter
(
_
.
endsWith
(
"]"
)).
map
(
_
.
stripPrefix
(
"["
)).
map
(
_
.
stripSuffix
(
"]"
))
}
}
public/biopet-extensions/src/test/resources/vep.metrics
0 → 100644
View file @
54aeb54f
[VEP run statistics]
VEP version (API) 77 (77)
Cache/Database /usr/local/Genomes/H.Sapiens/hg19/vep_cache_75/homo_sapiens/75
Species homo_sapiens
Command line options -i test.vcf -o test.vep.vcf -v --everything --stats_text --cache --vcf --allow_non_variant --species homo_sapiens --dir /usr/local/Genomes/H.Sapiens/hg19/vep_cache_75 --fasta /usr/local/Genomes/H.Sapiens/hg19/vep_cache_75/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa --fork 2 --cache_version 75 --db_version 75 --failed 1 --offline
Start time 2016-03-03 14:37:44
End time 2016-03-03 14:37:54
Run time 10 seconds
Input file (format) test.vcf (VCF)
Output file test.vep.vcf [text]
[General statistics]
Lines of input read 1000
Variants processed 874
Variants remaining after filtering 874
Lines of output written 874
Novel / existing variants 105 (12.0%) / 769 (88.0%)
Overlapped genes 132
Overlapped transcripts 661
Overlapped regulatory features 164
[Variant classes]
insertion 29
deletion 51
SNV 794
[Consequences (most severe)]
splice_donor_variant 3
frameshift_variant 1
inframe_insertion 2
inframe_deletion 1
missense_variant 39
splice_region_variant 11
synonymous_variant 46
5_prime_UTR_variant 19
3_prime_UTR_variant 36
non_coding_transcript_exon_variant 95
intron_variant 518
upstream_gene_variant 48
downstream_gene_variant 27
intergenic_variant 28
[Consequences (all)]
splice_donor_variant 6
frameshift_variant 2
inframe_insertion 15
inframe_deletion 6
missense_variant 101
splice_region_variant 60
synonymous_variant 173
coding_sequence_variant 1
5_prime_UTR_variant 33
3_prime_UTR_variant 108
non_coding_transcript_exon_variant 279
intron_variant 3311
NMD_transcript_variant 437
non_coding_transcript_variant 1102
upstream_gene_variant 1486
downstream_gene_variant 1779
TF_binding_site_variant 12
feature_elongation 97
regulatory_region_variant 253
feature_truncation 278
intergenic_variant 28
[Coding consequences]
frameshift_variant 2
inframe_insertion 15
inframe_deletion 6
missense_variant 101
synonymous_variant 173
coding_sequence_variant 1
[SIFT summary]
deleterious 22
tolerated 76
[PolyPhen summary]
possibly damaging 7
unknown 12
probably damaging 12
benign 70
[Variants by chromosome]
1 874
[Distribution of variants on chromosome 1]
0 225
1 407
2 242
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 0
29 0
30 0
31 0
32 0
33 0
34 0
35 0
36 0
37 0
38 0
39 0
40 0
41 0
42 0
43 0
44 0
45 0
46 0
47 0
48 0
49 0
50 0
51 0
52 0
53 0
54 0
55 0
56 0
57 0
58 0
59 0
60 0
61 0
62 0
63 0
64 0
65 0
66 0
67 0
68 0
69 0
70 0
71 0
72 0
73 0
74 0
75 0
76 0
77 0
78 0
79 0
80 0
81 0
82 0
83 0
84 0
85 0
86 0
87 0
88 0
89 0
90 0
91 0
92 0
93 0
94 0
95 0
96 0
97 0
98 0
99 0
100 0
101 0
102 0
103 0
104 0
105 0
106 0
107 0
108 0
109 0
110 0
111 0
112 0
113 0
114 0
115 0
116 0
117 0
118 0
119 0
120 0
121 0
122 0
123 0
124 0
125 0
126 0
127 0
128 0
129 0
130 0
131 0
132 0
133 0
134 0
135 0
136 0
137 0
138 0
139 0
140 0
141 0
142 0
143 0
144 0
145 0
146 0
147 0
148 0
149 0
150 0
151 0
152 0
153 0
154 0
155 0
156 0
157 0
158 0
159 0
160 0
161 0
162 0
163 0
164 0
165 0
166 0
167 0
168 0
169 0
170 0
171 0
172 0
173 0
174 0
175 0
176 0
177 0
178 0
179 0
180 0
181 0
182 0
183 0
184 0
185 0
186 0
187 0
188 0
189 0
190 0
191 0
192 0
193 0
194 0
195 0
196 0
197 0
198 0
199 0
200 0
201 0
202 0
203 0
204 0
205 0
206 0
207 0
208 0
209 0
210 0
211 0
212 0
213 0
214 0
215 0
216 0
217 0
218 0
219 0
220 0
221 0
222 0
223 0
224 0
225 0
226 0
227 0
228 0
229 0
230 0
231 0
232 0
233 0
234 0
235 0
236 0
237 0
238 0
239 0
240 0
241 0
242 0
243 0
244 0
245 0
246 0
247 0
248 0
249 0
[Position in protein]
00-10% 29
10-20% 63
20-30% 34
30-40% 20
40-50% 23
50-60% 25
60-70% 31
70-80% 11
80-90% 32
90-100% 30
public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictorTest.scala
0 → 100644
View file @
54aeb54f
package
nl.lumc.sasc.biopet.extensions
import
java.io.File
import
java.nio.file.Paths
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
/**
* Created by ahbbollen on 3-3-16.
*/
class
VariantEffectPredictorTest
extends
TestNGSuite
with
Matchers
{
@Test
def
testSummaryStats
=
{
val
file
=
new
File
(
Paths
.
get
(
getClass
.
getResource
(
"/vep.metrics"
).
toURI
).
toString
)
val
vep
=
new
VariantEffectPredictor
(
null
)
val
stats
=
vep
.
parseStatsFile
(
file
)
stats
.
contains
(
"VEP_run_statistics"
)
shouldBe
true
stats
.
contains
(
"General_statistics"
)
shouldBe
true
stats
.
contains
(
"Consequences_(most_severe)"
)
shouldBe
true
stats
.
contains
(
"Consequences_(all)"
)
shouldBe
true
stats
.
contains
(
"Coding_consequences"
)
shouldBe
true
stats
.
contains
(
"SIFT_summary"
)
shouldBe
true
stats
.
contains
(
"PolyPhen_summary"
)
shouldBe
true
stats
.
contains
(
"Variants_by_chromosome"
)
shouldBe
true
stats
.
contains
(
"Distribution_of_variants_on_chromosome_1"
)
shouldBe
true
stats
.
contains
(
"Position_in_protein"
)
shouldBe
true
}
}
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
View file @
54aeb54f
...
...
@@ -68,6 +68,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
vep
.
output
=
new
File
(
outputDir
,
inputVCF
.
getName
.
stripSuffix
(
".gz"
).
stripSuffix
(
".vcf"
)
+
".vep.vcf"
)
vep
.
isIntermediate
=
true
add
(
vep
)
addSummarizable
(
vep
,
"variant_effect_predictor"
)
val
normalizer
=
new
VepNormalizer
(
this
)
normalizer
.
inputVCF
=
vep
.
output
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment