Commit 54aeb54f authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'feature-toucan_summary' into 'develop'

Feature toucan summary



See merge request !338
parents 00034b55 354d418f
......@@ -93,12 +93,12 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
if (outputDir.getParentFile.canWrite || (outputDir.exists && outputDir.canWrite))
globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName))
else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, outputdir can not be created")
else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, output directory cannot be created")
inputFiles.foreach { i =>
if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absulute path: ${i.file}")
if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
}
functions.filter(_.jobOutputFile == null).foreach(f => {
......
......@@ -17,16 +17,20 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
import nl.lumc.sasc.biopet.utils.tryToParseNumber
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
/**
* Extension for VariantEffectPredictor
* Created by ahbbollen on 15-1-15.
*/
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version with Summarizable {
executable = config("exe", submodule = "perl", default = "perl")
var vepScript: String = config("vep_script")
......@@ -48,7 +52,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
var everything: Boolean = config("everything", default = false)
var force: Boolean = config("force", default = false)
var no_stats: Boolean = config("no_stats", default = false)
var stats_text: Boolean = config("stats_text", default = false)
var stats_text: Boolean = config("stats_text", default = true)
var html: Boolean = config("html", default = false)
var cache: Boolean = config("cache", default = false)
var humdiv: Boolean = config("humdiv", default = false)
......@@ -254,4 +258,47 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
optional("--buffer_size", buffer_size) +
optional("--failed", failed)
def summaryFiles: Map[String, File] = Map()
def summaryStats: Map[String, Any] = {
if (stats_text) {
val stats_file: File = new File(output.getAbsolutePath + "_summary.txt")
parseStatsFile(stats_file)
} else {
Map()
}
}
def parseStatsFile(file: File): Map[String, Any] = {
val contents = Source.fromFile(file).getLines().toList
val headers = getHeadersFromStatsFile(contents)
headers.foldLeft(Map.empty[String, Any])((acc, x) => acc + (x.replace(" ", "_") -> getBlockFromStatsFile(contents, x)))
}
def getBlockFromStatsFile(contents: List[String], header: String): Map[String, Any] = {
var inBlock = false
var theMap: Map[String, Any] = Map()
for (x <- contents) {
val stripped = x.stripPrefix("[").stripSuffix("]")
if (stripped == header) {
inBlock = true
} else {
if (inBlock) {
val key = stripped.split('\t').head.replace(" ", "_")
val value = stripped.split('\t').last
theMap ++= Map(key -> tryToParseNumber(value, fallBack = true).getOrElse(value))
}
}
if (stripped == "") {
inBlock = false
}
}
theMap
}
def getHeadersFromStatsFile(contents: List[String]): List[String] = {
// block headers are of format '[block]'
contents.filter(_.startsWith("[")).filter(_.endsWith("]")).map(_.stripPrefix("[")).map(_.stripSuffix("]"))
}
}
[VEP run statistics]
VEP version (API) 77 (77)
Cache/Database /usr/local/Genomes/H.Sapiens/hg19/vep_cache_75/homo_sapiens/75
Species homo_sapiens
Command line options -i test.vcf -o test.vep.vcf -v --everything --stats_text --cache --vcf --allow_non_variant --species homo_sapiens --dir /usr/local/Genomes/H.Sapiens/hg19/vep_cache_75 --fasta /usr/local/Genomes/H.Sapiens/hg19/vep_cache_75/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa --fork 2 --cache_version 75 --db_version 75 --failed 1 --offline
Start time 2016-03-03 14:37:44
End time 2016-03-03 14:37:54
Run time 10 seconds
Input file (format) test.vcf (VCF)
Output file test.vep.vcf [text]
[General statistics]
Lines of input read 1000
Variants processed 874
Variants remaining after filtering 874
Lines of output written 874
Novel / existing variants 105 (12.0%) / 769 (88.0%)
Overlapped genes 132
Overlapped transcripts 661
Overlapped regulatory features 164
[Variant classes]
insertion 29
deletion 51
SNV 794
[Consequences (most severe)]
splice_donor_variant 3
frameshift_variant 1
inframe_insertion 2
inframe_deletion 1
missense_variant 39
splice_region_variant 11
synonymous_variant 46
5_prime_UTR_variant 19
3_prime_UTR_variant 36
non_coding_transcript_exon_variant 95
intron_variant 518
upstream_gene_variant 48
downstream_gene_variant 27
intergenic_variant 28
[Consequences (all)]
splice_donor_variant 6
frameshift_variant 2
inframe_insertion 15
inframe_deletion 6
missense_variant 101
splice_region_variant 60
synonymous_variant 173
coding_sequence_variant 1
5_prime_UTR_variant 33
3_prime_UTR_variant 108
non_coding_transcript_exon_variant 279
intron_variant 3311
NMD_transcript_variant 437
non_coding_transcript_variant 1102
upstream_gene_variant 1486
downstream_gene_variant 1779
TF_binding_site_variant 12
feature_elongation 97
regulatory_region_variant 253
feature_truncation 278
intergenic_variant 28
[Coding consequences]
frameshift_variant 2
inframe_insertion 15
inframe_deletion 6
missense_variant 101
synonymous_variant 173
coding_sequence_variant 1
[SIFT summary]
deleterious 22
tolerated 76
[PolyPhen summary]
possibly damaging 7
unknown 12
probably damaging 12
benign 70
[Variants by chromosome]
1 874
[Distribution of variants on chromosome 1]
0 225
1 407
2 242
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 0
29 0
30 0
31 0
32 0
33 0
34 0
35 0
36 0
37 0
38 0
39 0
40 0
41 0
42 0
43 0
44 0
45 0
46 0
47 0
48 0
49 0
50 0
51 0
52 0
53 0
54 0
55 0
56 0
57 0
58 0
59 0
60 0
61 0
62 0
63 0
64 0
65 0
66 0
67 0
68 0
69 0
70 0
71 0
72 0
73 0
74 0
75 0
76 0
77 0
78 0
79 0
80 0
81 0
82 0
83 0
84 0
85 0
86 0
87 0
88 0
89 0
90 0
91 0
92 0
93 0
94 0
95 0
96 0
97 0
98 0
99 0
100 0
101 0
102 0
103 0
104 0
105 0
106 0
107 0
108 0
109 0
110 0
111 0
112 0
113 0
114 0
115 0
116 0
117 0
118 0
119 0
120 0
121 0
122 0
123 0
124 0
125 0
126 0
127 0
128 0
129 0
130 0
131 0
132 0
133 0
134 0
135 0
136 0
137 0
138 0
139 0
140 0
141 0
142 0
143 0
144 0
145 0
146 0
147 0
148 0
149 0
150 0
151 0
152 0
153 0
154 0
155 0
156 0
157 0
158 0
159 0
160 0
161 0
162 0
163 0
164 0
165 0
166 0
167 0
168 0
169 0
170 0
171 0
172 0
173 0
174 0
175 0
176 0
177 0
178 0
179 0
180 0
181 0
182 0
183 0
184 0
185 0
186 0
187 0
188 0
189 0
190 0
191 0
192 0
193 0
194 0
195 0
196 0
197 0
198 0
199 0
200 0
201 0
202 0
203 0
204 0
205 0
206 0
207 0
208 0
209 0
210 0
211 0
212 0
213 0
214 0
215 0
216 0
217 0
218 0
219 0
220 0
221 0
222 0
223 0
224 0
225 0
226 0
227 0
228 0
229 0
230 0
231 0
232 0
233 0
234 0
235 0
236 0
237 0
238 0
239 0
240 0
241 0
242 0
243 0
244 0
245 0
246 0
247 0
248 0
249 0
[Position in protein]
00-10% 29
10-20% 63
20-30% 34
30-40% 20
40-50% 23
50-60% 25
60-70% 31
70-80% 11
80-90% 32
90-100% 30
package nl.lumc.sasc.biopet.extensions
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by ahbbollen on 3-3-16.
*/
class VariantEffectPredictorTest extends TestNGSuite with Matchers {
@Test
def testSummaryStats = {
val file = new File(Paths.get(getClass.getResource("/vep.metrics").toURI).toString)
val vep = new VariantEffectPredictor(null)
val stats = vep.parseStatsFile(file)
stats.contains("VEP_run_statistics") shouldBe true
stats.contains("General_statistics") shouldBe true
stats.contains("Consequences_(most_severe)") shouldBe true
stats.contains("Consequences_(all)") shouldBe true
stats.contains("Coding_consequences") shouldBe true
stats.contains("SIFT_summary") shouldBe true
stats.contains("PolyPhen_summary") shouldBe true
stats.contains("Variants_by_chromosome") shouldBe true
stats.contains("Distribution_of_variants_on_chromosome_1") shouldBe true
stats.contains("Position_in_protein") shouldBe true
}
}
......@@ -68,6 +68,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment