Commit ade0c25c authored by Sander Bollen's avatar Sander Bollen
Browse files

summary for vep

parent 4fc971d4
......@@ -17,16 +17,19 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
/**
* Extension for VariantEffectPredictor
* Created by ahbbollen on 15-1-15.
*/
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version with Summarizable {
executable = config("exe", submodule = "perl", default = "perl")
var vepScript: String = config("vep_script")
......@@ -48,7 +51,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
var everything: Boolean = config("everything", default = false)
var force: Boolean = config("force", default = false)
var no_stats: Boolean = config("no_stats", default = false)
var stats_text: Boolean = config("stats_text", default = false)
var stats_text: Boolean = config("stats_text", default = true)
var html: Boolean = config("html", default = false)
var cache: Boolean = config("cache", default = false)
var humdiv: Boolean = config("humdiv", default = false)
......@@ -254,4 +257,48 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
optional("--buffer_size", buffer_size) +
optional("--failed", failed)
def summaryFiles: Map[String, File] = Map()
def summaryStats: Map[String, Any] = {
if (stats_text) {
val stats_file: File = new File(output.getAbsolutePath + "_summary.txt")
parseStatsFile(stats_file)
} else {
Map()
}
}
def parseStatsFile(file: File): Map[String, Any] = {
val contents = Source.fromFile(file).getLines().toList
val headers = getHeadersFromStatsFile(contents)
headers.foldLeft(Map[String, Any]())((acc, x) => getBlockFromStatsFile(contents, x))
}
def getBlockFromStatsFile(contents: List[String], header: String): Map[String, Any] = {
var inBlock = false
var theMap: Map[String, Any] = Map()
for (x <- contents) {
val stripped = x.stripPrefix("[").stripSuffix("]")
if (stripped == header) {
inBlock = true
}
if (stripped == "") {
inBlock = false
}
if (inBlock) {
val key = stripped.split('\t').head.replace(" ", "_")
val value = stripped.split('\t').last
theMap ++= Map(key -> value)
}
}
theMap
}
def getHeadersFromStatsFile(contents: List[String]): List[String] = {
// block headers are of format '[block]'
contents.filter(_.startsWith("[")).filter(_.endsWith("]")).map(_.stripPrefix("[")).map(_.stripSuffix("]"))
}
}
......@@ -89,10 +89,10 @@ object SeqStat extends ToolCommand {
(qual_low_boundery < 59, qual_high_boundery > 74) match {
case (false, true) => phredEncoding = Solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
case (true, true) => phredEncoding = Solexa
// this is definite a sanger sequence, the lower end is sanger only
case (true, false) => phredEncoding = Sanger
......@@ -181,7 +181,7 @@ object SeqStat extends ToolCommand {
quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0)
}
if (nucs.length <= baseStats(pos).nucs.length) {
nucs ++= mutable.ArrayBuffer.fill( baseStats(pos).nucs.length - nucs.length )(0)
nucs ++= mutable.ArrayBuffer.fill(baseStats(pos).nucs.length - nucs.length)(0)
}
// count into the quals
baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
......
......@@ -68,6 +68,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment