Commit 83af6688 authored by akaljuvee

changes Bowo and Peter suggested

parent ec7fe052
...@@ -18,11 +18,14 @@ import htsjdk.variant.vcf.VCFFileReader ...@@ -18,11 +18,14 @@ import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.Pysvtools import nl.lumc.sasc.biopet.extensions.Pysvtools
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCallingReport.histogramBinBoundaries
import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._ import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
/** /**
* Common trait for ShivaVariantcalling * Common trait for ShivaVariantcalling
* *
...@@ -98,7 +101,7 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri ...@@ -98,7 +101,7 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri
// sample tagging is however not available within this pipeline // sample tagging is however not available within this pipeline
for ((sample, mergedResultFile) <- outputMergedVCFbySample) { for ((sample, mergedResultFile) <- outputMergedVCFbySample) {
lazy val counts = getVariantCounts(mergedResultFile, ShivaSvCallingReport.histogramBinBoundaries) lazy val counts = getVariantCounts(mergedResultFile)
addSummarizable(new Summarizable { addSummarizable(new Summarizable {
def summaryFiles = Map("output_vcf" -> mergedResultFile) def summaryFiles = Map("output_vcf" -> mergedResultFile)
def summaryStats = counts def summaryStats = counts
...@@ -112,35 +115,35 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri ...@@ -112,35 +115,35 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri
protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this), new Pindel(this)) protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this), new Pindel(this))
/** Settings for the summary */ /** Settings for the summary */
def summarySettings = Map("sv_callers" -> configCallers.toList, "hist_bin_boundaries" -> ShivaSvCallingReport.histogramBinBoundaries) def summarySettings = Map("sv_callers" -> configCallers.toList, "hist_bin_boundaries" -> histogramBinBoundaries)
/** Files for the summary */ /** Files for the summary */
def summaryFiles: Map[String, File] = if (inputBams.size > 1) Map("final_mergedvcf" -> outputMergedVCF) else Map.empty def summaryFiles: Map[String, File] = if (inputBams.size > 1) Map("final_mergedvcf" -> outputMergedVCF) else Map.empty
def getVariantCounts(vcfFile: File, breaks: Array[Int]): Map[String, Any] = { /** Parses a vcf-file and counts sv-s by type and size. Sv-s are divided to different size classes, the boundaries between these classes are those given in ShivaSvCallingReport.histogramBinBoundaries. */
val delCounts, insCounts, dupCounts, invCounts = Array.fill(breaks.size + 1) { 0 } def getVariantCounts(vcfFile: File): Map[String, Any] = {
val delCounts, insCounts, dupCounts, invCounts = Array.fill(histogramBinBoundaries.size + 1) { 0 }
var traCount = 0 var traCount = 0
val iterator = new VCFFileReader(vcfFile, false).iterator val reader = new VCFFileReader(vcfFile, false)
while (iterator.hasNext) { for (record <- reader) {
val record = iterator.next record.getAttributeAsString("SVTYPE", "") match {
val svType = record.getAttributeAsString("SVTYPE", "") case "TRA" | "CTX" | "ITX" => traCount += 1
if (svType == "TRA" || svType == "CTX" || svType == "ITX") { case svType => {
traCount += 1 val size = record.getEnd - record.getStart
} else { var i = 0
val size = record.getEnd - record.getStart while (i < histogramBinBoundaries.size && size > histogramBinBoundaries(i)) i += 1
var i = 0 svType match {
while (i < breaks.size && size > breaks(i)) i += 1 case "DEL" => delCounts(i) += 1
svType match { case "INS" => insCounts(i) += 1
case "DEL" => delCounts(i) += 1 case "DUP" => dupCounts(i) += 1
case "INS" => insCounts(i) += 1 case "INV" => invCounts(i) += 1
case "DUP" => dupCounts(i) += 1 case _ => logger.warn(s"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType")
case "INV" => invCounts(i) += 1 }
case _ => logger.warn(s"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType")
} }
} }
} }
iterator.close() reader.close()
Map("DEL" -> delCounts, "INS" -> insCounts, "DUP" -> dupCounts, "INV" -> invCounts, "TRA" -> traCount) Map("DEL" -> delCounts, "INS" -> insCounts, "DUP" -> dupCounts, "INV" -> invCounts, "TRA" -> traCount)
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment