Commit d1017747 authored by akaljuvee's avatar akaljuvee

plots showing size distribution for different sv types

parent 144ccd5d
......@@ -51,9 +51,12 @@ trait ShivaReportTrait extends MultisampleMappingReportTrait {
override def pipelineName = "shiva"
override def additionalSections = super.additionalSections ++ (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp",
Map("showPlot" -> true, "showTable" -> false)))
else Nil)
override def additionalSections = {
val params = Map("showPlot" -> true, "showTable" -> false)
super.additionalSections ++
(if (variantcallingExecuted) List("SNV Calling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", params)) else Nil) ++
(if (svCallingExecuted) List("SV Calling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariantsSv.ssp", params)) else Nil)
}
/** Root page for the shiva report */
override def indexPage = {
......
......@@ -14,8 +14,10 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.File
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.Pysvtools
import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._
......@@ -97,6 +99,18 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
// group by "tags"
// sample tagging is however not available within this pipeline
for ((sample, mergedResultFile) <- outputMergedVCFbySample) {
lazy val counts = getVariantCounts(mergedResultFile, ShivaSvCalling.histogramBinBoundaries)
addSummarizable(new Summarizable {
def summaryFiles = Map.empty
def summaryStats = counts
}, "variantsBySizeAndType", Some(sample))
}
addSummarizable(new Summarizable {
def summaryFiles = Map.empty
def summaryStats = ShivaSvCalling.histogramBinBoundaries
}, "histBreaksForCounts")
addSummaryJobs()
}
......@@ -112,7 +126,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
/** Files for the summary */
def summaryFiles: Map[String, File] = outputMergedVCFbySample ++ (if (inputBams.size > 1) Map("final_mergedvcf" -> outputMergedVCF) else Nil)
def getVariantCounts(vcfFile: File, breaks: Array[Int]): (Map[String, Array[Int]], Option[Int]) = {
def getVariantCounts(vcfFile: File, breaks: Array[Int]): Map[String, Any] = {
val delCounts, insCounts, dupCounts, invCounts = Array.fill(breaks.size + 1) { 0 }
var traCount = 0
......@@ -137,13 +151,13 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
}
iterator.close()
var counts: Map[String, Array[Int]] = Map()
var counts: Map[String, Any] = Map()
if (isConfiguredForType("DEL")) counts = Map("DEL" -> delCounts)
if (isConfiguredForType("INS")) counts = counts + ("INS" -> insCounts)
if (isConfiguredForType("DUP")) counts = counts + ("DUP" -> dupCounts)
if (isConfiguredForType("INV")) counts = counts + ("INV" -> invCounts)
(counts, if (isConfiguredForType("TRA")) Some(traCount) else None)
if (isConfiguredForType("TRA")) counts = counts + ("TRA" -> traCount)
counts
}
def isConfiguredForType(svType: String): Boolean = {
......@@ -152,7 +166,9 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
}
object ShivaSvCalling extends PipelineCommand
object ShivaSvCalling extends PipelineCommand {
val histogramBinBoundaries: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000)
}
object StructuralVariantType extends Enumeration {
val Deletion = Value("DEL")
......
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.rscript.LinePlot
import nl.lumc.sasc.biopet.utils.summary.Summary
object ShivaSvCallingReport {
def parseSummaryForSvCounts(summary: Summary): Map[String, Map[String, Array[Any]]] = {
var result: Map[String, Map[String, Array[Any]]] = Map()
for (sampleName <- summary.samples) {
var sampleCounts: Map[String, Any] = summary.getSampleValue(sampleName, "shivasvcalling", "stats", "variantsBySizeAndType").get.asInstanceOf[Map[String, Any]]
result = result + (sampleName -> sampleCounts.collect({ case (k, v: List[_]) => (k, v.toArray[Any]) }))
}
result
}
def writeTsvForPlots(counts: Map[String, Map[String, Array[Any]]], svTypes: List[SvTypeForReport], outDir: File): Unit = {
val sampleNames = counts.keys
for (sv <- svTypes) {
val tsvFile = new File(outDir, sv.tsvFileName)
val tsvWriter = new PrintWriter(tsvFile)
tsvWriter.print("binMax")
sampleNames.foreach(sampleName => tsvWriter.print("\t" + sampleName))
tsvWriter.println()
for (i <- ShivaSvCalling.histogramBinBoundaries.indices) {
tsvWriter.print(ShivaSvCalling.histogramBinBoundaries(i))
sampleNames.foreach(sampleName => tsvWriter.print("\t" + counts.get(sampleName).get.get(sv.svType).get(i)))
tsvWriter.println()
}
val i = ShivaSvCalling.histogramBinBoundaries.length
tsvWriter.print(ShivaSvCalling.histogramBinBoundaries(i - 1) * 10)
sampleNames.foreach(sampleName => tsvWriter.print("\t" + counts.get(sampleName).get.get(sv.svType).get(i)))
tsvWriter.println()
tsvWriter.close()
}
}
def createPlots(svTypes: List[SvTypeForReport], outDir: File): Unit = {
for (sv <- svTypes) {
val tsvFile = new File(outDir, sv.tsvFileName)
val pngFile: File = new File(outDir, sv.pngFileName)
val plot = LinePlot(tsvFile, pngFile,
xlabel = Some(s"${sv.displayText} size"),
ylabel = Some("Number of loci"),
title = Some(sv.displayText + "s"),
width = 600,
removeZero = false)
plot.llabel = Some("Sample")
plot.xLog10 = true
plot.yLog10 = true
plot.runLocal()
}
}
}
case class SvTypeForReport(svType: String, displayText: String, tsvFileName: String, pngFileName: String)
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment