ShivaSvCallingReport.scala 4.35 KB
Newer Older
1
2
3
4
5
6
7
8
9
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.{ File, PrintWriter }

import nl.lumc.sasc.biopet.utils.rscript.LinePlot
import nl.lumc.sasc.biopet.utils.summary.Summary

object ShivaSvCallingReport {

10
  val histogramBinBoundaries: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000)
11
  val histogramPlotTicks: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000, 100000000)
12
13
  val histogramText: List[String] = List("<=100bp", "0.1-1kb", "1-10kb", "10-100kb", "0.1-1Mb", "1-10Mb", ">10Mb")

akaljuvee's avatar
akaljuvee committed
14
15
  def parseSummaryForSvCounts(summary: Summary): Map[String, Map[String, Array[Long]]] = {
    var delCounts, insCounts, dupCounts, invCounts: Map[String, Array[Long]] = Map()
16

17
18
    for (sampleName <- summary.samples) {
      var sampleCounts: Map[String, Any] = summary.getSampleValue(sampleName, "shivasvcalling", "stats", "variantsBySizeAndType").get.asInstanceOf[Map[String, Any]]
19
      for ((svType, counts) <- sampleCounts.collect({ case (k, v: List[_]) => (k, v.toArray[Any]) })) {
akaljuvee's avatar
akaljuvee committed
20
        val elem: Tuple2[String, Array[Long]] = (sampleName, counts.collect({ case x: Long => x }))
21
        svType match {
akaljuvee's avatar
akaljuvee committed
22
23
24
25
          case "DEL" => delCounts += elem
          case "INS" => insCounts += elem
          case "DUP" => dupCounts += elem
          case "INV" => invCounts += elem
26
27
        }
      }
28
    }
29

akaljuvee's avatar
akaljuvee committed
30
31
32
33
34
    var result: Map[String, Map[String, Array[Long]]] = Map()
    if (delCounts.exists(elem => (elem._2.sum > 0))) result = Map("DEL" -> delCounts)
    if (insCounts.exists(elem => (elem._2.sum > 0))) result += ("INS" -> insCounts)
    if (dupCounts.exists(elem => (elem._2.sum > 0))) result += ("DUP" -> dupCounts)
    if (invCounts.exists(elem => (elem._2.sum > 0))) result += ("INV" -> invCounts)
35
36
37
    result
  }

38
39
40
41
42
43
44
45
46
  def parseSummaryForTranslocations(summary: Summary): Map[String, Long] = {
    var traCounts: Map[String, Long] = Map()
    for (sampleName <- summary.samples) {
      var counts: Map[String, Any] = summary.getSampleValue(sampleName, "shivasvcalling", "stats", "variantsBySizeAndType").get.asInstanceOf[Map[String, Any]]
      traCounts += (sampleName -> counts.get("TRA").get.asInstanceOf[Long])
    }
    if (traCounts.exists(elem => elem._2 > 0)) traCounts else Map.empty
  }

akaljuvee's avatar
akaljuvee committed
47
  def writeTsvFiles(sampleNames: List[String], counts: Map[String, Map[String, Array[Long]]], svTypes: List[SvTypeForReport], outFileAllTypes: String, outDir: File): Unit = {
48
49
50
51
52
53

    val tsvWriter = new PrintWriter(new File(outDir, outFileAllTypes))
    tsvWriter.print("sv_type\tsample")
    histogramText.foreach(bin => tsvWriter.print("\t" + bin))
    tsvWriter.println()

54
    for (sv <- svTypes) {
akaljuvee's avatar
akaljuvee committed
55
      val countsForSvType: Map[String, Array[Long]] = counts.get(sv.svType).get
56

57
      writeTsvFileForSvType(sv, countsForSvType, sampleNames, outDir)
58

59
60
61
      for (sampleName <- sampleNames) {
        tsvWriter.print(sv.svType + "\t" + sampleName + "\t")
        tsvWriter.println(countsForSvType.get(sampleName).get.mkString("\t"))
62
      }
63
64
65
    }
    tsvWriter.close()
  }
66

akaljuvee's avatar
akaljuvee committed
67
  def writeTsvFileForSvType(svType: SvTypeForReport, counts: Map[String, Array[Long]], sampleNames: List[String], outDir: File): Unit = {
68
69
70
71
72
73
    val tsvWriter = new PrintWriter(new File(outDir, svType.tsvFileName))

    tsvWriter.print("histogramBin")
    sampleNames.foreach(sampleName => tsvWriter.print("\t" + sampleName))
    tsvWriter.println()

74
75
    for (i <- histogramPlotTicks.indices) {
      tsvWriter.print(histogramPlotTicks(i))
76
77
      sampleNames.foreach(sampleName => tsvWriter.print("\t" + counts.get(sampleName).get(i)))
      tsvWriter.println()
78
    }
79
80

    tsvWriter.close()
81
82
83
84
85
86
87
  }

  def createPlots(svTypes: List[SvTypeForReport], outDir: File): Unit = {
    for (sv <- svTypes) {
      val tsvFile = new File(outDir, sv.tsvFileName)
      val pngFile: File = new File(outDir, sv.pngFileName)
      val plot = LinePlot(tsvFile, pngFile,
88
        xlabel = Some(s"${sv.displayText.substring(0, sv.displayText.length - 1)} size"),
89
        ylabel = Some("Number of loci"),
90
91
        title = Some(sv.displayText),
        width = 400,
92
        removeZero = false)
93
      plot.height = Some(300)
94
95
96
      plot.llabel = Some("Sample")
      plot.xLog10 = true
      plot.yLog10 = true
97
98
      plot.xLog10AxisTicks = histogramPlotTicks.collect({ case x => x.toString() })
      plot.xLog10AxisLabels = histogramText
99
100
101
102
103
104
105
106
      plot.runLocal()
    }

  }

}

case class SvTypeForReport(svType: String, displayText: String, tsvFileName: String, pngFileName: String)