ShivaSvCallingReport.scala 5.72 KB
Newer Older
1
2
3
4
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.{ File, PrintWriter }

5
import nl.lumc.sasc.biopet.utils.Logging
6
import nl.lumc.sasc.biopet.utils.rscript.LinePlot
akaljuvee's avatar
akaljuvee committed
7
8
9
10
11
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.{ ModuleName, PipelineName, SampleName }

import scala.concurrent.Await
import scala.concurrent.duration.Duration
12

13
14
15
object ShivaSvCallingReport extends ShivaSvCallingReportTrait

trait ShivaSvCallingReportTrait extends Logging {
16

17
  val histogramBinBoundaries: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000)
18
  val histogramPlotTicks: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000, 100000000)
19
20
  val histogramText: List[String] = List("<=100bp", "0.1-1kb", "1-10kb", "10-100kb", "0.1-1Mb", "1-10Mb", ">10Mb")

akaljuvee's avatar
akaljuvee committed
21
  def parseSummaryForSvCounts(summary: SummaryDb, runId: Int, sampleNames: Seq[String]): Map[String, Map[String, Array[Long]]] = {
akaljuvee's avatar
akaljuvee committed
22
    var delCounts, insCounts, dupCounts, invCounts: Map[String, Array[Long]] = Map()
23

akaljuvee's avatar
akaljuvee committed
24
    for (sampleName <- sampleNames) {
akaljuvee's avatar
akaljuvee committed
25
      val sampleCounts: Map[String, Any] = Await.result(summary.getStat(runId, PipelineName("shivasvcalling"), ModuleName("vcfstats-sv"), SampleName(sampleName)), Duration.Inf).get
26
      for ((svType, counts) <- sampleCounts.collect({ case (k, v: List[_]) => (k, v.toArray[Any]) })) {
akaljuvee's avatar
akaljuvee committed
27
        val elem: Tuple2[String, Array[Long]] = (sampleName, counts.collect({ case x: Long => x }))
28
        svType match {
akaljuvee's avatar
akaljuvee committed
29
30
31
32
          case "DEL" => delCounts += elem
          case "INS" => insCounts += elem
          case "DUP" => dupCounts += elem
          case "INV" => invCounts += elem
33
34
        }
      }
35
    }
36

akaljuvee's avatar
akaljuvee committed
37
38
39
40
41
    var result: Map[String, Map[String, Array[Long]]] = Map()
    if (delCounts.exists(elem => (elem._2.sum > 0))) result = Map("DEL" -> delCounts)
    if (insCounts.exists(elem => (elem._2.sum > 0))) result += ("INS" -> insCounts)
    if (dupCounts.exists(elem => (elem._2.sum > 0))) result += ("DUP" -> dupCounts)
    if (invCounts.exists(elem => (elem._2.sum > 0))) result += ("INV" -> invCounts)
42
43
44
    result
  }

akaljuvee's avatar
akaljuvee committed
45
  def parseSummaryForTranslocations(summary: SummaryDb, runId: Int, sampleNames: Seq[String]): Map[String, Long] = {
46
    var traCounts: Map[String, Long] = Map()
akaljuvee's avatar
akaljuvee committed
47
    for (sampleName <- sampleNames) {
akaljuvee's avatar
akaljuvee committed
48
      val counts: Map[String, Any] = Await.result(summary.getStat(runId, PipelineName("shivasvcalling"), ModuleName("vcfstats-sv"), SampleName(sampleName)), Duration.Inf).get
49
50
51
52
53
54
      counts.get("TRA") match {
        case Some(c: Long) => traCounts += (sampleName -> c)
        case Some(c)       => logger.error(s"Unable to parse translocation counts from summary db for sample $sampleName (type mismatch, type in the db: ${c.getClass})")
        case _             => logger.error(s"Summary db doesn't have translocation counts for sample $sampleName")
      }

55
56
57
58
    }
    if (traCounts.exists(elem => elem._2 > 0)) traCounts else Map.empty
  }

akaljuvee's avatar
akaljuvee committed
59
  def writeTsvFiles(sampleNames: Seq[String], counts: Map[String, Map[String, Array[Long]]], svTypes: List[SvTypeForReport], outFileAllTypes: String, outDir: File): Unit = {
60
61
62
63
64
65

    val tsvWriter = new PrintWriter(new File(outDir, outFileAllTypes))
    tsvWriter.print("sv_type\tsample")
    histogramText.foreach(bin => tsvWriter.print("\t" + bin))
    tsvWriter.println()

66
67
    val missingCounts: Array[String] = Array.fill(ShivaSvCallingReport.histogramText.size) { "-" }

68
    for (sv <- svTypes) {
69
70
71
      val countsForSvType: Map[String, Array[Long]] = counts.getOrElse(sv.svType, Map.empty)

      if (countsForSvType.nonEmpty) {
72

73
        writeTsvFileForSvType(sv, countsForSvType, sampleNames, outDir)
74

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
        for (sampleName <- sampleNames) {
          val sampleCounts: Array[String] = countsForSvType.get(sampleName) match {
            case Some(c) => c.collect({ case x => x.toString() })
            case None => {
              logger.error(s"Internal error, missing sv counts, sample-$sampleName, sv type-${sv.svType}")
              missingCounts
            }
          }

          tsvWriter.print(sv.svType + "\t" + sampleName + "\t")
          tsvWriter.println(sampleCounts.mkString("\t"))
        }

      } else {
        logger.error(s"Internal error, skipping writing the tsv-file for sv type ${sv.svType}")
90
      }
91

92
93
94
    }
    tsvWriter.close()
  }
95

akaljuvee's avatar
akaljuvee committed
96
  def writeTsvFileForSvType(svType: SvTypeForReport, counts: Map[String, Array[Long]], sampleNames: Seq[String], outDir: File): Unit = {
97
98
99
    val tsvWriter = new PrintWriter(new File(outDir, svType.tsvFileName))

    tsvWriter.print("histogramBin")
100
101
    val samplesWithCounts: Seq[String] = sampleNames.filter(x => counts.contains(x))
    samplesWithCounts.foreach(sampleName => tsvWriter.print("\t" + sampleName))
102
103
    tsvWriter.println()

104
105
    for (i <- histogramPlotTicks.indices) {
      tsvWriter.print(histogramPlotTicks(i))
106
      samplesWithCounts.foreach(sampleName => tsvWriter.print("\t" + counts.get(sampleName).get(i)))
107
      tsvWriter.println()
108
    }
109
110

    tsvWriter.close()
111
112
113
114
115
116
117
  }

  def createPlots(svTypes: List[SvTypeForReport], outDir: File): Unit = {
    for (sv <- svTypes) {
      val tsvFile = new File(outDir, sv.tsvFileName)
      val pngFile: File = new File(outDir, sv.pngFileName)
      val plot = LinePlot(tsvFile, pngFile,
118
        xlabel = Some(s"${sv.displayText.substring(0, sv.displayText.length - 1)} size"),
119
        ylabel = Some("Number of loci"),
120
121
        title = Some(sv.displayText),
        width = 400,
122
        removeZero = false)
123
      plot.height = Some(300)
124
125
126
      plot.llabel = Some("Sample")
      plot.xLog10 = true
      plot.yLog10 = true
127
128
      plot.xLog10AxisTicks = histogramPlotTicks.collect({ case x => x.toString() })
      plot.xLog10AxisLabels = histogramText
129
130
131
132
133
134
135
136
      plot.runLocal()
    }

  }

}

case class SvTypeForReport(svType: String, displayText: String, tsvFileName: String, pngFileName: String)