BammetricsReport.scala 17.2 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
22
23
24
25
26
27
import nl.lumc.sasc.biopet.core.report.{ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection}
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Await
import scala.concurrent.duration.Duration
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
/**
 * Report extension whose `builder` is the [[BammetricsReport]] object.
 *
 * @param parent Configurable this extension is attached to
 */
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {

  /** The report builder used by this extension */
  def builder = BammetricsReport
}
32

33
/**
 * Object to create a report for [[BamMetrics]]
 *
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
39

40
  /** Display name of this report */
  val reportName: String = "Bam Metrics"

43
  /**
   * Root page for a single BamMetrics report.
   *
   * The sub pages and sections of [[bamMetricsPage]] are reused; a "Versions" and a "Files"
   * sub page are appended and a "Report" front section is placed before the metrics sections.
   */
  def indexPage = {
    val metricsPage = this.bamMetricsPage(summary, sampleId, libId)

    val versionsPage = "Versions" -> ReportPage(
      List(),
      List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")),
      Map())

    val filesPage = "Files" -> ReportPage(
      List(),
      List("Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")),
      Map())

    val frontSection = "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")

    ReportPage(
      metricsPage.subPages ::: List(versionsPage, filesPage),
      List(frontSection) ::: metricsPage.sections,
      Map()
    )
  }
58

59
  /**
   * Generates a page with alignment stats.
   *
   * The "Summary", "Mapping Quality" and "Clipping" sections are always present. The
   * "Insert Size", "Whole genome coverage" and "Rna coverage" sections and the "Targets"
   * sub page are only added when the matching stats or settings are found in the summary.
   *
   * @param summary Summary database that is queried
   * @param sampleId Optional sample id passed to the summary queries
   * @param libId Optional library id passed to the summary queries
   * @param metricsTag Pipeline name used in the summary queries, default "bammetrics"
   */
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
                     metricsTag: String = "bammetrics") = {

    //val pipelineId: Int = summary.getPipelineId(runId, metricsTag).map(_.get)

    // Which optional modules left stats behind for this run/sample/library
    val rnaExecuted = summary.getStatsSize(runId, Right(metricsTag), Some(Right("rna")), sample = sampleId.map(Left(_)), library = libId.map(Left(_))) >= 1
    val wgsExecuted = summary.getStatsSize(runId, Right(metricsTag), Some(Right("wgs")), sample = sampleId.map(Left(_)), library = libId.map(Left(_))) >= 1

    val insertsizeMetrics = summary.getStatKeys(runId, Right(metricsTag), Some(Right("CollectInsertSizeMetrics")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_)), Map("metrics" -> List("metrics")))
      .exists(_._2.isDefined)

    val settings = summary.getSettingKeys(runId, Right(metricsTag), None, sample = sampleId.map(Left(_)), library = libId.map(Left(_)),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))

    // Amplicon (when set) goes first, followed by all regions of interest
    val targets = (settings("amplicon_name"), settings("roi_name")) match {
      case (Some(ampliconName: String), Some(rois: List[_])) => ampliconName :: rois.map(_.toString)
      case (_, Some(rois: List[_])) => rois.map(_.toString)
      case _ => Nil
    }

    val fixedSections = List(
      "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
      "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
      "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true)))

    val insertSizeSections =
      if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)))
      else Nil

    val wgsSections =
      if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
        Map("showPlot" -> true)))
      else Nil

    val rnaSections =
      if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
        Map("showPlot" -> true)))
      else Nil

    ReportPage(
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
        List(),
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
        Map())),
      fixedSections ++ insertSizeSections ++ wgsSections ++ rnaSections,
      Map("metricsTag" -> metricsTag)
    )
  }

108
109
  /**
   * Generate a stacked bar plot for alignment stats.
   *
   * Writes `prefix.tsv` with one row per sample (or per sample-library) holding the
   * mapped, duplicate, unmapped and secondary read counts taken from the "bamstats"
   * flagstats, then renders `prefix.png` from it.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary database that is queried
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Optional sample id passed to the stats query and used for the plot width
   */
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)

    // try/finally: the writer must be released even when a summary query or conversion throws
    try {
      tsvWriter.print(if (libraryLevel) "Library" else "Sample")
      tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

      val statsPaths = Map(
        "Mapped" -> List("flagstats", "Mapped"),
        "Duplicates" -> List("flagstats", "Duplicates"),
        "All" -> List("flagstats", "All"),
        "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
      )

      val pipelineId: Int = summary.getPipelineId(runId, "bammetrics").map(_.get)
      val moduleId: Option[Int] = summary.getmoduleId(runId, "bamstats", pipelineId)

      val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
        summary.getStatsForLibraries(runId = runId, pipelineName = "bammetrics", moduleName = Some("bamstats"), sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
      } else summary.getStatsForSamples(runId, pipelineId, moduleId, sample = sampleId, keyValues = statsPaths).map(x => (x._1, None) -> x._2)

      for (((s, l), result) <- results) {
        val sampleName: String = summary.getSampleName(s).map(_.get)
        val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
        // Row label is "sample" or "sample-library"
        val rowName = libName.fold(sampleName)(lib => sampleName + "-" + lib)

        val mapped = ConfigUtils.any2long(result("Mapped"))
        val duplicates = ConfigUtils.any2long(result("Duplicates"))
        val total = ConfigUtils.any2long(result("All"))
        val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))

        // Columns: Mapped (excluding duplicates and secondary), Duplicates, Unmapped, Secondary
        tsvWriter.println(
          List(rowName, mapped - duplicates - secondary, duplicates, total - mapped, secondary).mkString("\t"))
      }
    } finally tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
    // Plot gets 10 extra pixels of width per plotted library/sample
    if (libraryLevel) {
      plot.width = Some(200 + (libraries.filter(s => sampleId.getOrElse(s.id) == s.id).size) * 10)
    } else plot.width = Some(200 + (samples.count(s => sampleId.getOrElse(s) == s) * 10))
    plot.title = Some("Aligned reads")
    plot.runLocal()
  }

171
172
  /**
   * Generate a line plot for insertsize.
   *
   * Writes `prefix.tsv` with the merged insert size histograms and renders `prefix.png` from it.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary database that is queried
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Optional sample id, passed to the stats query and to the sample/library selection
   * @param libId Optional library id, passed to the sample/library selection
   */
  def insertSizePlot(outputDir: File,
                     prefix: String,
                     summary: SummaryDb,
                     libraryLevel: Boolean = false,
                     sampleId: Option[Int] = None,
                     libId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
      "All_Reads.fr_count" -> List("histogram", "All_Reads.fr_count")
    )

    // Summary paths of the histogram columns; the count column is keyed by the given name.
    // NOTE(review): this definition was missing although it is used below. The paths follow
    // the pattern of the other plot methods in this object plus `statsPaths` above — confirm
    // against the summary layout.
    def paths(name: String) = Map(
      "insert_size" -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
      name -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count")
    )

    val pipelineId: Int = summary.getPipelineId(runId, "bammetrics").map(_.get)
    val moduleId: Option[Int] = summary.getmoduleId(runId, "CollectInsertSizeMetrics", pipelineId)

    // NOTE(review): `results` is queried but not used for the tsv yet; the table below is
    // still built through getTableFromSummary.
    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipelineId, moduleId, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
    } else summary.getStatsForSamples(runId, pipelineId, moduleId, sample = sampleId, keyValues = statsPaths)
      .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))

    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
      }
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "insert_size"), "insert_size")

    LinePlot(tsvFile, pngFile,
      xlabel = Some("Insert size"),
      ylabel = Some("Reads"),
      title = Some("Insert size"),
      removeZero = true).runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
216

217
  /**
   * Generate a line plot for mapping quality.
   *
   * Writes `prefix.tsv` with the merged mapping quality histograms and renders `prefix.png` from it.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary database that is queried
   * @param libraryLevel Default false, forwarded to the sample/library selection
   * @param sampleId Optional sample id, forwarded to the sample/library selection
   * @param libId Optional library id, forwarded to the sample/library selection
   */
  def mappingQualityPlot(outputDir: File,
                         prefix: String,
                         summary: SummaryDb,
                         libraryLevel: Boolean = false,
                         sampleId: Option[Int] = None,
                         libId: Option[Int] = None): Unit = {
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvFile = new File(outputDir, prefix + ".tsv")

    // Summary paths of the histogram columns; the count column is keyed by the given name
    def histogramPaths(columnName: String) = Map(
      "mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "values"),
      columnName -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "counts")
    )

    val sampleLibraries = getSampleLibraries(summary, sampleId, libId, libraryLevel)
    val tables = sampleLibraries.map {
      case (s, l) =>
        getTableFromSummary(summary, histogramPaths(l.map(x => s"$s-$x").getOrElse(s)), Some(s), l)
    }
    val merged = mergeTables(tables.toArray, "mapping_quality")
    writeTableToTsv(tsvFile, merged, "mapping_quality")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Mapping Quality"),
      ylabel = Some("Reads"),
      title = Some("Mapping Quality"),
      removeZero = true)
    plot.runLocal()
  }

  /**
   * Generate a line plot for clipping.
   *
   * Writes `prefix.tsv` with the merged clipping histograms and renders `prefix.png` from it.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary database that is queried
   * @param libraryLevel Default false, forwarded to the sample/library selection
   * @param sampleId Optional sample id, forwarded to the sample/library selection
   * @param libId Optional library id, forwarded to the sample/library selection
   */
  def clippingPlot(outputDir: File,
                   prefix: String,
                   summary: SummaryDb,
                   libraryLevel: Boolean = false,
                   sampleId: Option[Int] = None,
                   libId: Option[Int] = None): Unit = {
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvFile = new File(outputDir, prefix + ".tsv")

    // Summary paths of the histogram columns; the count column is keyed by the given name
    def histogramPaths(columnName: String) = Map(
      "clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "values"),
      columnName -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "counts")
    )

    val sampleLibraries = getSampleLibraries(summary, sampleId, libId, libraryLevel)
    val tables = sampleLibraries.map {
      case (s, l) =>
        getTableFromSummary(summary, histogramPaths(l.map(x => s"$s-$x").getOrElse(s)), Some(s), l)
    }
    val merged = mergeTables(tables.toArray, "clipping")
    writeTableToTsv(tsvFile, merged, "clipping")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Clipping"),
      ylabel = Some("Reads"),
      title = Some("Clipping"),
      removeZero = true)
    plot.runLocal()
  }

273
274
  /**
   * Generate a line plot for wgs coverage.
   *
   * Writes `prefix.tsv` with the merged coverage histograms and renders `prefix.png` from it.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary database that is queried
   * @param libraryLevel Default false, forwarded to the sample/library selection
   * @param sampleId Optional sample id, forwarded to the sample/library selection
   * @param libId Optional library id, forwarded to the sample/library selection
   */
  def wgsHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[Int] = None,
                       libId: Option[Int] = None): Unit = {
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvFile = new File(outputDir, prefix + ".tsv")

    // Summary paths of the histogram columns; the count column is keyed by the given name
    def histogramPaths(columnName: String) = Map(
      "coverage" -> List("bammetrics", "stats", "wgs", "histogram", "coverage"),
      columnName -> List("bammetrics", "stats", "wgs", "histogram", "count")
    )

    val sampleLibraries = getSampleLibraries(summary, sampleId, libId, libraryLevel)
    val tables = sampleLibraries.map {
      case (s, l) =>
        getTableFromSummary(summary, histogramPaths(l.map(x => s"$s-$x").getOrElse(s)), Some(s), l)
    }
    val merged = mergeTables(tables.toArray, "coverage")
    writeTableToTsv(tsvFile, merged, "coverage")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Coverage"),
      ylabel = Some("Bases"),
      title = Some("Whole genome coverage"),
      removeZero = true)
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
309
310

  /**
   * Generate a line plot for rna coverage.
   *
   * Writes `prefix.tsv` with the merged normalized coverage histograms and renders
   * `prefix.png` from it.
   *
   * NOTE(review): `sampleId` and `libId` are `Option[String]` here, while the other plot
   * methods in this object and `getSampleLibraries` use `Option[Int]` — confirm which is intended.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary database that is queried
   * @param libraryLevel Default false, forwarded to the sample/library selection
   * @param sampleId Optional sample id, forwarded to the sample/library selection
   * @param libId Optional library id, forwarded to the sample/library selection
   */
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvFile = new File(outputDir, prefix + ".tsv")

    // Summary paths of the histogram columns; the coverage column is keyed by the given name
    def histogramPaths(columnName: String) = Map(
      "normalized_position" -> List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
      columnName -> List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage")
    )

    val sampleLibraries = getSampleLibraries(summary, sampleId, libId, libraryLevel)
    val tables = sampleLibraries.map {
      case (s, l) =>
        getTableFromSummary(summary, histogramPaths(l.map(x => s"$s-$x").getOrElse(s)), Some(s), l)
    }
    val merged = mergeTables(tables.toArray, "normalized_position")
    writeTableToTsv(tsvFile, merged, "normalized_position")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Relative position"),
      ylabel = Some("Coverage"),
      title = Some("Rna coverage"),
      removeZero = true)
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
346

Peter van 't Hof's avatar
Peter van 't Hof committed
347
348
349
350
  /**
   * Lists the (sample, library) pairs to plot, taken from `summary.libraries`.
   *
   * When `libraryLevel` is set or `LibId` is defined, one pair is returned per library of
   * every sample; otherwise one `(sample, None)` pair per sample.
   *
   * NOTE(review): `sampleId` and `LibId` are only checked by the `require` below, they are
   * not used to filter the returned pairs — confirm this is intended.
   *
   * @param summary Summary database that holds the samples and libraries
   * @param sampleId Optional sample id, must be defined when `LibId` is defined
   * @param LibId Optional library id
   * @param libraryLevel Default false, when true pairs are returned per library
   */
  private def getSampleLibraries(summary: SummaryDb,
                                 sampleId: Option[Int] = None,
                                 LibId: Option[Int] = None,
                                 libraryLevel: Boolean = false): List[(Int, Option[Int])] = {
    if (LibId.isDefined) require(sampleId.isDefined)

    val perLibrary = libraryLevel || LibId.isDefined
    if (perLibrary)
      summary.libraries.toList.flatMap { case (sample, libs) => libs.map(lib => (sample, Some(lib))) }
    else
      summary.libraries.toList.map { case (sample, _) => (sample, None) }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
357
  /**
   * Reads one array per requested column from the summary.
   *
   * A path without a value in the summary yields an empty array. All resulting arrays
   * must have the same size.
   *
   * @param summary Summary database that is queried
   * @param paths Column name -> path of the array inside the summary
   * @param sampleId Optional sample id passed to the summary lookup
   * @param libId Optional library id passed to the summary lookup
   * @return Column name -> values
   * @throws IllegalArgumentException when the arrays differ in size (or `paths` is empty)
   */
  def getTableFromSummary(summary: SummaryDb,
                          paths: Map[String, List[String]],
                          sampleId: Option[Int] = None,
                          libId: Option[Int] = None): Map[String, Array[Any]] = {
    val columns: Map[String, Array[Any]] = paths.map {
      case (column, summaryPath) =>
        val found = summary.getValueAsArray(sampleId, libId, summaryPath: _*)
        column -> found.getOrElse(Array())
    }

    val distinctSizes = columns.map(_._2.size).toList.distinct
    require(distinctSizes.size == 1, s"Arrays in summary does not have the same number of values, $paths")
    columns
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
370
371
  /**
   * Merges several tables on a shared column.
   *
   * The merged `mergeColumn` holds the distinct values of that column over all tables, in
   * order of first appearance. Every other column is re-aligned to those values; positions
   * for which a table has no value get `defaultValue`. When two tables share a column name
   * the later table wins.
   *
   * @param tables Tables to merge, each must contain `mergeColumn`
   * @param mergeColumn Name of the column to align the tables on
   * @param defaultValue Value used where a table has no entry for a merge key, default 0
   * @return Merged table, including `mergeColumn`
   * @throws NoSuchElementException when a table does not contain `mergeColumn`
   */
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
    val keys: Array[Any] = tables.flatMap(x => x(mergeColumn)).distinct

    val mergedColumns = tables.flatMap { table =>
      val mergeValues = table(mergeColumn)
      table.collect {
        case (columnKey, columnValues) if columnKey != mergeColumn =>
          // Build the key -> value lookup once per column instead of once per merge key
          val lookup = mergeValues.zip(columnValues).toMap
          columnKey -> keys.map(x => lookup.getOrElse(x, defaultValue))
      }
    }

    mergedColumns.toMap + (mergeColumn -> keys)
  }

  /**
   * Writes a column-oriented table to a tab separated file.
   *
   * The header line holds `firstColumn` followed by the other column names in sorted
   * order; one line is written per row.
   *
   * @param tsvFile File to write, overwritten when it already exists
   * @param table Column name -> values, all columns must have the same number of values
   * @param firstColumn Name of the column that is written first
   * @throws IllegalArgumentException when the columns differ in size (or the table is empty)
   */
  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    // try/finally: the writer must be released even when a lookup or a write throws
    try {
      writer.println((firstColumn :: keys).mkString("\t"))
      table(firstColumn).zipWithIndex.foreach {
        case (c, i) =>
          writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
      }
    } finally writer.close()
  }
389
}