BammetricsReport.scala 17.2 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20 21 22 23 24 25 26 27
import nl.lumc.sasc.biopet.core.report.{ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection}
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Await
import scala.concurrent.duration.Duration
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
/**
 * Report builder extension for the bam metrics report.
 *
 * @param parent Exposed as the `parent` member of this extension
 */
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {

  /** Builder used by this extension: the `BammetricsReport` object. */
  def builder = BammetricsReport
}
32

33
/**
 * Object to create a report for [[BamMetrics]]
 *
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
39

40
  /** Display name of this report */
  val reportName: String = "Bam Metrics"

43
  /**
   * Root page for a single BamMetrics report.
   *
   * Takes the page produced by [[bamMetricsPage]], appends a "Versions" and a "Files"
   * sub page to its sub pages and puts a "Report" front section before its sections.
   */
  def indexPage = {
    val metricsPage = this.bamMetricsPage(summary, sampleId, libId)

    // Fixed sub pages that are appended after the sub pages of the metrics page
    val versionsPage = "Versions" -> ReportPage(
      List(),
      List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")),
      Map())
    val filesPage = "Files" -> ReportPage(
      List(),
      List("Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")),
      Map())

    // Front section that is shown before the sections of the metrics page
    val frontSection = "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")

    ReportPage(
      metricsPage.subPages ::: List(versionsPage, filesPage),
      List(frontSection) ::: metricsPage.sections,
      Map()
    )
  }
58

59
  /**
   * Generates a page with alignment stats.
   *
   * The "Summary", "Mapping Quality" and "Clipping" sections are always present. The
   * "Insert Size", "Whole genome coverage" and "Rna coverage" sections are only added when
   * the corresponding summary queries report data. A "Targets" sub page is added when the
   * "amplicon_name"/"roi_name" settings yield at least one target.
   *
   * @param summary Summary database that is queried for stats and settings
   * @param sampleId Optional sample id used in the summary queries
   * @param libId Optional library id used in the summary queries
   * @param metricsTag Pipeline name used in the summary queries, default "bammetrics"
   */
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
                     metricsTag: String = "bammetrics") = {

    // NOTE(review): the results of the summary calls below are used directly as plain values;
    // confirm against the SummaryDb API that no Await is required here
    val wgsExecuted = summary.getStatsSize(runId, Right(metricsTag), Some(Right("wgs")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_))) >= 1
    val rnaExecuted = summary.getStatsSize(runId, Right(metricsTag), Some(Right("rna")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_))) >= 1

    val insertsizeMetrics = summary.getStatKeys(runId, Right(metricsTag), Some(Right("CollectInsertSizeMetrics")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_)), Map("metrics" -> List("metrics")))
      .exists(_._2.isDefined)

    val targetSettings = summary.getSettingKeys(runId, Right(metricsTag), None,
      sample = sampleId.map(Left(_)), library = libId.map(Left(_)),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))

    // An amplicon is only listed (first) when regions of interest are present as well
    val targets = (targetSettings("amplicon_name"), targetSettings("roi_name")) match {
      case (Some(ampliconName: String), Some(regions: List[_])) => ampliconName :: regions.map(_.toString)
      case (_, Some(regions: List[_]))                          => regions.map(_.toString)
      case _                                                    => Nil
    }

    val targetPages =
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
        List(),
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
        Map()))

    // Arguments shared by every section that shows a plot
    val plotArgs: Map[String, Any] = Map("showPlot" -> true)

    val fixedSections = List(
      "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
      "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", plotArgs),
      "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", plotArgs))

    val insertSizeSections =
      if (insertsizeMetrics)
        List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", plotArgs))
      else Nil

    val wgsSections =
      if (wgsExecuted)
        List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", plotArgs))
      else Nil

    val rnaSections =
      if (rnaExecuted)
        List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp", plotArgs))
      else Nil

    ReportPage(
      targetPages,
      fixedSections ++ insertSizeSections ++ wgsSections ++ rnaSections,
      Map("metricsTag" -> metricsTag)
    )
  }

108 109
  /**
   * Generate a stacked bar plot for alignment stats.
   *
   * Writes `prefix`.tsv with one row per sample (or sample-library) holding the columns
   * Mapped (mapped - duplicates - secondary), Duplicates, Unmapped (all - mapped) and
   * Secondary, then renders `prefix`.png from that file with a [[StackedBarPlot]].
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   */
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val tsvWriter = new PrintWriter(tsvFile)
    // The writer is closed in `finally` so it does not leak when a summary query or a
    // value conversion below throws
    try {
      tsvWriter.println((if (libraryLevel) "Library" else "Sample") + "\tMapped\tDuplicates\tUnmapped\tSecondary")

      // NOTE(review): these results are assigned to plain Int/String values while
      // getLibraryName below is awaited - confirm against the SummaryDb API
      val pipelineId: Int = summary.getPipelineId(runId, "bammetrics").map(_.get)
      val moduleId: Option[Int] = summary.getmoduleId(runId, "bamstats", pipelineId)

      val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
        summary.getStatsForLibraries(runId = runId, pipelineName = "bammetrics", moduleName = Some("bamstats"), sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
      } else summary.getStatsForSamples(runId, pipelineId, moduleId, sample = sampleId, keyValues = statsPaths).map(x => (x._1, None) -> x._2)

      for (((s, l), result) <- results) {
        val sampleName: String = summary.getSampleName(s).map(_.get)
        val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
        val label = libName.map(sampleName + "-" + _).getOrElse(sampleName)

        val mapped = ConfigUtils.any2long(result("Mapped"))
        val duplicates = ConfigUtils.any2long(result("Duplicates"))
        val total = ConfigUtils.any2long(result("All"))
        val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))

        tsvWriter.println(
          Seq(label, mapped - duplicates - secondary, duplicates, total - mapped, secondary).mkString("\t"))
      }
    } finally tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
    // Width grows with the number of plotted entries
    if (libraryLevel) {
      plot.width = Some(200 + libraries.count(s => sampleId.getOrElse(s.id) == s.id) * 10)
    } else plot.width = Some(200 + (samples.count(s => sampleId.getOrElse(s) == s) * 10))
    plot.title = Some("Aligned reads")
    plot.runLocal()
  }

171 172
  /**
   * Generate a line plot for insertsize.
   *
   * Collects one table per entry returned by getSampleLibraries, merges them on the
   * "insert_size" column, writes the result to `prefix`.tsv and renders `prefix`.png.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libId Optional library id, passed on to getSampleLibraries
   */
  def insertSizePlot(outputDir: File,
                     prefix: String,
                     summary: SummaryDb,
                     libraryLevel: Boolean = false,
                     sampleId: Option[Int] = None,
                     libId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
      "All_Reads.fr_count" -> List("histogram", "All_Reads.fr_count")
    )

    // `paths` was referenced below without being defined in this method. It is built from
    // `statsPaths` using the same "bammetrics" / "stats" / <module> prefix the other plot
    // methods of this object use for their summary paths.
    val modulePath = List("bammetrics", "stats", "CollectInsertSizeMetrics")
    def paths(name: String) = Map(
      "insert_size" -> (modulePath ::: statsPaths("insert_size")),
      name -> (modulePath ::: statsPaths("All_Reads.fr_count"))
    )

    // NOTE(review): `results` is queried but not used for the plot yet; this looks like an
    // unfinished migration to the SummaryDb queries - confirm before removing or wiring it in
    val pipelineId: Int = summary.getPipelineId(runId, "bammetrics").map(_.get)
    val moduleId: Option[Int] = summary.getmoduleId(runId, "CollectInsertSizeMetrics", pipelineId)

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipelineId, moduleId, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
    } else summary.getStatsForSamples(runId, pipelineId, moduleId, sample = sampleId, keyValues = statsPaths)
      .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))

    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
      }
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "insert_size"), "insert_size")

    LinePlot(tsvFile, pngFile,
      xlabel = Some("Insert size"),
      ylabel = Some("Reads"),
      title = Some("Insert size"),
      removeZero = true).runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
216

217
  /**
   * Generate a line plot for mapping quality.
   *
   * Collects one table per entry returned by getSampleLibraries, merges them on the
   * "mapping_quality" column, writes the result to `prefix`.tsv and renders `prefix`.png.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, passed on to getSampleLibraries
   * @param sampleId Optional sample id, passed on to getSampleLibraries
   * @param libId Optional library id, passed on to getSampleLibraries
   */
  def mappingQualityPlot(outputDir: File,
                         prefix: String,
                         summary: SummaryDb,
                         libraryLevel: Boolean = false,
                         sampleId: Option[Int] = None,
                         libId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    // Location in the summary shared by the x-axis values and the counts
    val histogram = List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram")
    def paths(name: String) = Map(
      "mapping_quality" -> (histogram :+ "values"),
      name -> (histogram :+ "counts")
    )

    // NOTE(review): `sample` has the element type declared by getSampleLibraries while `paths`
    // takes a String - confirm the getOrElse fallback type-checks as intended
    val tables = for ((sample, lib) <- getSampleLibraries(summary, sampleId, libId, libraryLevel)) yield {
      getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
    }
    val merged = mergeTables(tables.toArray, "mapping_quality")
    writeTableToTsv(tsvFile, merged, "mapping_quality")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Mapping Quality"),
      ylabel = Some("Reads"),
      title = Some("Mapping Quality"),
      removeZero = true)
    plot.runLocal()
  }

  /**
   * Generate a line plot for clipping.
   *
   * Collects one table per entry returned by getSampleLibraries, merges them on the
   * "clipping" column, writes the result to `prefix`.tsv and renders `prefix`.png.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, passed on to getSampleLibraries
   * @param sampleId Optional sample id, passed on to getSampleLibraries
   * @param libId Optional library id, passed on to getSampleLibraries
   */
  def clippingPlot(outputDir: File,
                   prefix: String,
                   summary: SummaryDb,
                   libraryLevel: Boolean = false,
                   sampleId: Option[Int] = None,
                   libId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    // Location in the summary shared by the x-axis values and the counts
    val histogram = List("bammetrics", "stats", "bamstats", "clipping", "histogram")
    def paths(name: String) = Map(
      "clipping" -> (histogram :+ "values"),
      name -> (histogram :+ "counts")
    )

    // NOTE(review): `sample` has the element type declared by getSampleLibraries while `paths`
    // takes a String - confirm the getOrElse fallback type-checks as intended
    val tables = for ((sample, lib) <- getSampleLibraries(summary, sampleId, libId, libraryLevel)) yield {
      getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
    }
    val merged = mergeTables(tables.toArray, "clipping")
    writeTableToTsv(tsvFile, merged, "clipping")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Clipping"),
      ylabel = Some("Reads"),
      title = Some("Clipping"),
      removeZero = true)
    plot.runLocal()
  }

273 274
  /**
   * Generate a line plot for wgs coverage.
   *
   * Collects one table per entry returned by getSampleLibraries, merges them on the
   * "coverage" column, writes the result to `prefix`.tsv and renders `prefix`.png.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libId Optional library id, passed on to getSampleLibraries
   */
  def wgsHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[Int] = None,
                       libId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    // Location in the summary shared by the x-axis values and the counts
    val histogram = List("bammetrics", "stats", "wgs", "histogram")
    def paths(name: String) = Map(
      "coverage" -> (histogram :+ "coverage"),
      name -> (histogram :+ "count")
    )

    // NOTE(review): `sample` has the element type declared by getSampleLibraries while `paths`
    // takes a String - confirm the getOrElse fallback type-checks as intended
    val tables = for ((sample, lib) <- getSampleLibraries(summary, sampleId, libId, libraryLevel)) yield {
      getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
    }
    val merged = mergeTables(tables.toArray, "coverage")
    writeTableToTsv(tsvFile, merged, "coverage")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Coverage"),
      ylabel = Some("Bases"),
      title = Some("Whole genome coverage"),
      removeZero = true)
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
309 310

  /**
   * Generate a line plot for rna coverage.
   *
   * Collects one table per entry returned by getSampleLibraries, merges them on the
   * "normalized_position" column, writes the result to `prefix`.tsv and renders `prefix`.png.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libId Optional library, passed on to getSampleLibraries
   */
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    // Location in the summary shared by the x-axis values and the coverage
    val histogram = List("bammetrics", "stats", "rna", "histogram")
    def paths(name: String) = Map(
      "normalized_position" -> (histogram :+ "normalized_position"),
      name -> (histogram :+ "All_Reads.normalized_coverage")
    )

    // NOTE(review): sampleId/libId are Option[String] here, while getSampleLibraries declares
    // Option[Int] parameters and the other plot methods take Option[Int] - confirm which is intended
    val tables = for ((sample, lib) <- getSampleLibraries(summary, sampleId, libId, libraryLevel)) yield {
      getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
    }
    val merged = mergeTables(tables.toArray, "normalized_position")
    writeTableToTsv(tsvFile, merged, "normalized_position")

    val plot = LinePlot(tsvFile, pngFile,
      xlabel = Some("Relative position"),
      ylabel = Some("Coverage"),
      title = Some("Rna coverage"),
      removeZero = true)
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
346

Peter van 't Hof's avatar
Peter van 't Hof committed
347 348 349 350
  /**
   * Lists the (sample, library) entries a plot should be based on.
   *
   * Entries are taken from `summary.libraries`. With `libraryLevel` set, or when a library
   * is given, one entry per library is returned; otherwise one entry per sample with the
   * library set to None. A given `sampleId` / `LibId` limits the result to that sample /
   * library; previously both were only validated and never applied.
   *
   * @param summary Summary to take the samples and libraries from
   * @param sampleId When given, only entries of this sample are returned
   * @param LibId When given, only this library is returned; requires sampleId to be given
   * @param libraryLevel Default false, when set true one entry per library is returned
   * @throws IllegalArgumentException when LibId is given without sampleId
   */
  private def getSampleLibraries(summary: SummaryDb,
                                 sampleId: Option[Int] = None,
                                 LibId: Option[Int] = None,
                                 libraryLevel: Boolean = false): List[(Int, Option[Int])] = {
    require(LibId.isEmpty || sampleId.isDefined, "A library id can only be given together with a sample id")

    // `forall` on None is true, so without a sampleId every sample is kept
    val selectedSamples = summary.libraries.toList.filter { case (sample, _) => sampleId.forall(_ == sample) }

    if (libraryLevel || LibId.isDefined)
      for ((sample, libs) <- selectedSamples; lib <- libs if LibId.forall(_ == lib)) yield (sample, Some(lib))
    else for ((sample, _) <- selectedSamples) yield (sample, None)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
357
  /**
   * Reads a table from the summary: every entry of `paths` becomes a column that holds the
   * array found at that path, or an empty array when the summary has no value there.
   *
   * @param summary Summary to read the values from
   * @param paths Column name -> path of the values inside the summary
   * @param sampleId Optional sample to read the values for
   * @param libId Optional library to read the values for
   * @return Column name -> values
   * @throws IllegalArgumentException when the columns do not all have the same length
   */
  def getTableFromSummary(summary: SummaryDb,
                          paths: Map[String, List[String]],
                          sampleId: Option[Int] = None,
                          libId: Option[Int] = None): Map[String, Array[Any]] = {
    val columns: Map[String, Array[Any]] = paths.map {
      case (column, path) =>
        column -> summary.getValueAsArray(sampleId, libId, path: _*).getOrElse(Array.empty[Any])
    }
    // Exactly one distinct length means all columns line up (an empty table is rejected too)
    val lengths = columns.values.map(_.length).toSet
    require(lengths.size == 1, s"Arrays in summary does not have the same number of values, $paths")
    columns
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
370 371
  /**
   * Merges multiple tables into one table, joined on the values of `mergeColumn`.
   *
   * The merged `mergeColumn` holds the distinct values of that column over all tables, in
   * order of first appearance. Every other column is re-aligned to those values; positions
   * for which a table has no value get `defaultValue`. When the same column name occurs in
   * several tables the last table wins.
   *
   * @param tables Tables to merge, each table must contain `mergeColumn`
   * @param mergeColumn Name of the column to join on
   * @param defaultValue Value used where a table has no row for a merge value, default 0
   * @return Merged table, including `mergeColumn`
   * @throws NoSuchElementException when a table does not contain `mergeColumn`
   */
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
    val keys = tables.flatMap(x => x(mergeColumn)).distinct

    val mergedColumns = tables.flatMap { table =>
      val mergeValues = table(mergeColumn)
      table.collect {
        case (columnKey, columnValues) if columnKey != mergeColumn =>
          // Build the lookup once per column instead of once per merge value
          val lookup = mergeValues.zip(columnValues).toMap
          columnKey -> keys.map(key => lookup.getOrElse(key, defaultValue))
      }
    }

    mergedColumns.toMap + (mergeColumn -> keys)
  }

  /**
   * Writes a table to a tab separated file.
   *
   * The header line holds `firstColumn` followed by the remaining column names in sorted
   * order; every following line holds the values of one row in the same column order.
   *
   * @param tsvFile File to write, an existing file is overwritten
   * @param table Column name -> values, all columns must have the same number of values
   * @param firstColumn Name of the column that is written first
   * @throws IllegalArgumentException when the columns do not all have the same length
   * @throws NoSuchElementException when `firstColumn` is not a column of the table
   */
  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    // Resolve the first column before the file is opened, so a missing column does not
    // leave a half-written file behind
    val firstValues = table(firstColumn)

    val writer = new PrintWriter(tsvFile)
    // Close the writer even when writing a row fails
    try {
      writer.println((firstColumn :: keys).mkString("\t"))
      firstValues.zipWithIndex.foreach {
        case (c, i) =>
          writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
      }
    } finally writer.close()
  }
389
}