BammetricsReport.scala 19.6 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20
import nl.lumc.sasc.biopet.core.report.{ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection}
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
22
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
24 25
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
26 27
import nl.lumc.sasc.biopet.utils.summary.db.Schema._
import scala.collection.mutable.ArrayBuffer
28
import scala.concurrent.{Await, Future}
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import scala.concurrent.duration.Duration
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
  * Report extension that hooks the [[BammetricsReport]] builder into a pipeline
  *
  * @param parent Configurable this extension belongs to
  */
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {

  /** The report builder used by this extension */
  def builder: BammetricsReport.type = BammetricsReport
}
34

35
/**
36 37 38 39
  * Object to create a report for [[BamMetrics]]
  *
  * Created by pjvan_thof on 3/30/15.
  */
40
object BammetricsReport extends ReportBuilder {
41

42
  /** Name of report */
  val reportName: String = "Bam Metrics"

  /** Pipeline name of this report */
  def pipelineName: String = "bammetrics"

47
  /**
    * Root page for a single BamMetrics report
    *
    * Takes the page produced by [[bamMetricsPage]], appends the "Versions" and "Files" sub pages
    * and puts the "Report" front section before the sections of that page.
    */
  def indexPage: Future[ReportPage] =
    for (metricsPage <- bamMetricsPage(summary, sampleId, libId)) yield {
      val subPages = metricsPage.subPages ::: List(
        "Versions" -> Future(
          ReportPage(
            List(),
            List(
              "Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")),
            Map())),
        "Files" -> filesPage(sampleId, libId)
      )
      val frontSection =
        "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")

      ReportPage(subPages, List(frontSection) ::: metricsPage.sections, Map())
    }
66

67
  /**
    * Generates a page with alignment stats
    *
    * The "Insert Size", "Whole genome coverage" and "Rna coverage" sections are only added when
    * stats for them are found in the summary. A "Targets" sub page is only added when target
    * names (amplicon / roi) are found in the settings.
    *
    * @param summary Summary database to query
    * @param sampleId Sample to select, None means no sample is selected
    * @param libId Library to select, None means no library is selected
    * @param metricsTag Name under which the metrics are stored in the summary
    */
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {

    /** True when at least one stats entry exists for the given module */
    def moduleExecuted(module: String): Boolean =
      summary.getStatsSize(runId,
                           metricsTag,
                           module,
                           sample = sampleId.map(SampleId),
                           library = libId.map(LibraryId)) >= 1

    /** Section of the given template with the plot enabled */
    def plotSection(template: String): ReportSection =
      ReportSection(template, Map("showPlot" -> true))

    /** Gives the plot section only when enabled, otherwise nothing */
    def sectionIf(enabled: Boolean,
                  name: String,
                  template: String): List[(String, ReportSection)] =
      if (enabled) List(name -> plotSection(template)) else Nil

    val wgsExecuted = moduleExecuted("wgs")
    val rnaExecuted = moduleExecuted("rna")

    val insertsizeMetrics = summary
      .getStatKeys(
        runId,
        metricsTag,
        "CollectInsertSizeMetrics",
        sampleId.map(SampleId).getOrElse(NoSample),
        libId.map(LibraryId).getOrElse(NoLibrary),
        Map("metrics" -> List("metrics"))
      )
      .exists { case (_, value) => value.isDefined }

    val targetSettings = summary.getSettingKeys(
      runId,
      metricsTag,
      NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample),
      library = libId.map(LibraryId).getOrElse(NoLibrary),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name"))
    )

    // The amplicon is only used when it is a string and the roi setting is a list
    val ampliconName: Option[String] =
      targetSettings("amplicon_name").collect { case amplicon: String => amplicon }
    val roiNames: Option[List[String]] =
      targetSettings("roi_name").collect { case roi: List[_] => roi.map(_.toString) }
    val targets: List[String] = roiNames.map(ampliconName.toList ::: _).getOrElse(Nil)

    ReportPage(
      if (targets.nonEmpty)
        List(
          "Targets" -> Future.successful(
            ReportPage(
              List(),
              targets.map { target =>
                target -> ReportSection(
                  "/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp",
                  Map("target" -> Some(target)))
              },
              Map())))
      else List(),
      List(
        "Summary" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> plotSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp"),
        "Clipping" -> plotSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp")
      ) ++
        sectionIf(insertsizeMetrics,
                  "Insert Size",
                  "/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp") ++
        sectionIf(wgsExecuted,
                  "Whole genome coverage",
                  "/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp") ++
        sectionIf(rnaExecuted,
                  "Rna coverage",
                  "/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp"),
      Map("metricsTag" -> metricsTag)
    )
  }

152
  /**
    * Generates the lines for alignmentSummaryPlot
    *
    * Each line is tab separated and contains, in this order: the sample name (with "-library"
    * appended on library level), the mapped reads that are neither duplicate nor secondary,
    * the duplicates, the unmapped reads (all minus mapped) and the secondary alignments.
    *
    * @param summary Summary class
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libraryLevel Default false, when set true the lines will be based on library stats instead of sample stats
    * @return One line per sample, or per library when libraryLevel is set
    */
  def alignmentSummaryPlotLines(summary: SummaryDb,
                                sampleId: Option[Int] = None,
                                libraryLevel: Boolean = false): Seq[String] = {
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
      summary
        .getStatsForLibraries(runId,
                              "bammetrics",
                              "bamstats",
                              sampleId = sampleId,
                              keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2)
    } else
      summary
        .getStatsForSamples(runId,
                            "bammetrics",
                            "bamstats",
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2)

    results.toSeq.map {
      case ((sample, library), result) =>
        // The name lookups are asynchronous, so the results have to be awaited before use
        val sampleName: String = Await
          .result(summary.getSampleName(sample), Duration.Inf)
          .getOrElse(throw new IllegalStateException("Sample must be there"))
        val libName: Option[String] =
          library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
        val name = sampleName + libName.map("-" + _).getOrElse("")

        val mapped = ConfigUtils.any2long(result("Mapped"))
        val duplicates = ConfigUtils.any2long(result("Duplicates"))
        val total = ConfigUtils.any2long(result("All"))
        val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))

        s"$name\t${mapped - duplicates - secondary}\t$duplicates\t${total - mapped}\t$secondary"
    }
  }

  /**
    * Generate a stackbar plot for alignment stats
    *
    * Writes "prefix.tsv" with a header and the given lines, then runs a stacked bar plot on it
    * that writes to "prefix.png".
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summaryPlotLines A sequence of strings written to the summary tsv
    * @param libraryLevel Default false, when set true the first column is named "Library" instead of "Sample"
    */
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summaryPlotLines: Seq[String],
                           libraryLevel: Boolean = false): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val tsvWriter = new PrintWriter(tsvFile)
    // Always release the file handle, also when writing fails halfway
    try {
      tsvWriter.print(if (libraryLevel) "Library" else "Sample")
      tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")
      summaryPlotLines.foreach(line => tsvWriter.println(line))
    } finally tsvWriter.close()

    // NOTE(review): the plot is constructed with a null argument, unchanged from before
    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
    // The plot gets wider with every line that is plotted
    plot.width = Some(200 + (summaryPlotLines.size * 10))
    plot.title = Some("Aligned_reads")
    plot.runLocal()
  }

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
  /**
    * This is a generic method to create plots
    *
    * The stats are fetched from the summary per sample (or per library), merged into a single
    * table on the first key of yKeyList, written to "prefix.tsv" and plotted to "prefix.png".
    *
    * @param outputDir Outputdir of the plot
    * @param prefix Files will start with this name
    * @param summary Summary where the data is
    * @param libraryLevel If enabled the plots will show data per library
    * @param sampleId If set only this sample is shown
    * @param libraryId NOTE(review): not used inside this method, so it does not limit the libraries shown
    * @param statsPaths Paths in summary where the tables can be found
    * @param yKeyList Keys to search from, first has prio over second one. The first key is also
    *                 the name of the column the tables are merged on
    * @param xKeyList Keys to search from, first has prio over second one
    * @param pipeline Query for the pipeline
    * @param module Query for the module
    * @param xlabel X label shown on the plot
    * @param ylabel Y label shown on the plot
    * @param title Title of the plot
    * @param removeZero Passed on to the line plot
    */
  def writePlotFromSummary(outputDir: File,
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
                           yKeyList: List[String],
                           xKeyList: List[String],
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    /** Turns every value that is found into an array, so it can be used as a table column */
    def toColumns(stats: Map[String, Option[Any]]): Map[String, Option[Array[Any]]] =
      stats.map { case (key, value) => key -> value.map(ConfigUtils.any2list(_).toArray) }

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] =
      if (libraryLevel)
        summary
          .getStatsForLibraries(runId,
                                pipeline,
                                module,
                                sampleId = sampleId,
                                keyValues = statsPaths)
          .map { case ((sample, library), stats) => (sample, Some(library)) -> toColumns(stats) }
      else
        summary
          .getStatsForSamples(runId,
                              pipeline,
                              module,
                              sample = sampleId.map(SampleId),
                              keyValues = statsPaths)
          .map { case (sample, stats) => (sample, None) -> toColumns(stats) }

    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), columns) =>
        val sampleName = Await.result(summary.getSampleName(sample), Duration.Inf) match {
          case Some(name) => name
          case None => throw new IllegalStateException("Sample must be there")
        }
        val libraryName =
          library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))

        /** Values of the first candidate key that has data, empty when there is none */
        def firstDefined(candidates: List[String]): Array[Any] = {
          val key = candidates.find(k => columns.get(k).exists(_.isDefined)).getOrElse("none")
          columns.getOrElse(key, None).getOrElse(Array.empty[Any])
        }

        val columnName = sampleName + libraryName.map("-" + _).getOrElse("")
        Map(
          yKeyList.head -> firstDefined(yKeyList),
          columnName -> firstDefined(xKeyList)
        )
    }.toArray

    val merged = mergeTables(tables, yKeyList.head)
    writeTableToTsv(tsvFile, merged, yKeyList.head)

    val plot = LinePlot(tsvFile,
                        pngFile,
                        xlabel = xlabel,
                        ylabel = ylabel,
                        title = title,
                        hideLegend = results.size > 40,
                        removeZero = removeZero)
    plot.runLocal()
  }

319
  /**
    * Generate a line plot for insertsize
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libraryId Passed on to [[writePlotFromSummary]]
    */
  def insertSizePlot(outputDir: File,
                     prefix: String,
                     summary: SummaryDb,
                     libraryLevel: Boolean = false,
                     sampleId: Option[Int] = None,
                     libraryId: Option[Int] = None): Unit = {
    val histogramPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
      "count" -> List("histogram", "All_Reads.fr_count")
    )

    writePlotFromSummary(
      outputDir = outputDir,
      prefix = prefix,
      summary = summary,
      libraryLevel = libraryLevel,
      sampleId = sampleId,
      libraryId = libraryId,
      statsPaths = histogramPaths,
      yKeyList = List("insert_size"),
      xKeyList = List("count"),
      pipeline = "bammetrics",
      module = "CollectInsertSizeMetrics",
      xlabel = "Insert size",
      ylabel = "Reads",
      title = "Insert size"
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
356

357
  /**
    * Generate a line plot for the mapping quality
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libraryId Passed on to [[writePlotFromSummary]]
    */
  def mappingQualityPlot(outputDir: File,
                         prefix: String,
                         summary: SummaryDb,
                         libraryLevel: Boolean = false,
                         sampleId: Option[Int] = None,
                         libraryId: Option[Int] = None): Unit = {
    val histogramPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
    )

    writePlotFromSummary(
      outputDir = outputDir,
      prefix = prefix,
      summary = summary,
      libraryLevel = libraryLevel,
      sampleId = sampleId,
      libraryId = libraryId,
      statsPaths = histogramPaths,
      yKeyList = List("mapping_quality"),
      xKeyList = List("count"),
      pipeline = "bammetrics",
      module = "bamstats",
      xlabel = "Mapping quality",
      ylabel = "Reads",
      title = "Mapping quality"
    )
  }

  /**
    * Generate a line plot for clipping
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libraryId Passed on to [[writePlotFromSummary]]
    */
  def clippingPlot(outputDir: File,
                   prefix: String,
                   summary: SummaryDb,
                   libraryLevel: Boolean = false,
                   sampleId: Option[Int] = None,
                   libraryId: Option[Int] = None): Unit = {
    val histogramPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
    )

    writePlotFromSummary(
      outputDir = outputDir,
      prefix = prefix,
      summary = summary,
      libraryLevel = libraryLevel,
      sampleId = sampleId,
      libraryId = libraryId,
      statsPaths = histogramPaths,
      yKeyList = List("clipping"),
      xKeyList = List("count"),
      pipeline = "bammetrics",
      module = "bamstats",
      xlabel = "Clipping",
      ylabel = "Reads",
      title = "Clipping"
    )
  }

415
  /**
    * Generate a line plot for wgs coverage
    *
    * The "count" histogram is preferred, "high_quality_coverage_count" is used when "count"
    * has no data.
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libraryId Passed on to [[writePlotFromSummary]]
    */
  def wgsHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val histogramPaths = Map(
      "coverage" -> List("histogram", "coverage"),
      "count" -> List("histogram", "count"),
      "high_quality_coverage_count" -> List("histogram", "high_quality_coverage_count")
    )

    writePlotFromSummary(
      outputDir = outputDir,
      prefix = prefix,
      summary = summary,
      libraryLevel = libraryLevel,
      sampleId = sampleId,
      libraryId = libraryId,
      statsPaths = histogramPaths,
      yKeyList = List("coverage"),
      xKeyList = List("count", "high_quality_coverage_count"),
      pipeline = "bammetrics",
      module = "wgs",
      xlabel = "Coverage",
      ylabel = "Bases",
      title = "Whole genome coverage"
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
453 454

  /**
    * Generate a line plot for rna coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libraryId Passed on to [[writePlotFromSummary]]
    */
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val histogramPaths = Map(
      "position" -> List("histogram", "normalized_position"),
      "count" -> List("histogram", "All_Reads.normalized_coverage")
    )

    writePlotFromSummary(
      outputDir = outputDir,
      prefix = prefix,
      summary = summary,
      libraryLevel = libraryLevel,
      sampleId = sampleId,
      libraryId = libraryId,
      statsPaths = histogramPaths,
      yKeyList = List("position"),
      xKeyList = List("count"),
      pipeline = "bammetrics",
      module = "rna",
      xlabel = "Relative position",
      ylabel = "Coverage",
      title = "Rna coverage"
    )
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
492
  /**
    * Merges multiple tables into a single table, aligned on the values of a shared column
    *
    * The merge column of the result holds the distinct values of that column over all tables,
    * in order of first appearance. Every other column is aligned to those values. When a
    * column name occurs in more than one table the column of the last table is kept.
    *
    * @param tables Tables to merge, each table must contain the merge column
    * @param mergeColumn Name of the column the tables are merged on
    * @param defaultValue Value used when a table has no row for a value of the merge column
    * @return Merged table, all columns have the same length as the merge column
    */
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String,
                  defaultValue: Any = 0): Map[String, Array[Any]] = {
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    val columns = for {
      table <- tables
      (columnKey, columnValues) <- table
      if columnKey != mergeColumn
    } yield {
      // Build the lookup once per column instead of once per merge key
      val valueByKey = table(mergeColumn).zip(columnValues).toMap
      columnKey -> keys.map(key => valueByKey.getOrElse(key, defaultValue))
    }
    columns.toMap + (mergeColumn -> keys)
  }

  /**
    * Writes a table to a tab separated file
    *
    * The header and every row start with the first column, the other columns follow sorted
    * by name.
    *
    * @param tsvFile File to write to
    * @param table Table to write, all columns must have the same number of rows
    * @param firstColumn Name of the column that is written first
    * @throws IllegalArgumentException when the columns do not all have the same length
    */
  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
    require(table.map(_._2.length).toList.distinct.size == 1,
            "Not all values has the same number or rows")
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    // Always release the file handle, also when a column lookup fails
    try {
      writer.println((firstColumn :: keys).mkString("\t"))
      table(firstColumn).zipWithIndex.foreach {
        case (c, i) =>
          writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
      }
    } finally writer.close()
  }
514
}
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538

/** Collects the values that are used to show the alignment summary */
object BamMetricsAlignmentSummary {

  /**
    * Fetches the flagstats of the libraries and the lines for the alignment summary plot
    *
    * @param summary Summary class
    * @param runId Run to fetch the stats from
    * @param allSamples Not used inside this method
    * @param allLibraries Not used inside this method
    * @param sampleId By default all samples are selected, when a sample is given only that sample is used
    * @param libId Not used inside this method
    * @param sampleLevel Default false, when set true the plot lines are per sample instead of per library
    * @return Map with the keys "alignmentSummaryResults" and "alignmentSummaryPlotLines"
    */
  def values(summary: SummaryDb,
             runId: Int,
             allSamples: Seq[Sample],
             allLibraries: Seq[Library],
             sampleId: Option[Int] = None,
             libId: Option[Int],
             sampleLevel: Boolean = false): Map[String, Any] = {
    val flagstatPaths = Map(
      "All" -> List("flagstats", "All"),
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    // The results are always fetched per library, only the plot lines follow sampleLevel
    val libraryResults =
      summary.getStatsForLibraries(runId, "bammetrics", "bamstats", sampleId, flagstatPaths)
    val plotLines =
      BammetricsReport.alignmentSummaryPlotLines(summary, sampleId, libraryLevel = !sampleLevel)

    Map(
      "alignmentSummaryResults" -> libraryResults,
      "alignmentSummaryPlotLines" -> plotLines
    )
  }
}