BammetricsReport.scala 18.1 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20 21 22 23 24 25
import nl.lumc.sasc.biopet.core.report.{
  ReportBuilder,
  ReportBuilderExtension,
  ReportPage,
  ReportSection
}
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.utils.ConfigUtils
27
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
29 30
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
31

32
import scala.concurrent.{Await, Future}
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import scala.concurrent.duration.Duration
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
36
  def builder = BammetricsReport
37
}
38

39
/**
40 41 42 43
  * Object to create a report for [[BamMetrics]]
  *
  * Created by pjvan_thof on 3/30/15.
  */
44
object BammetricsReport extends ReportBuilder {
45

46
  /** Name of report */
47 48
  val reportName = "Bam Metrics"

49 50
  def pipelineName = "bammetrics"

51
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
52 53
  def indexPage: Future[ReportPage] =
    bamMetricsPage(summary, sampleId, libId).map { bamMetricsPage =>
54 55 56 57 58 59 60 61 62 63 64 65 66
      ReportPage(
        bamMetricsPage.subPages ::: List(
          "Versions" -> Future(
            ReportPage(List(),
                       List("Executables" -> ReportSection(
                         "/nl/lumc/sasc/biopet/core/report/executables.ssp")),
                       Map())),
          "Files" -> filesPage(sampleId, libId)
        ),
        List(
          "Report" -> ReportSection(
            "/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
        ) ::: bamMetricsPage.sections,
Peter van 't Hof's avatar
Peter van 't Hof committed
67 68 69
        Map()
      )
    }
70

71
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
Peter van 't Hof's avatar
Peter van 't Hof committed
75
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
    val wgsExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "wgs",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "rna",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1

    val insertsizeMetrics = summary
      .getStatKeys(
        runId,
        metricsTag,
        "CollectInsertSizeMetrics",
        sampleId.map(SampleId).getOrElse(NoSample),
        libId.map(LibraryId).getOrElse(NoLibrary),
        Map("metrics" -> List("metrics"))
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      .exists(_._2.isDefined)
97

98 99 100 101 102 103 104 105
    val targetSettings = summary.getSettingKeys(
      runId,
      metricsTag,
      NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample),
      library = libId.map(LibraryId).getOrElse(NoLibrary),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name"))
    )
106
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
107 108
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    ) match {
110 111 112 113
      case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
      case (_, Some(roi: List[_])) => roi.map(_.toString)
      case _ => Nil
    }
114 115

    ReportPage(
116
      if (targets.isEmpty) List()
117 118 119 120 121 122 123 124 125
      else
        List(
          "Targets" -> Future.successful(
            ReportPage(
              List(),
              targets.map(t =>
                t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp",
                                   Map("target" -> Some(t)))),
              Map()))),
126
      List(
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
        "Summary" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp",
          Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp",
                                    Map("showPlot" -> true))
      ) ++
        (if (insertsizeMetrics)
           List(
             "Insert Size" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
               Map("showPlot" -> true)))
         else Nil) ++ (if (wgsExecuted)
                         List(
                           "Whole genome coverage" -> ReportSection(
                             "/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
                             Map("showPlot" -> true)))
                       else Nil) ++
        (if (rnaExecuted)
           List(
             "Rna coverage" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
               Map("showPlot" -> true)))
         else Nil),
152
      Map("metricsTag" -> metricsTag)
153 154 155
    )
  }

156
  /**
157 158 159 160 161 162 163 164
    * Generate a stackbar plot for alignment stats
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
165 166
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
167
                           summary: SummaryDb,
168
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
169
                           sampleId: Option[Int] = None): Unit = {
170 171 172 173 174 175
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
176 177 178 179 180 181 182 183
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
      summary
        .getStatsForLibraries(runId,
                              "bammetrics",
                              "bamstats",
                              sampleId = sampleId,
                              keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2)
    } else
      summary
        .getStatsForSamples(runId,
                            "bammetrics",
                            "bamstats",
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
199

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      val sampleName: String = summary.getSampleName(s).map(_.get)
202 203
      val libName: Option[String] =
        l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
204
      val sb = new StringBuffer()
205 206
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t")
      else sb.append(sampleName + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
207 208 209 210
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
211
      sb.append((mapped - duplicates - secondary) + "\t")
212
      sb.append(duplicates + "\t")
213
      sb.append((total - mapped) + "\t")
214
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      tsvWriter.println(sb.toString)
216 217 218 219 220 221 222 223
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
224
    plot.width = Some(200 + (results.size * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
225
    plot.title = Some("Aligned_reads")
226 227 228
    plot.runLocal()
  }

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
  /**
    * This is a generic method to create plots
    * @param outputDir Outputdir of the plot
    * @param prefix Files will start with this name
    * @param summary Summary where the data is
    * @param libraryLevel If enabled the plots will show data per library
    * @param sampleId If set only this sample is shown
    * @param libraryId If set onlt this library is shown
    * @param statsPaths Paths in summary where the tables can be found
    * @param yKeyList Keys to search from, first has prio over second one
    * @param xKeyList Keys to search from, first has prio over second one
    * @param pipeline Query for the pipeline
    * @param module Query for the module
    * @param xlabel X label shown on the plot
    * @param ylabel Y label shown on the plot
    * @param title Title of the plot
    * @param removeZero
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
247
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
248 249 250 251 252 253
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
254 255
                           yKeyList: List[String],
                           xKeyList: List[String],
256 257
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
258 259 260 261
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
262 263 264 265
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
266 267 268 269 270 271 272 273 274 275 276 277 278
      summary
        .getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x =>
          (x._1._1, Some(x._1._2)) -> x._2.map(x =>
            x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
    } else
      summary
        .getStatsForSamples(runId,
                            pipeline,
                            module,
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
279

Peter van 't Hof's avatar
Peter van 't Hof committed
280 281
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
282 283
        val sampleName = Await
          .result(summary.getSampleName(sample), Duration.Inf)
Peter van 't Hof's avatar
Peter van 't Hof committed
284
          .getOrElse(throw new IllegalStateException("Sample must be there"))
285 286
        val libraryName =
          library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
287 288
        val yKey = yKeyList.find(x => map.contains(x) && map(x).isDefined).getOrElse("none")
        val xKey = xKeyList.find(x => map.contains(x) && map(x).isDefined).getOrElse("none")
Peter van 't Hof's avatar
Peter van 't Hof committed
289
        Map(
290 291 292 293
          yKeyList.head -> map.getOrElse(yKey, None).getOrElse(Array()),
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map
            .getOrElse(xKey, None)
            .getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
294
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
295 296
    }.toArray

297
    writeTableToTsv(tsvFile, mergeTables(tables, yKeyList.head), yKeyList.head)
Peter van 't Hof's avatar
Peter van 't Hof committed
298

299 300 301 302 303 304 305
    LinePlot(tsvFile,
             pngFile,
             xlabel = xlabel,
             ylabel = ylabel,
             title = title,
             hideLegend = results.size > 40,
             removeZero = removeZero).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
306 307
  }

308
  /**
309 310 311 312 313 314 315 316
    * Generate a line plot for insertsize
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
317
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
318
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
319
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
320
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
321
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
322
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
323 324
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
325
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
326
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
327

328 329 330 331 332 333 334 335
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
336 337
      "insert_size" :: Nil,
      "count" :: Nil,
338 339 340 341 342 343
      "bammetrics",
      "CollectInsertSizeMetrics",
      "Insert size",
      "Reads",
      "Insert size"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
344
  }
345

346
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
347
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
348
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
349
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
350
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
351 352 353 354
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
355 356
    )

357 358 359 360 361 362 363 364
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
365 366
      "mapping_quality" :: Nil,
      "count" :: Nil,
367 368 369 370 371 372
      "bammetrics",
      "bamstats",
      "Mapping quality",
      "Reads",
      "Mapping quality"
    )
373 374 375
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
376
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
377
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
378
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
379
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
380 381 382 383
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
384 385
    )

386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "clipping" :: Nil,
      "count" :: Nil,
      "bammetrics",
      "bamstats",
      "Clipping",
      "Reads",
      "Clipping"
    )
402 403
  }

404
  /**
405 406 407 408 409 410 411 412
    * Generate a line plot for wgs coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
413
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
414
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
415
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
416
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
417
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
418 419
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
420
      "coverage" -> List("histogram", "coverage"),
421 422
      "count" -> List("histogram", "count"),
      "high_quality_coverage_count" -> List("histogram", "high_quality_coverage_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
423
    )
424

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "coverage" :: Nil,
      "count" :: "high_quality_coverage_count" :: Nil,
      "bammetrics",
      "wgs",
      "Coverage",
      "Bases",
      "Whole genome coverage"
    )
441
  }
442 443

  /**
444 445 446 447 448 449 450 451
    * Generate a line plot for rna coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
452 453
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
454
                       summary: SummaryDb,
455
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
456 457 458
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
459 460
      "position" -> List("histogram", "normalized_position"),
      "count" -> List("histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
461
    )
462

463 464 465 466 467 468 469 470
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
471 472
      "position" :: Nil,
      "count" :: Nil,
473 474 475 476 477 478
      "bammetrics",
      "rna",
      "Relative position",
      "Coverage",
      "Rna coverage"
    )
479 480
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
481
  def mergeTables(tables: Array[Map[String, Array[Any]]],
482 483
                  mergeColumn: String,
                  defaultValue: Any = 0): Map[String, Array[Any]] = {
484 485
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
486 487
      columnKey -> keys.map(x =>
        table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
488 489 490 491
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
492
    require(table.map(_._2.length).toList.distinct.size == 1,
493
            "Not all values has the same number or rows")
494 495 496
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
497 498 499
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
500 501 502
    }
    writer.close()
  }
503
}