BammetricsReport.scala 17.3 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20 21 22 23 24 25
import nl.lumc.sasc.biopet.core.report.{
  ReportBuilder,
  ReportBuilderExtension,
  ReportPage,
  ReportSection
}
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.utils.ConfigUtils
27
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
31

32
import scala.concurrent.{Await, Future}
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import scala.concurrent.duration.Duration
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
36
  def builder = BammetricsReport
37
}
38

39
/**
40 41 42 43
  * Object to create a report for [[BamMetrics]]
  *
  * Created by pjvan_thof on 3/30/15.
  */
44
object BammetricsReport extends ReportBuilder {
45

46
  /** Name of report */
47 48
  val reportName = "Bam Metrics"

49 50
  def pipelineName = "bammetrics"

51
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
52 53
  def indexPage: Future[ReportPage] =
    bamMetricsPage(summary, sampleId, libId).map { bamMetricsPage =>
54 55 56 57 58 59 60 61 62 63 64 65 66
      ReportPage(
        bamMetricsPage.subPages ::: List(
          "Versions" -> Future(
            ReportPage(List(),
                       List("Executables" -> ReportSection(
                         "/nl/lumc/sasc/biopet/core/report/executables.ssp")),
                       Map())),
          "Files" -> filesPage(sampleId, libId)
        ),
        List(
          "Report" -> ReportSection(
            "/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
        ) ::: bamMetricsPage.sections,
Peter van 't Hof's avatar
Peter van 't Hof committed
67 68 69
        Map()
      )
    }
70

71
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
Peter van 't Hof's avatar
Peter van 't Hof committed
75
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
    val wgsExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "wgs",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "rna",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1

    val insertsizeMetrics = summary
      .getStatKeys(
        runId,
        metricsTag,
        "CollectInsertSizeMetrics",
        sampleId.map(SampleId).getOrElse(NoSample),
        libId.map(LibraryId).getOrElse(NoLibrary),
        Map("metrics" -> List("metrics"))
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      .exists(_._2.isDefined)
97

98 99 100 101 102 103 104 105
    val targetSettings = summary.getSettingKeys(
      runId,
      metricsTag,
      NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample),
      library = libId.map(LibraryId).getOrElse(NoLibrary),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name"))
    )
106
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
107 108
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    ) match {
110 111 112 113
      case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
      case (_, Some(roi: List[_])) => roi.map(_.toString)
      case _ => Nil
    }
114 115

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
116
      if (targets.isEmpty) List()
117 118 119 120 121 122 123 124 125
      else
        List(
          "Targets" -> Future.successful(
            ReportPage(
              List(),
              targets.map(t =>
                t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp",
                                   Map("target" -> Some(t)))),
              Map()))),
126
      List(
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
        "Summary" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp",
          Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp",
                                    Map("showPlot" -> true))
      ) ++
        (if (insertsizeMetrics)
           List(
             "Insert Size" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
               Map("showPlot" -> true)))
         else Nil) ++ (if (wgsExecuted)
                         List(
                           "Whole genome coverage" -> ReportSection(
                             "/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
                             Map("showPlot" -> true)))
                       else Nil) ++
        (if (rnaExecuted)
           List(
             "Rna coverage" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
               Map("showPlot" -> true)))
         else Nil),
152
      Map("metricsTag" -> metricsTag)
153 154 155
    )
  }

156
  /**
157 158 159 160 161 162 163 164
    * Generate a stackbar plot for alignment stats
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
165 166
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
167
                           summary: SummaryDb,
168
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
169
                           sampleId: Option[Int] = None): Unit = {
170 171 172 173 174 175
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
176 177 178 179 180 181 182 183
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
      summary
        .getStatsForLibraries(runId,
                              "bammetrics",
                              "bamstats",
                              sampleId = sampleId,
                              keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2)
    } else
      summary
        .getStatsForSamples(runId,
                            "bammetrics",
                            "bamstats",
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
199

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      val sampleName: String = summary.getSampleName(s).map(_.get)
202 203
      val libName: Option[String] =
        l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
204
      val sb = new StringBuffer()
205 206
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t")
      else sb.append(sampleName + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
207 208 209 210
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
211
      sb.append((mapped - duplicates - secondary) + "\t")
212
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
213
      sb.append((total - mapped) + "\t")
214
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      tsvWriter.println(sb.toString)
216 217 218 219 220 221 222 223
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
224
    plot.width = Some(200 + (results.size * 10))
Peter van 't Hof's avatar
WIP  
Peter van 't Hof committed
225
    plot.title = Some("Aligned_reads")
226 227 228
    plot.runLocal()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
229
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
230 231 232 233 234 235 236
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
                           yKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
237
                           xKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
238 239
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
240 241 242 243
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
244 245 246 247
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
248 249 250 251 252 253 254 255 256 257 258 259 260
      summary
        .getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x =>
          (x._1._1, Some(x._1._2)) -> x._2.map(x =>
            x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
    } else
      summary
        .getStatsForSamples(runId,
                            pipeline,
                            module,
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
261

Peter van 't Hof's avatar
Peter van 't Hof committed
262 263
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
264 265
        val sampleName = Await
          .result(summary.getSampleName(sample), Duration.Inf)
Peter van 't Hof's avatar
Peter van 't Hof committed
266
          .getOrElse(throw new IllegalStateException("Sample must be there"))
267 268
        val libraryName =
          library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
Peter van 't Hof's avatar
Peter van 't Hof committed
269 270
        Map(
          yKey -> map(yKey).getOrElse(Array()),
Peter van 't Hof's avatar
Peter van 't Hof committed
271
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map(xKey).getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
272
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
273 274 275 276
    }.toArray

    writeTableToTsv(tsvFile, mergeTables(tables, yKey), yKey)

277 278 279 280 281 282 283
    LinePlot(tsvFile,
             pngFile,
             xlabel = xlabel,
             ylabel = ylabel,
             title = title,
             hideLegend = results.size > 40,
             removeZero = removeZero).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
284 285
  }

286
  /**
287 288 289 290 291 292 293 294
    * Generate a line plot for insertsize
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
295
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
296
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
297
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
298
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
299
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
300
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
301 302
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
303
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
304
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
305

306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "insert_size",
      "count",
      "bammetrics",
      "CollectInsertSizeMetrics",
      "Insert size",
      "Reads",
      "Insert size"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
322
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
323

324
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
325
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
326
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
327
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
328
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
329 330 331 332
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
333 334
    )

335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "mapping_quality",
      "count",
      "bammetrics",
      "bamstats",
      "Mapping quality",
      "Reads",
      "Mapping quality"
    )
351 352 353
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
354
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
355
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
356
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
357
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
358 359 360 361
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
362 363
    )

364 365 366 367 368 369 370 371 372 373 374 375 376 377
    writePlotFromSummary(outputDir,
                         prefix,
                         summary,
                         libraryLevel,
                         sampleId,
                         libraryId,
                         statsPaths,
                         "clipping",
                         "count",
                         "bammetrics",
                         "bamstats",
                         "Clipping",
                         "Reads",
                         "Clipping")
378 379
  }

380
  /**
381 382 383 384 385 386 387 388
    * Generate a line plot for wgs coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
389
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
390
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
391
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
392
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
393
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
394 395
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
396 397
      "coverage" -> List("histogram", "coverage"),
      "count" -> List("histogram", "count")
Peter van 't Hof's avatar
Peter van 't Hof committed
398
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
399

400 401 402 403 404 405 406 407 408 409 410 411 412 413
    writePlotFromSummary(outputDir,
                         prefix,
                         summary,
                         libraryLevel,
                         sampleId,
                         libraryId,
                         statsPaths,
                         "coverage",
                         "count",
                         "bammetrics",
                         "wgs",
                         "Coverage",
                         "Bases",
                         "Whole genome coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
414
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
415 416

  /**
417 418 419 420 421 422 423 424
    * Generate a line plot for rna coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
425 426
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
427
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
428
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
429 430 431
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
432 433
      "position" -> List("histogram", "normalized_position"),
      "count" -> List("histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
434
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
435

436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "position",
      "count",
      "bammetrics",
      "rna",
      "Relative position",
      "Coverage",
      "Rna coverage"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
452 453
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
454
  def mergeTables(tables: Array[Map[String, Array[Any]]],
455 456
                  mergeColumn: String,
                  defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
457 458
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
459 460
      columnKey -> keys.map(x =>
        table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
Peter van 't Hof's avatar
Peter van 't Hof committed
461 462 463 464
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
465 466
    require(table.map(_._2.size).toList.distinct.size == 1,
            "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
467 468 469
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
470 471 472
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
473 474 475
    }
    writer.close()
  }
476
}