BammetricsReport.scala 16 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
21
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
22
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
23

24
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
25
  def builder = BammetricsReport
26
}
27

28
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30
 * Object to create a report for [[BamMetrics]]
 *
31 32 33
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
34

35
  /** Name of report */
36 37
  val reportName = "Bam Metrics"

38
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
39 40 41
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
42 43
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
44 45 46 47 48 49 50 51 52
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
53

54 55 56 57 58
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59 60 61 62

    val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined
    val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined

Peter van 't Hof's avatar
Peter van 't Hof committed
63 64
    val insertsizeMetrics = summary.getValue(sampleId, libId, metricsTag, "stats", "CollectInsertSizeMetrics", "metrics") match {
      case Some(None) => false
Peter van 't Hof's avatar
Peter van 't Hof committed
65 66
      case Some(_)    => true
      case _          => false
Peter van 't Hof's avatar
Peter van 't Hof committed
67
    }
68

69
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
70 71
      summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74 75 76
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
77 78

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
79 80
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
81
        List(),
82
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
83
        Map())),
84
      List(
85 86 87
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true))) ++
88
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
91 92 93 94 95
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
96
      Map("metricsTag" -> metricsTag)
97 98 99
    )
  }

100 101
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
102
   *
103 104 105 106 107 108
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      sb.append((mapped - duplicates - secondary) + "\t")
132
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
133
      sb.append((total - mapped) + "\t")
134 135 136 137 138
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
139
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
140
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
141 142
        lib <- summary.libraries(sample)
      ) {
143 144 145
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
146
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
147 148 149 150 151 152 153 154 155 156
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
157 158
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
159
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    plot.title = Some("Aligned reads")
161 162 163
    plot.runLocal()
  }

164 165
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
166
   *
167 168 169 170 171 172
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
173
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
174 175 176
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
177 178
                     sampleId: Option[String] = None,
                     libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
179 180 181
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
182 183 184 185
    def paths(name: String) = Map(
      "insert_size" -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
      name -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
186

Peter van 't Hof's avatar
Peter van 't Hof committed
187
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
Peter van 't Hof's avatar
Peter van 't Hof committed
188 189 190
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
192
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "insert_size"), "insert_size")
Peter van 't Hof's avatar
Peter van 't Hof committed
193

Peter van 't Hof's avatar
Peter van 't Hof committed
194
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
195 196 197 198
      xlabel = Some("Insert size"),
      ylabel = Some("Reads"),
      title = Some("Insert size"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
199
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
200

201
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
202 203 204 205 206
                         prefix: String,
                         summary: Summary,
                         libraryLevel: Boolean = false,
                         sampleId: Option[String] = None,
                         libId: Option[String] = None): Unit = {
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    def paths(name: String) = Map(
      "mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "value"),
      name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "count")
    )

    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
      }
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "mapping_quality"), "mapping_quality")

    LinePlot(tsvFile, pngFile,
      xlabel = Some("Mapping Quality"),
      ylabel = Some("Reads"),
      title = Some("Mapping Quality"),
      removeZero = true).runLocal()
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
230 231 232 233 234
                   prefix: String,
                   summary: Summary,
                   libraryLevel: Boolean = false,
                   sampleId: Option[String] = None,
                   libId: Option[String] = None): Unit = {
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    def paths(name: String) = Map(
      "clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "value"),
      name -> List("bammetrics", "stats", "bamstats", "clipping", "count")
    )

    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
      }
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "clipping"), "clipping")

    LinePlot(tsvFile, pngFile,
      xlabel = Some("Clipping"),
      ylabel = Some("Reads"),
      title = Some("Clipping"),
      removeZero = true).runLocal()
  }

257 258
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
259
   *
260 261 262 263 264 265
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
266
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
267 268 269
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
270 271
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
272 273 274
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
275 276 277 278
    def paths(name: String) = Map(
      "coverage" -> List("bammetrics", "stats", "wgs", "histogram", "coverage"),
      name -> List("bammetrics", "stats", "wgs", "histogram", "count")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
279

Peter van 't Hof's avatar
Peter van 't Hof committed
280
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
Peter van 't Hof's avatar
Peter van 't Hof committed
281 282 283
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
284
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
285
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "coverage"), "coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
286

Peter van 't Hof's avatar
Peter van 't Hof committed
287
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
288 289 290 291
      xlabel = Some("Coverage"),
      ylabel = Some("Bases"),
      title = Some("Whole genome coverage"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
292
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
293 294

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
295
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
296
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
297 298 299 300 301 302
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
303 304 305 306 307 308 309 310 311
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
312 313 314 315
    def paths(name: String) = Map(
      "normalized_position" -> List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
      name -> List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
316

Peter van 't Hof's avatar
Peter van 't Hof committed
317
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
Peter van 't Hof's avatar
Peter van 't Hof committed
318 319 320
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
321
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
322
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "normalized_position"), "normalized_position")
Peter van 't Hof's avatar
Peter van 't Hof committed
323

Peter van 't Hof's avatar
Peter van 't Hof committed
324
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
325 326 327 328
      xlabel = Some("Relative position"),
      ylabel = Some("Coverage"),
      title = Some("Rna coverage"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
329
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
330

Peter van 't Hof's avatar
Peter van 't Hof committed
331 332 333 334 335 336 337 338 339 340
  private def getSampleLibraries(summary: Summary,
                                 sampleId: Option[String] = None,
                                 LibId: Option[String] = None,
                                 libraryLevel: Boolean = false): List[(String, Option[String])] = {
    if (LibId.isDefined) require(sampleId.isDefined)
    if (libraryLevel || LibId.isDefined)
      for ((sample, libs) <- summary.libraries.toList; lib <- libs) yield (sample, Some(lib))
    else for ((sample, libs) <- summary.libraries.toList) yield (sample, None)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
341 342 343 344
  def getTableFromSummary(summary: Summary,
                          paths: Map[String, List[String]],
                          sampleId: Option[String] = None,
                          libId: Option[String] = None): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
345 346 347 348 349
    val pathValues: Map[String, Array[Any]] = paths.map {
      case (key, path) =>
        val value = summary.getValueAsArray(sampleId, libId, path: _*)
        require(value.isDefined, s"Sample: $sampleId, library: $libId on path: '${path.mkString(",")}' does not exist in summary")
        key -> value.get
Peter van 't Hof's avatar
Peter van 't Hof committed
350
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
351
    require(pathValues.map(_._2.size).toList.distinct.size == 1, s"Arrays in summary does not have the same number of values, $paths")
Peter van 't Hof's avatar
Peter van 't Hof committed
352 353 354
    pathValues
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
355 356
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
357 358 359 360 361 362 363
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
364
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
365 366 367
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
368 369 370
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
371 372 373
    }
    writer.close()
  }
374
}