/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20 21 22 23 24 25 26 27
import nl.lumc.sasc.biopet.core.report.{ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection}
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Await
import scala.concurrent.duration.Duration
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
/**
 * Report extension whose builder is the [[BammetricsReport]] object.
 *
 * @param parent Configurable handed to the extension as its parent
 */
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {

  /** The report builder used by this extension. */
  def builder: BammetricsReport.type = BammetricsReport
}
32

33
/**
 * Object to create a report for [[BamMetrics]]
 *
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
39

40
  /** Title under which this report is presented */
  val reportName: String = "Bam Metrics"

43
  /**
   * Root page for a single BamMetrics report.
   *
   * The sub pages and sections produced by [[bamMetricsPage]] are reused; a "Versions" and a
   * "Files" sub page are appended and a "Report" front section is put before the other sections.
   */
  def indexPage = {
    val metricsPage = this.bamMetricsPage(summary, sampleId, libId)

    val versionsPage = "Versions" -> ReportPage(
      List(),
      List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")),
      Map())

    val filesPage = "Files" -> ReportPage(
      List(),
      List("Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")),
      Map())

    val frontSection = "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")

    ReportPage(
      metricsPage.subPages ::: List(versionsPage, filesPage),
      frontSection :: metricsPage.sections,
      Map()
    )
  }
58

59
  /**
   * Generates a page with alignment stats.
   *
   * The "Insert Size", "Whole genome coverage" and "Rna coverage" sections are only added when
   * the summary holds matching stats. A "Targets" sub page is only added when target names are
   * found in the settings.
   *
   * @param summary Summary database that is queried
   * @param sampleId Optional sample id used to narrow the queries
   * @param libId Optional library id used to narrow the queries
   * @param metricsTag Pipeline name under which the stats and settings are looked up
   */
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
                     metricsTag: String = "bammetrics") = {

    //val pipelineId: Int = summary.getPipelineId(runId, metricsTag).map(_.get)

    // Which optional modules left stats behind for this sample / library
    val wgsExecuted = summary.getStatsSize(runId, Right(metricsTag), Some(Right("wgs")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_))) >= 1
    val rnaExecuted = summary.getStatsSize(runId, Right(metricsTag), Some(Right("rna")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_))) >= 1

    val insertsizeMetrics = summary.getStatKeys(runId, Right(metricsTag), Some(Right("CollectInsertSizeMetrics")),
      sample = sampleId.map(Left(_)), library = libId.map(Left(_)), Map("metrics" -> List("metrics")))
      .exists(_._2.isDefined)

    val targetSettings = summary.getSettingKeys(runId, Right(metricsTag), None,
      sample = sampleId.map(Left(_)), library = libId.map(Left(_)),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))

    // Targets are only reported when a roi list is present; the amplicon, when set, goes first
    val targets = targetSettings("roi_name") match {
      case Some(roi: List[_]) =>
        val roiNames = roi.map(_.toString)
        targetSettings("amplicon_name") match {
          case Some(amplicon: String) => amplicon :: roiNames
          case _ => roiNames
        }
      case _ => Nil
    }

    val targetPages =
      if (targets.nonEmpty) List("Targets" -> ReportPage(
        List(),
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
        Map()))
      else List()

    val alwaysSections = List(
      "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
      "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
      "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true)))

    val insertSizeSections =
      if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)))
      else Nil

    val wgsSections =
      if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", Map("showPlot" -> true)))
      else Nil

    val rnaSections =
      if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp", Map("showPlot" -> true)))
      else Nil

    ReportPage(
      targetPages,
      alwaysSections ++ insertSizeSections ++ wgsSections ++ rnaSections,
      Map("metricsTag" -> metricsTag)
    )
  }

108 109
  /**
   * Generate a stacked bar plot for alignment stats.
   *
   * Writes `prefix.tsv` with one row per sample (or per library) holding the mapped, duplicate,
   * unmapped and secondary read counts, and then renders `prefix.png` from it.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   */
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    // Keyed on (sample id, optional library id); the library id is only set at library level
    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, Right("bammetrics"), Some(Right("bamstats")),
        sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
    } else summary.getStatsForSamples(runId, Right("bammetrics"), Some(Right("bamstats")),
      sample = sampleId.map(Left(_)), keyValues = statsPaths).map(x => (x._1, None) -> x._2)

    val tsvWriter = new PrintWriter(tsvFile)
    // Close the writer even when a lookup or conversion below throws
    try {
      tsvWriter.print(if (libraryLevel) "Library" else "Sample")
      tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

      for (((s, l), result) <- results) {
        // Name lookups are awaited here, in the same way writePlotFromSummary does it
        val sampleName: String = Await.result(summary.getSampleName(s), Duration.Inf)
          .getOrElse(throw new IllegalStateException("Sample must be there"))
        val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
        val rowName = libName.map(sampleName + "-" + _).getOrElse(sampleName)

        val mapped = ConfigUtils.any2long(result("Mapped"))
        val duplicates = ConfigUtils.any2long(result("Duplicates"))
        val total = ConfigUtils.any2long(result("All"))
        val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))

        // Columns: unique mapped reads, duplicates, unmapped, secondary
        tsvWriter.println(
          Seq(rowName, mapped - duplicates - secondary, duplicates, total - mapped, secondary).mkString("\t"))
      }
    } finally tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
    // One bar per written row, so the width follows the number of rows in the tsv file
    plot.width = Some(200 + (results.size * 10))
    plot.title = Some("Aligned reads")
    plot.runLocal()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
  /**
   * Write a tsv file from summary stats and render it as a line plot.
   *
   * For each sample (or library) the values found under `xKey` and `yKey` are fetched. The
   * tables are merged on the x values, so the tsv file gets the x values in its first column
   * and one column of y values per sample (or sample-library).
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libraryId Currently not used by this method
   * @param statsPaths Paths of the values to fetch; must contain both `xKey` and `yKey`
   * @param xKey Key in `statsPaths` holding the values for the x axis
   * @param yKey Key in `statsPaths` holding the values for the y axis
   * @param pipeline Pipeline to query, by id or by name
   * @param module Optional module to query, by id or by name
   * @param xlabel Optional label of the x axis
   * @param ylabel Optional label of the y axis
   * @param title Optional title of the plot
   * @param removeZero Passed on to the line plot
   */
  def writePlotFromSummary(outputDir: File,
           prefix: String,
           summary: SummaryDb,
           libraryLevel: Boolean = false,
           sampleId: Option[Int] = None,
           libraryId: Option[Int] = None,
           statsPaths: Map[String, List[String]],
           xKey: String,
           yKey: String,
           pipeline: Either[Int,String],
           module: Option[Either[Int,String]],
           xlabel: Option[String] = None,
           ylabel: Option[String] = None,
           title: Option[String] = None,
           removeZero: Boolean = true): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    // Keyed on (sample id, optional library id); every stat value is converted to an array
    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(y => y._1 -> y._2.map(ConfigUtils.any2list(_).toArray)))
    } else summary.getStatsForSamples(runId, pipeline, module, sample = sampleId.map(Left(_)), keyValues = statsPaths)
      .map(x => (x._1, None) -> x._2.map(y => y._1 -> y._2.map(ConfigUtils.any2list(_).toArray)))

    val tables: Array[Map[String, Array[Any]]] = results.map { case ((sample, library), stats) =>
      val sampleName = Await.result(summary.getSampleName(sample), Duration.Inf)
        .getOrElse(throw new IllegalStateException("Sample must be there"))
      val libraryName = library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
      val columnName = sampleName + libraryName.map("-" + _).getOrElse("")
      // The x values are the shared merge column; the y values form this sample's own column.
      // Merging on the y values would collapse every x that happens to share the same y.
      Map(
        xKey -> stats(xKey).getOrElse(Array[Any]()),
        columnName -> stats(yKey).getOrElse(Array[Any]())
      )
    }.toArray

    writeTableToTsv(tsvFile, mergeTables(tables, xKey), xKey)

    LinePlot(tsvFile, pngFile,
      xlabel = xlabel,
      ylabel = ylabel,
      title = title,
      removeZero = removeZero).runLocal()
  }

213 214
  /**
   * Generate a line plot for insertsize
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libraryId Optional library id, passed on to [[writePlotFromSummary]]
   */
  def insertSizePlot(outputDir: File,
                     prefix: String,
                     summary: SummaryDb,
                     libraryLevel: Boolean = false,
                     sampleId: Option[Int] = None,
                     libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
      "count" -> List("histogram", "All_Reads.fr_count")
    )

    // Labels and title are Option[String] parameters, so they are wrapped explicitly
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
      xKey = "insert_size", yKey = "count",
      pipeline = Right("bammetrics"), module = Some(Right("CollectInsertSizeMetrics")),
      xlabel = Some("Insert size"), ylabel = Some("Reads"), title = Some("Insert size"))
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
237

238
  /**
   * Generate a line plot for the mapping quality histogram
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libraryId Optional library id, passed on to [[writePlotFromSummary]]
   */
  def mappingQualityPlot(outputDir: File,
                         prefix: String,
                         summary: SummaryDb,
                         libraryLevel: Boolean = false,
                         sampleId: Option[Int] = None,
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
    )

    // Labels and title are Option[String] parameters, so they are wrapped explicitly
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
      xKey = "mapping_quality", yKey = "count",
      pipeline = Right("bammetrics"), module = Some(Right("bamstats")),
      xlabel = Some("Mapping quality"), ylabel = Some("Reads"), title = Some("Mapping quality"))
  }

  /**
   * Generate a line plot for the clipping histogram
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libraryId Optional library id, passed on to [[writePlotFromSummary]]
   */
  def clippingPlot(outputDir: File,
                   prefix: String,
                   summary: SummaryDb,
                   libraryLevel: Boolean = false,
                   sampleId: Option[Int] = None,
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
    )

    // Labels and title are Option[String] parameters, so they are wrapped explicitly
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
      xKey = "clipping", yKey = "count",
      pipeline = Right("bammetrics"), module = Some(Right("bamstats")),
      xlabel = Some("Clipping"), ylabel = Some("Reads"), title = Some("Clipping"))
  }

270 271
  /**
   * Generate a line plot for wgs coverage
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libraryId Optional library id, passed on to [[writePlotFromSummary]]
   */
  def wgsHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    // NOTE(review): these paths start with the module name ("wgs") while the module is also
    // passed separately below - confirm this matches how the stats are stored.
    val statsPaths = Map(
      "coverage" -> List("wgs", "histogram", "coverage"),
      "count" -> List("wgs", "histogram", "count")
    )

    // Labels and title are Option[String] parameters, so they are wrapped explicitly
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
      xKey = "coverage", yKey = "count",
      pipeline = Right("bammetrics"), module = Some(Right("wgs")),
      xlabel = Some("Coverage"), ylabel = Some("Bases"), title = Some("Whole genome coverage"))
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
294 295

  /**
   * Generate a line plot for rna coverage
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   * @param libraryId Optional library id, passed on to [[writePlotFromSummary]]
   */
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: SummaryDb,
                       libraryLevel: Boolean = false,
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    // NOTE(review): these paths start with the module name ("rna") while the module is also
    // passed separately below - confirm this matches how the stats are stored.
    val statsPaths = Map(
      "position" -> List("rna", "histogram", "normalized_position"),
      "count" -> List("rna", "histogram", "All_Reads.normalized_coverage")
    )

    // xKey has to be a key of statsPaths: the x values are stored under "position".
    // Labels and title are Option[String] parameters, so they are wrapped explicitly.
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
      xKey = "position", yKey = "count",
      pipeline = Right("bammetrics"), module = Some(Right("rna")),
      xlabel = Some("Relative position"), ylabel = Some("Coverage"), title = Some("Rna coverage"))
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
320 321
  /**
   * Merge several column tables into one table that shares a single merge column.
   *
   * The merge column of the result holds the distinct merge values of all tables, in order of
   * first appearance. Every other column is realigned to those values; positions for which a
   * table has no value get `defaultValue`. When two tables contain a column with the same name
   * the column of the later table wins.
   *
   * @param tables Tables to merge, each one must contain `mergeColumn`
   * @param mergeColumn Name of the column the tables are aligned on
   * @param defaultValue Value used where a column has no entry for a merge value
   * @return Merged table, including the merge column itself
   */
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    val mergedColumns = tables.flatMap { table =>
      val mergeValues = table(mergeColumn)
      (table - mergeColumn).toList.map { case (columnKey, columnValues) =>
        // Build the lookup once per column instead of once per merge value
        val lookup: Map[Any, Any] = mergeValues.zip(columnValues).toMap
        columnKey -> keys.map(x => lookup.getOrElse(x, defaultValue))
      }
    }
    mergedColumns.toMap + (mergeColumn -> keys)
  }

  /**
   * Write a column table to a tab separated file.
   *
   * The header starts with `firstColumn`, followed by the other column names in sorted order;
   * after that one line is written per row.
   *
   * @param tsvFile File to write to
   * @param table Column name to column values, all columns must have the same length
   * @param firstColumn Name of the column that is written first
   * @throws IllegalArgumentException when the columns do not all have the same length
   */
  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    // Close the writer even when a column lookup fails halfway through the file
    try {
      writer.println((firstColumn :: keys).mkString("\t"))
      table(firstColumn).zipWithIndex.foreach {
        case (c, i) =>
          writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
      }
    } finally writer.close()
  }
339
}