BammetricsReport.scala 15.3 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.rscript.{ LinePlot, StackedBarPlot }
Peter van 't Hof's avatar
Peter van 't Hof committed
23 24 25 26 27
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Await
import scala.concurrent.duration.Duration
28 29
import scalaz._
import Scalaz._
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
32
  def builder = BammetricsReport
33
}
34

35
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37
 * Object to create a report for [[BamMetrics]]
 *
38 39 40
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
41

42
  /** Name of report */
43 44
  val reportName = "Bam Metrics"

45
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
46 47 48
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
49 50
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
51 52 53 54 55 56 57 58 59
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
60

61
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
62 63 64
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
65
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
    //val pipelineId: Int = summary.getPipelineId(runId, metricsTag).map(_.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
68

Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
    val wgsExecuted = summary.getStatsSize(runId, Some(metricsTag.right), Some(Some("wgs".right)), sample = sampleId.map(x => Some(x.left)), library = libId.map(x => Some(x.left))) >= 1
    val rnaExecuted = summary.getStatsSize(runId, Some(metricsTag.right), Some(Some("rna".right)), sample = sampleId.map(x => Some(x.left)), library = libId.map(x => Some(x.left))) >= 1
Peter van 't Hof's avatar
Peter van 't Hof committed
71

72 73
    val insertsizeMetrics = summary.getStatKeys(runId, metricsTag.right, Some("CollectInsertSizeMetrics".right),
      sample = sampleId.map(_.left), library = libId.map(_.left), Map("metrics" -> List("metrics")))
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      .exists(_._2.isDefined)
75

76
    val targetSettings = summary.getSettingKeys(runId, metricsTag.right, None, sample = sampleId.map(_.left), library = libId.map(_.left),
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))
78
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
79 80
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
81 82 83 84 85
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
86 87

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
88 89
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
90
        List(),
91
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
92
        Map())),
93
      List(
94 95 96
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true))) ++
97
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
98 99
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
100 101 102 103 104
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
105
      Map("metricsTag" -> metricsTag)
106 107 108
    )
  }

109 110
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
111
   *
112 113 114 115 116 117
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
118 119
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
120
                           summary: SummaryDb,
121
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
122
                           sampleId: Option[Int] = None): Unit = {
123 124 125 126 127 128
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
129 130 131 132 133 134 135 136
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
137
      summary.getStatsForLibraries(runId, "bammetrics".right, Some("bamstats".right),
Peter van 't Hof's avatar
Peter van 't Hof committed
138
        sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
139 140
    } else summary.getStatsForSamples(runId, "bammetrics".right, Some("bamstats".right),
      sample = sampleId.map(_.left), keyValues = statsPaths).map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
143 144
      val sampleName: String = summary.getSampleName(s).map(_.get)
      val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
145
      val sb = new StringBuffer()
Peter van 't Hof's avatar
Peter van 't Hof committed
146 147 148 149 150
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t") else sb.append(sampleName + "\t")
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
151
      sb.append((mapped - duplicates - secondary) + "\t")
152
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
153
      sb.append((total - mapped) + "\t")
154
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      tsvWriter.println(sb.toString)
156 157 158 159 160 161 162 163
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
164
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
165 166
      plot.width = Some(200 + (libraries.filter(s => sampleId.getOrElse(s.id) == s.id).size) * 10)
    } else plot.width = Some(200 + (samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
167
    plot.title = Some("Aligned reads")
168 169 170
    plot.runLocal()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
171
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
172 173 174 175 176 177 178 179
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
                           xKey: String,
                           yKey: String,
180 181
                           pipeline: \/[Int, String],
                           module: Option[\/[Int, String]],
Peter van 't Hof's avatar
Peter van 't Hof committed
182 183 184 185
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
186 187 188 189 190 191
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
192
    } else summary.getStatsForSamples(runId, pipeline, module, sample = sampleId.map(_.left), keyValues = statsPaths)
Peter van 't Hof's avatar
Peter van 't Hof committed
193 194
      .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))

Peter van 't Hof's avatar
Peter van 't Hof committed
195 196
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
197 198 199
        val sampleName = Await.result(summary.getSampleName(sample), Duration.Inf)
          .getOrElse(throw new IllegalStateException("Sample must be there"))
        val libraryName = library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
Peter van 't Hof's avatar
Peter van 't Hof committed
200 201 202 203
        Map(
          yKey -> map(yKey).getOrElse(Array()),
          (sampleName + libraryName.map("-" + _) getOrElse ("")) -> map(xKey).getOrElse(Array())
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
204 205 206 207 208 209 210 211 212 213 214
    }.toArray

    writeTableToTsv(tsvFile, mergeTables(tables, yKey), yKey)

    LinePlot(tsvFile, pngFile,
      xlabel = xlabel,
      ylabel = ylabel,
      title = title,
      removeZero = removeZero).runLocal()
  }

215 216
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
217
   *
218 219 220 221 222 223
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
224
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
225
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
226
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
227
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
228
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
229
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
230 231
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
232
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
233
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
234

Peter van 't Hof's avatar
Peter van 't Hof committed
235
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
236
      "insert_size", "count", "bammetrics".right, Some("CollectInsertSizeMetrics".right),
Peter van 't Hof's avatar
Peter van 't Hof committed
237
      "Insert size", "Reads", "Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
238
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
239

240
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
241
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
242
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
243
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
244
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
245 246 247 248
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
249 250
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
251
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
252
      "mapping_quality", "count", "bammetrics".right, Some("bamstats".right),
Peter van 't Hof's avatar
Peter van 't Hof committed
253
      "Mapping quality", "Reads", "Mapping quality")
254 255 256
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
257
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
258
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
259
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
260
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
261 262 263 264
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
265 266
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
267
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
268
      "clipping", "count", "bammetrics".right, Some("bamstats".right),
Peter van 't Hof's avatar
Peter van 't Hof committed
269
      "Clipping", "Reads", "Clipping")
270 271
  }

272 273
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
274
   *
275 276 277 278 279 280
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
281
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
282
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
283
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
284
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
285
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
286 287 288 289
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "coverage" -> List("wgs", "histogram", "coverage"),
      "count" -> List("wgs", "histogram", "count")
Peter van 't Hof's avatar
Peter van 't Hof committed
290
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
291

Peter van 't Hof's avatar
Peter van 't Hof committed
292
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
293
      "coverage", "count", "bammetrics".right, Some("wgs".right),
Peter van 't Hof's avatar
Peter van 't Hof committed
294
      "Coverage", "Bases", "Whole genome coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
295
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
296 297

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
298
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
299
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
300 301 302 303 304 305
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
306 307
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
308
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
309
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
310 311 312 313 314
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "position" -> List("rna", "histogram", "normalized_position"),
      "count" -> List("rna", "histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
315
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
316

Peter van 't Hof's avatar
Peter van 't Hof committed
317
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
318
      "coverage", "count", "bammetrics".right, Some("rna".right),
Peter van 't Hof's avatar
Peter van 't Hof committed
319
      "Relative position", "Coverage", "Rna coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
320 321
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
322 323
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
324 325 326 327 328 329 330
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
331
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
332 333 334
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
335 336 337
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
338 339 340
    }
    writer.close()
  }
341
}