BammetricsReport.scala 17 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
22
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
23
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
24

25
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
26
  def builder = BammetricsReport
27
}
28

29
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * Object to create a report for [[BamMetrics]]
 *
32
33
34
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
35

36
  /** Name of report */
37
38
  val reportName = "Bam Metrics"

39
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
47
48
49
50
51
52
53
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
54

55
56
57
58
59
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63

    val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined
    val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined

64
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
65
66
      summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
69
70
71
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
72
73

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
74
75
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
76
        List(),
77
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
78
        Map())),
79
80
      List(
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83
84
85
        "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
      ) ++ ( if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
        Map("showPlot" -> true))) else Nil) ++
        ( if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
        Map("showPlot" -> true))) else Nil),
86
      Map("metricsTag" -> metricsTag)
87
88
89
    )
  }

90
91
92
93
94
95
96
97
  /**
   * Generate a stackbar plot for alignment stats
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
      sb.append((mapped - duplicates) + "\t")
      sb.append(duplicates + "\t")
      sb.append((total - mapped - secondary) + "\t")
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
128
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
129
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
130
131
        lib <- summary.libraries(sample)
      ) {
132
133
134
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
135
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
136
137
138
139
140
141
142
143
144
145
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
146
147
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
148
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    plot.title = Some("Aligned reads")
150
151
152
    plot.runLocal()
  }

153
154
155
156
157
158
159
160
  /**
   * Generate a line plot for insertsize
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
161
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
164
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
                     sampleId: Option[String] = None,
                     libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
169
170
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
172
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
173
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
178
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
179
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
180
181
182
183
184
185
      }
    }

    var map: Map[Int, Map[String, Int]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
189
190
191
192

      val insertSize = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
193
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
196
197
198
199
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toInt
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202
203
204
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
207
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
208
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
211
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
214

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
217
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
218
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
221
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
222
223
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
226
227
228
229
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
230
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
233
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
Sander Bollen's avatar
Sander Bollen committed
234
    plot.xlabel = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
235
    plot.width = Some(1200)
236
    plot.removeZero = true
Sander Bollen's avatar
Sander Bollen committed
237
    plot.title = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
240

241
242
243
244
245
246
247
248
  /**
   * Generate a line plot for wgs coverage
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
249
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
250
251
252
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
257
258
259
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
260
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
261
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
264
265
266
267
268
269
270
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

271
    var map: Map[Int, Map[String, Long]] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
274
275
276
277
278
279
280

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "coverage"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
281
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
282
283
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
284
            val count = i._2.toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
285
286
287
288
289
290
291
292
293
294
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
295
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
296
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
297
298
299
300
301
302
303
304
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
305
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
306
307
308
309
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
        ) {
          tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
310
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
311
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
312
313
314
315
316
317
318
319
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
320
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
321
322
323
324
325
326
327
328
329
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
    plot.xlabel = Some("Coverage")
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Whole genome coverage")
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

  /**
    * Generate a line plot for rna coverage
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

    var map: Map[Int, Map[String, Double]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
        case (l: List[_], l2: List[_]) =>
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toDouble
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
        ) {
          tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
        }
      } else {
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

    val plot = new LinePlot(null)
    plot.input = tsvFile
    plot.output = pngFile
Peter van 't Hof's avatar
Peter van 't Hof committed
413
414
    plot.xlabel = Some("Reletive position")
    plot.ylabel = Some("Coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
415
416
417
418
419
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Rna coverage")
    plot.runLocal()
  }
420
}