BammetricsReport.scala 17 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
22
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
23
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
24

25
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
26
  def builder = BammetricsReport
27
}
28

29
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * Object to create a report for [[BamMetrics]]
 *
32
33
34
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
35

36
  /** Name of report */
37
38
  val reportName = "Bam Metrics"

39
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
47
48
49
50
51
52
53
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
54

55
56
57
58
59
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63

    val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined
    val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined

64
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
65
66
      summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
69
70
71
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
72
73

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
74
75
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
76
        List(),
77
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
78
        Map())),
79
80
      List(
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
Peter van 't Hof's avatar
Peter van 't Hof committed
81
        "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
84
85
86
87
      ) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
88
      Map("metricsTag" -> metricsTag)
89
90
91
    )
  }

92
93
94
95
96
97
98
99
  /**
   * Generate a stackbar plot for alignment stats
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
      sb.append((mapped - duplicates) + "\t")
      sb.append(duplicates + "\t")
      sb.append((total - mapped - secondary) + "\t")
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
130
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
131
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
132
133
        lib <- summary.libraries(sample)
      ) {
134
135
136
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
137
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
138
139
140
141
142
143
144
145
146
147
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
148
149
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
151
    plot.title = Some("Aligned reads")
152
153
154
    plot.runLocal()
  }

155
156
157
158
159
160
161
162
  /**
   * Generate a line plot for insertsize
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
163
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
                     sampleId: Option[String] = None,
                     libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
172
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
173
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
174
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
175
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
181
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
184
185
186
187
      }
    }

    var map: Map[Int, Map[String, Int]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
191
192
193
194

      val insertSize = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
195
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199
200
201
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toInt
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
204
205
206
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
210
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
216

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
219
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
220
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
223
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
229
230
231
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
232
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
235
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
Sander Bollen's avatar
Sander Bollen committed
236
    plot.xlabel = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
237
    plot.width = Some(1200)
238
    plot.removeZero = true
Sander Bollen's avatar
Sander Bollen committed
239
    plot.title = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
242

243
244
245
246
247
248
249
250
  /**
   * Generate a line plot for wgs coverage
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
251
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
254
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
260
261
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
262
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
263
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
264
265
266
267
268
269
270
271
272
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

273
    var map: Map[Int, Map[String, Long]] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
276
277
278
279
280
281
282

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "coverage"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
283
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
284
285
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
286
            val count = i._2.toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
287
288
289
290
291
292
293
294
295
296
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
297
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
298
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
299
300
301
302
303
304
305
306
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
307
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
308
309
310
311
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
        ) {
          tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
312
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
313
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
314
315
316
317
318
319
320
321
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
322
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
323
324
325
326
327
328
329
330
331
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
    plot.xlabel = Some("Coverage")
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Whole genome coverage")
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
332
333

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
334
335
336
337
338
339
340
   * Generate a line plot for rna coverage
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

    var map: Map[Int, Map[String, Double]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
        case (l: List[_], l2: List[_]) =>
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toDouble
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
        ) {
          tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
        }
      } else {
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

    val plot = new LinePlot(null)
    plot.input = tsvFile
    plot.output = pngFile
Peter van 't Hof's avatar
Peter van 't Hof committed
415
416
    plot.xlabel = Some("Reletive position")
    plot.ylabel = Some("Coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
417
418
419
420
421
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Rna coverage")
    plot.runLocal()
  }
422
}