BammetricsReport.scala 16.6 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
22
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
23
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
24

25
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
26
  def builder = BammetricsReport
27
}
28

29
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * Object to create a report for [[BamMetrics]]
 *
32
33
34
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
35

36
  /** Name of report */
37
38
  val reportName = "Bam Metrics"

39
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
47
48
49
50
51
52
53
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
54

55
56
57
58
59
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
60
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
      summary.getValue(sampleId, libId, "bammetrics", "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, "bammetrics", "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
65
66
67
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
68
69

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
72
        List(),
73
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
74
        Map())),
75
76
      List(
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
77
78
        "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)),
        "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", Map("showPlot" -> true))
79
      ),
80
      Map("metricsTag" -> metricsTag)
81
82
83
    )
  }

84
85
86
87
88
89
90
91
  /**
   * Generate a stackbar plot for alignment stats
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
      sb.append((mapped - duplicates) + "\t")
      sb.append(duplicates + "\t")
      sb.append((total - mapped - secondary) + "\t")
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
122
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
123
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
124
125
        lib <- summary.libraries(sample)
      ) {
126
127
128
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
130
131
132
133
134
135
136
137
138
139
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
140
141
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
142
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
    plot.title = Some("Aligned reads")
144
145
146
    plot.runLocal()
  }

147
148
149
150
151
152
153
154
  /**
   * Generate a line plot for insertsize
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
155
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
                     sampleId: Option[String] = None,
                     libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163
164
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
166
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
167
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
168
169
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
172
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
173
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
176
177
178
179
      }
    }

    var map: Map[Int, Map[String, Int]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
180
181
182
183
184
185
186

      val insertSize = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
187
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
191
192
193
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toInt
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
196
197
198
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
199
200
    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
201
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
202
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
205
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))
Peter van 't Hof's avatar
Peter van 't Hof committed
206
207
208

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
211
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
212
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
215
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
216
217
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
218
219
220
221
222
223
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
224
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
227
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
Sander Bollen's avatar
Sander Bollen committed
228
    plot.xlabel = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
229
    plot.width = Some(1200)
230
    plot.removeZero = true
Sander Bollen's avatar
Sander Bollen committed
231
    plot.title = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
232
233
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
234

235
236
237
238
239
240
241
242
  /**
   * Generate a line plot for wgs coverage
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
243
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
244
245
246
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251
252
253
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
254
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
255
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
256
257
258
259
260
261
262
263
264
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

265
    var map: Map[Int, Map[String, Long]] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
269
270
271
272
273
274

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "coverage"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
275
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
276
277
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
278
            val count = i._2.toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
279
280
281
282
283
284
285
286
287
288
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
289
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
290
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
Peter van 't Hof's avatar
Peter van 't Hof committed
291
292
293
294
295
296
297
298
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
299
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
300
301
302
303
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
        ) {
          tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
304
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
305
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
306
307
308
309
310
311
312
313
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
314
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
315
316
317
318
319
320
321
322
323
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
    plot.xlabel = Some("Coverage")
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Whole genome coverage")
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413

  /**
    * Generate a line plot for rna coverage
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

    var map: Map[Int, Map[String, Double]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
        case (l: List[_], l2: List[_]) =>
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toDouble
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
        lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
          lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
        ) {
          tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
        }
      } else {
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

    val plot = new LinePlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reletive position")
    plot.xlabel = Some("Coverage")
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Rna coverage")
    plot.runLocal()
  }
414
}