BammetricsReport.scala 12.8 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

20
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.summary.{ Summary, SummaryValue }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.rscript.{ StackedBarPlot, LinePlot }
24

25
26
27
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
  val builder = BammetricsReport
}
28

29
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * Object to create a report for [[BamMetrics]]
 *
32
33
34
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
35

36
  /** Name of report */
37
38
  val reportName = "Bam Metrics"

39
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
47
48
49
50
51
52
53
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
54

55
56
57
58
59
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
60
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
      summary.getValue(sampleId, libId, "bammetrics", "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, "bammetrics", "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
65
66
67
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
68
69

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
72
        List(),
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> t))),
        Map())),
75
76
      List(
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
77
78
        "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)),
        "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", Map("showPlot" -> true))
79
      ),
80
      Map("metricsTag" -> metricsTag)
81
82
83
    )
  }

84
85
86
87
88
89
90
91
  /**
   * Generate a stackbar plot for alignment stats
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
      sb.append((mapped - duplicates) + "\t")
      sb.append(duplicates + "\t")
      sb.append((total - mapped - secondary) + "\t")
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
122
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
123
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
124
125
        lib <- summary.libraries(sample)
      ) {
126
127
128
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
130
131
132
133
134
135
136
137
138
139
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
140
141
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
142
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
    plot.title = Some("Aligned reads")
144
145
146
    plot.runLocal()
  }

147
148
149
150
151
152
153
154
  /**
   * Generate a line plot for insertsize
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
155
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
159
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
                     sampleId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
164
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
165
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167
168
        lib <- summary.libraries(sample)
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
172
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
175
176
177
178
      }
    }

    var map: Map[Int, Map[String, Int]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
183
184
185

      val insertSize = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
186
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
187
188
189
190
191
192
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toInt
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
195
196
197
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
198
199
    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
200
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
204
        lib <- summary.libraries(sample)
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
210
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
          lib <- summary.libraries(sample)
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
214
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
219
220
221
222
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
223
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
226
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
Sander Bollen's avatar
Sander Bollen committed
227
    plot.xlabel = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    plot.width = Some(1200)
229
    plot.removeZero = true
Sander Bollen's avatar
Sander Bollen committed
230
    plot.title = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
233

234
235
236
237
238
239
240
241
  /**
   * Generate a line plot for wgs coverage
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
242
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
246
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
249
250
251
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
252
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
256
257
258
259
260
261
262
        lib <- summary.libraries(sample)
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

263
    var map: Map[Int, Map[String, Long]] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
264
265
266
267
268
269
270
271
272

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "coverage"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
273
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
276
            val count = i._2.toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
279
280
281
282
283
284
285
286
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
287
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
288
289
290
291
292
293
294
295
296
        lib <- summary.libraries(sample)
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
297
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
298
299
300
          lib <- summary.libraries(sample)
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
301
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
302
303
304
305
306
307
308
309
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
310
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
311
312
313
314
315
316
317
318
319
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
    plot.xlabel = Some("Coverage")
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Whole genome coverage")
    plot.runLocal()
  }
320
}