BammetricsReport.scala 12.8 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16 17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.summary.{ Summary, SummaryValue }
23
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
24

25 26 27
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
  val builder = BammetricsReport
}
28

29
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30 31
 * Object to create a report for [[BamMetrics]]
 *
32 33 34
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
35

36
  /** Name of report */
37 38
  val reportName = "Bam Metrics"

39
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
40 41 42
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
43 44
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
45 46 47 48 49 50 51 52 53
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
54

55 56 57 58 59
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
60
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
61 62
      summary.getValue(sampleId, libId, "bammetrics", "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, "bammetrics", "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
63 64 65 66 67
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
68 69

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
70 71
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
72
        List(),
Peter van 't Hof's avatar
Peter van 't Hof committed
73 74
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> t))),
        Map())),
75 76
      List(
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
77 78
        "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)),
        "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", Map("showPlot" -> true))
79
      ),
80
      Map("metricsTag" -> metricsTag)
81 82 83
    )
  }

84 85 86 87 88 89 90 91
  /**
   * Generate a stackbar plot for alignment stats
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
      sb.append((mapped - duplicates) + "\t")
      sb.append(duplicates + "\t")
      sb.append((total - mapped - secondary) + "\t")
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
122
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
123
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
124 125
        lib <- summary.libraries(sample)
      ) {
126 127 128
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
130 131 132 133 134 135 136 137 138 139
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
140 141
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
142
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
    plot.title = Some("Aligned reads")
144 145 146
    plot.runLocal()
  }

147 148 149 150 151 152 153 154
  /**
   * Generate a line plot for insertsize
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
155
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
156 157 158 159
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
                     sampleId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
160 161 162 163
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
164
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
165
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
166 167 168
        lib <- summary.libraries(sample)
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
169 170 171
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
172
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
173 174 175 176 177 178
      }
    }

    var map: Map[Int, Map[String, Int]] = Map()

    def fill(sample: String, lib: Option[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
179 180 181 182 183 184 185

      val insertSize = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
186
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
187 188 189 190 191 192
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
            val count = i._2.toString.toInt
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
Peter van 't Hof's avatar
Peter van 't Hof committed
193 194 195 196 197
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
198 199
    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
200
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
201 202 203 204
        lib <- summary.libraries(sample)
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))
Peter van 't Hof's avatar
Peter van 't Hof committed
205 206 207

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
Peter van 't Hof's avatar
Peter van 't Hof committed
208 209
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
210
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
211 212 213
          lib <- summary.libraries(sample)
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
214
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
215 216
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
217 218 219 220 221 222
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
223
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
224 225 226
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
Sander Bollen's avatar
Sander Bollen committed
227
    plot.xlabel = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    plot.width = Some(1200)
229
    plot.removeZero = true
Sander Bollen's avatar
Sander Bollen committed
230
    plot.title = Some("Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
231 232
    plot.runLocal()
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
233

234 235 236 237 238 239 240 241
  /**
   * Generate a line plot for wgs coverage
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
242
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
243 244 245 246
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
247 248 249 250 251
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) {
      tsvWriter.println((for (
Peter van 't Hof's avatar
Peter van 't Hof committed
252
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
253 254 255 256 257 258 259 260 261 262
        lib <- summary.libraries(sample)
      ) yield s"$sample-$lib")
        .mkString("library\t", "\t", ""))
    } else {
      sampleId match {
        case Some(sample) => tsvWriter.println("\t" + sample)
        case _            => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
      }
    }

263
    var map: Map[Int, Map[String, Long]] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
264 265 266 267 268 269 270 271 272

    def fill(sample: String, lib: Option[String]): Unit = {

      val insertSize = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "coverage"),
        summary, Some(sample), lib).value.getOrElse(List())
      val counts = new SummaryValue(List("bammetrics", "stats", "wgs", "histogram", "count"),
        summary, Some(sample), lib).value.getOrElse(List())

      (insertSize, counts) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
273
        case (l: List[_], l2: List[_]) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
274 275
          l.zip(l2).foreach(i => {
            val insertSize = i._1.toString.toInt
276
            val count = i._2.toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
277 278 279 280 281 282 283 284 285 286
            val old = map.getOrElse(insertSize, Map())
            if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
            else map += insertSize -> (old + (sample -> count))
          })
        case _ => throw new IllegalStateException("Must be a list")
      }
    }

    if (libraryLevel) {
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
287
        sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
288 289 290 291 292 293 294 295 296
        lib <- summary.libraries(sample)
      ) fill(sample, Some(lib))
    } else if (sampleId.isDefined) fill(sampleId.get, None)
    else summary.samples.foreach(fill(_, None))

    for ((insertSize, counts) <- map) {
      tsvWriter.print(insertSize)
      if (libraryLevel) {
        for (
Peter van 't Hof's avatar
Peter van 't Hof committed
297
          sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
Peter van 't Hof's avatar
Peter van 't Hof committed
298 299 300
          lib <- summary.libraries(sample)
        ) tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
      } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
301
        for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
Peter van 't Hof's avatar
Peter van 't Hof committed
302 303 304 305 306 307 308 309
          tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
        }
      }
      tsvWriter.println()
    }

    tsvWriter.close()

Peter van 't Hof's avatar
Peter van 't Hof committed
310
    val plot = new LinePlot(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
311 312 313 314 315 316 317 318 319
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
    plot.xlabel = Some("Coverage")
    plot.width = Some(1200)
    plot.removeZero = true
    plot.title = Some("Whole genome coverage")
    plot.runLocal()
  }
320
}