BammetricsReport.scala 13.6 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
21
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
22
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
23

24
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
25
  def builder = BammetricsReport
26
}
27

28
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30
 * Object to create a report for [[BamMetrics]]
 *
31 32 33
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
34

35
  /** Name of report */
36 37
  val reportName = "Bam Metrics"

38
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
39 40 41
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
42 43
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
44 45 46 47 48 49 50 51 52
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
53

54 55 56 57 58
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59 60 61 62

    val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined
    val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined

Peter van 't Hof's avatar
Peter van 't Hof committed
63 64
    val insertsizeMetrics = summary.getValue(sampleId, libId, metricsTag, "stats", "CollectInsertSizeMetrics", "metrics") match {
      case Some(None) => false
Peter van 't Hof's avatar
Peter van 't Hof committed
65 66
      case Some(_)    => true
      case _          => false
Peter van 't Hof's avatar
Peter van 't Hof committed
67
    }
68

69
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
70 71
      summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74 75 76
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
77 78

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
79 80
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
81
        List(),
82
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
83
        Map())),
84
      List(
85 86
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp")) ++
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
87 88
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90 91 92 93
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
94
      Map("metricsTag" -> metricsTag)
95 96 97
    )
  }

98 99
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
100
   *
101 102 103 104 105 106
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      sb.append((mapped - duplicates - secondary) + "\t")
130
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      sb.append((total - mapped) + "\t")
132 133 134 135 136
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
137
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
138
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
139 140
        lib <- summary.libraries(sample)
      ) {
141 142 143
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
144
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
145 146 147 148 149 150 151 152 153 154
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
155 156
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
157
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
158
    plot.title = Some("Aligned reads")
159 160 161
    plot.runLocal()
  }

162 163
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
164
   *
165 166 167 168 169 170
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
171
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
172 173 174
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
175 176
                     sampleId: Option[String] = None,
                     libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
177 178 179
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
180 181 182 183
    def paths(name: String) = Map(
      "insert_size" -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
      name -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
184

Peter van 't Hof's avatar
Peter van 't Hof committed
185 186 187
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map { case (sample, lib) =>
        getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
188
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "insert_size"), "insert_size")
Peter van 't Hof's avatar
Peter van 't Hof committed
190

Peter van 't Hof's avatar
Peter van 't Hof committed
191
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
192 193 194 195
      xlabel = Some("Insert size"),
      ylabel = Some("Reads"),
      title = Some("Insert size"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
196
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
197

198 199
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
200
   *
201 202 203 204 205 206
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
207
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
208 209 210
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
211 212
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
213 214 215
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
216 217 218 219
    def paths(name: String) = Map(
      "coverage" -> List("bammetrics", "stats", "wgs", "histogram", "coverage"),
      name -> List("bammetrics", "stats", "wgs", "histogram", "count")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
220

Peter van 't Hof's avatar
Peter van 't Hof committed
221 222 223
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map { case (sample, lib) =>
        getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
224
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
225
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "coverage"), "coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
226

Peter van 't Hof's avatar
Peter van 't Hof committed
227
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
228 229 230 231
      xlabel = Some("Coverage"),
      ylabel = Some("Bases"),
      title = Some("Whole genome coverage"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
232
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
233 234

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
235
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
236
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
237 238 239 240 241 242
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
243 244 245 246 247 248 249 250 251
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
252 253 254 255
    def paths(name: String) = Map(
      "normalized_position" -> List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
      name -> List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
256

Peter van 't Hof's avatar
Peter van 't Hof committed
257 258 259
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
      .map { case (sample, lib) =>
        getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
260
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
261
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "normalized_position"), "normalized_position")
Peter van 't Hof's avatar
Peter van 't Hof committed
262

Peter van 't Hof's avatar
Peter van 't Hof committed
263
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
264 265 266 267
      xlabel = Some("Relative position"),
      ylabel = Some("Coverage"),
      title = Some("Rna coverage"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
268
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
269

Peter van 't Hof's avatar
Peter van 't Hof committed
270 271 272 273 274 275 276 277 278 279
  private def getSampleLibraries(summary: Summary,
                                 sampleId: Option[String] = None,
                                 LibId: Option[String] = None,
                                 libraryLevel: Boolean = false): List[(String, Option[String])] = {
    if (LibId.isDefined) require(sampleId.isDefined)
    if (libraryLevel || LibId.isDefined)
      for ((sample, libs) <- summary.libraries.toList; lib <- libs) yield (sample, Some(lib))
    else for ((sample, libs) <- summary.libraries.toList) yield (sample, None)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
280 281 282 283 284 285 286 287 288 289 290 291 292
  def getTableFromSummary(summary: Summary,
                          paths: Map[String, List[String]],
                          sampleId: Option[String] = None,
                          libId: Option[String] = None): Map[String, Array[Any]] = {
    val pathValues: Map[String, Array[Any]] = paths.map { case (key, path) =>
      val value = summary.getValueAsArray(sampleId, libId, path:_*)
      require(value.isDefined, s"Sample: $sampleId, library: $libId on path: '${path.mkString(",")}' does not exist in summary")
      key -> value.get
    }
    require(pathValues.map(_._2.size).toList.distinct == 1, s"Arrays in summary does not have the same number of values, $paths")
    pathValues
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
293 294
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
    require(table.map(_._2.size).toList.distinct == 1, "Not all values has the same number or rows")
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
    table(firstColumn).zipWithIndex.foreach { case (c, i) =>
      writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
    }
    writer.close()
  }
311
}