BammetricsReport.scala 13.6 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
21
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
22
import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot }
23

24
class BammetricsReport(val root: Configurable) extends ReportBuilderExtension {
25
  def builder = BammetricsReport
26
}
27

28
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30
 * Object to create a report for [[BamMetrics]]
 *
31 32 33
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
34

35
  /** Name of report */
36 37
  val reportName = "Bam Metrics"

38
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
39 40 41
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
42 43
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
44 45 46 47 48 49 50 51 52
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
53

54 55 56 57 58
  /** Generates a page with alignment stats */
  def bamMetricsPage(summary: Summary,
                     sampleId: Option[String],
                     libId: Option[String],
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59 60 61 62

    val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined
    val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined

Peter van 't Hof's avatar
Peter van 't Hof committed
63 64
    val insertsizeMetrics = summary.getValue(sampleId, libId, metricsTag, "stats", "CollectInsertSizeMetrics", "metrics") match {
      case Some(None) => false
Peter van 't Hof's avatar
Peter van 't Hof committed
65 66
      case Some(_)    => true
      case _          => false
Peter van 't Hof's avatar
Peter van 't Hof committed
67
    }
68

69
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
70 71
      summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"),
      summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74 75 76
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
77 78

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
79 80
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
81
        List(),
82
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
83
        Map())),
84
      List(
85 86
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp")) ++
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
87 88
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90 91 92 93
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
94
      Map("metricsTag" -> metricsTag)
95 96 97
    )
  }

98 99
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
100
   *
101 102 103 104 105 106
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
                           summary: Summary,
                           libraryLevel: Boolean = false,
                           sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val mapped = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Mapped"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val duplicates = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "Duplicates"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val total = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "All"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val secondary = new SummaryValue(List("bammetrics", "stats", "biopet_flagstat", "NotPrimaryAlignment"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      sb.append((mapped - duplicates - secondary) + "\t")
130
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      sb.append((total - mapped) + "\t")
132 133 134 135 136
      sb.append(secondary)
      sb.toString
    }

    if (libraryLevel) {
137
      for (
Peter van 't Hof's avatar
Peter van 't Hof committed
138
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
139 140
        lib <- summary.libraries(sample)
      ) {
141 142 143
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
144
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
145 146 147 148 149 150 151 152 153 154
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
155 156
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
157
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
158
    plot.title = Some("Aligned reads")
159 160 161
    plot.runLocal()
  }

162 163
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
164
   *
165 166 167 168 169 170
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
171
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
172 173 174
                     prefix: String,
                     summary: Summary,
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
175 176
                     sampleId: Option[String] = None,
                     libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
177 178 179
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
180 181 182 183
    def paths(name: String) = Map(
      "insert_size" -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "insert_size"),
      name -> List("bammetrics", "stats", "CollectInsertSizeMetrics", "histogram", "All_Reads.fr_count")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
184

Peter van 't Hof's avatar
Peter van 't Hof committed
185
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
Peter van 't Hof's avatar
Peter van 't Hof committed
186 187 188
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
189
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
190
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "insert_size"), "insert_size")
Peter van 't Hof's avatar
Peter van 't Hof committed
191

Peter van 't Hof's avatar
Peter van 't Hof committed
192
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
193 194 195 196
      xlabel = Some("Insert size"),
      ylabel = Some("Reads"),
      title = Some("Insert size"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
197
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
198

199 200
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
201
   *
202 203 204 205 206 207
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
208
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
209 210 211
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
212 213
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
214 215 216
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
217 218 219 220
    def paths(name: String) = Map(
      "coverage" -> List("bammetrics", "stats", "wgs", "histogram", "coverage"),
      name -> List("bammetrics", "stats", "wgs", "histogram", "count")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
221

Peter van 't Hof's avatar
Peter van 't Hof committed
222
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
Peter van 't Hof's avatar
Peter van 't Hof committed
223 224 225
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
226
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
227
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "coverage"), "coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
228

Peter van 't Hof's avatar
Peter van 't Hof committed
229
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
230 231 232 233
      xlabel = Some("Coverage"),
      ylabel = Some("Bases"),
      title = Some("Whole genome coverage"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
234
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
235 236

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
237
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
238
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
239 240 241 242 243 244
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
245 246 247 248 249 250 251 252 253
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
                       summary: Summary,
                       libraryLevel: Boolean = false,
                       sampleId: Option[String] = None,
                       libId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

Peter van 't Hof's avatar
Peter van 't Hof committed
254 255 256 257
    def paths(name: String) = Map(
      "normalized_position" -> List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
      name -> List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage")
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
258

Peter van 't Hof's avatar
Peter van 't Hof committed
259
    val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
Peter van 't Hof's avatar
Peter van 't Hof committed
260 261 262
      .map {
        case (sample, lib) =>
          getTableFromSummary(summary, paths(lib.map(l => s"$sample-$l").getOrElse(sample)), Some(sample), lib)
Peter van 't Hof's avatar
Peter van 't Hof committed
263
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
264
    writeTableToTsv(tsvFile, mergeTables(tables.toArray, "normalized_position"), "normalized_position")
Peter van 't Hof's avatar
Peter van 't Hof committed
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
    LinePlot(tsvFile, pngFile,
Peter van 't Hof's avatar
Peter van 't Hof committed
267 268 269 270
      xlabel = Some("Relative position"),
      ylabel = Some("Coverage"),
      title = Some("Rna coverage"),
      removeZero = true).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
271
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
272

Peter van 't Hof's avatar
Peter van 't Hof committed
273 274 275 276 277 278 279 280 281 282
  private def getSampleLibraries(summary: Summary,
                                 sampleId: Option[String] = None,
                                 LibId: Option[String] = None,
                                 libraryLevel: Boolean = false): List[(String, Option[String])] = {
    if (LibId.isDefined) require(sampleId.isDefined)
    if (libraryLevel || LibId.isDefined)
      for ((sample, libs) <- summary.libraries.toList; lib <- libs) yield (sample, Some(lib))
    else for ((sample, libs) <- summary.libraries.toList) yield (sample, None)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
283 284 285 286
  def getTableFromSummary(summary: Summary,
                          paths: Map[String, List[String]],
                          sampleId: Option[String] = None,
                          libId: Option[String] = None): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
287 288 289 290 291
    val pathValues: Map[String, Array[Any]] = paths.map {
      case (key, path) =>
        val value = summary.getValueAsArray(sampleId, libId, path: _*)
        require(value.isDefined, s"Sample: $sampleId, library: $libId on path: '${path.mkString(",")}' does not exist in summary")
        key -> value.get
Peter van 't Hof's avatar
Peter van 't Hof committed
292 293 294 295 296
    }
    require(pathValues.map(_._2.size).toList.distinct == 1, s"Arrays in summary does not have the same number of values, $paths")
    pathValues
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
297 298
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
299 300 301 302 303 304 305 306 307 308 309
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
    require(table.map(_._2.size).toList.distinct == 1, "Not all values has the same number or rows")
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
310 311 312
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
313 314 315
    }
    writer.close()
  }
316
}