FlexiprepReport.scala 7.84 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16 17
package nl.lumc.sasc.biopet.pipelines.flexiprep

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportBuilder, ReportPage, ReportSection }
22
import nl.lumc.sasc.biopet.utils.rscript.StackedBarPlot
23
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
24

Peter van 't Hof's avatar
Peter van 't Hof committed
25 26 27 28
class FlexiprepReport(val root: Configurable) extends ReportBuilderExtension {
  val builder = FlexiprepReport
}

29
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30 31
 * Class to generate a report for [[Flexiprep]]
 *
32 33 34 35 36
 * Created by pjvan_thof on 3/30/15.
 */
object FlexiprepReport extends ReportBuilder {
  val reportName = "Flexiprep"

Peter van 't Hof's avatar
Peter van 't Hof committed
37
  override def pageArgs = Map("multisample" -> false)
38

39 40 41
  /** Index page for a flexiprep report */
  def indexPage = {
    val flexiprepPage = this.flexiprepPage
Peter van 't Hof's avatar
Peter van 't Hof committed
42 43
    ReportPage(List("Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
    )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
44 45 46 47
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"),
        "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp")
      ), Map())
48 49 50 51 52 53
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFront.ssp")
    ) ::: flexiprepPage.sections,
      Map()
    )
  }
54

55 56
  /** Generate a QC report page for 1 single library, sampleId and libId must be defined in the arguments */
  def flexiprepPage: ReportPage = ReportPage(
57
    List(),
58
    List(
59 60 61 62 63 64 65 66 67 68 69
      "Read Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
      "Base Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp"),
      fastqcPlotSection("Base quality", "plot_per_base_quality"),
      fastqcPlotSection("Sequence quality", "plot_per_sequence_quality"),
      fastqcPlotSection("Base GC content", "plot_per_base_gc_content"),
      fastqcPlotSection("Sequence GC content", "plot_per_sequence_gc_content"),
      fastqcPlotSection("Base seqeunce content", "plot_per_base_sequence_content"),
      fastqcPlotSection("Duplication", "plot_duplication_levels"),
      fastqcPlotSection("Kmers", "plot_kmer_profiles"),
      fastqcPlotSection("Length distribution", "plot_sequence_length_distribution")
    ),
70 71
    Map()
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
72

73
  protected def fastqcPlotSection(name: String, tag: String) = {
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    name -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastaqcPlot.ssp", Map("plot" -> tag))
75 76
  }

77 78 79 80 81 82 83 84
  /**
   * Generated a stacked bar plot for reads QC
   * @param outputDir OutputDir for plot
   * @param prefix prefix for tsv and png file
   * @param read Must give "R1" or "R2"
   * @param summary Summary class
   * @param sampleId Default selects all samples, when given plot is limits on given sample
   */
85
  def readSummaryPlot(outputDir: File,
86 87 88 89
                      prefix: String,
                      read: String,
                      summary: Summary,
                      sampleId: Option[String] = None): Unit = {
90 91 92 93 94 95 96 97
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    tsvWriter.println("Library\tAfter_QC\tClipping\tTrimming\tSynced")

    def getLine(summary: Summary, sample: String, lib: String): String = {
      val beforeTotal = new SummaryValue(List("flexiprep", "stats", "seqstat_" + read, "reads", "num_total"),
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
98
      val afterTotal = new SummaryValue(List("flexiprep", "stats", "seqstat_" + read + "_qc", "reads", "num_total"),
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong
      val clippingDiscardedToShort = new SummaryValue(List("flexiprep", "stats", "clipping_" + read, "num_reads_discarded_too_short"),
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong
      val clippingDiscardedToLong = new SummaryValue(List("flexiprep", "stats", "clipping_" + read, "num_reads_discarded_too_long"),
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong
      val trimmingDiscarded = new SummaryValue(List("flexiprep", "stats", "trimming", "num_reads_discarded_" + read),
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong

      val sb = new StringBuffer()
      sb.append(sample + "-" + lib + "\t")
      sb.append(afterTotal + "\t")
      sb.append((clippingDiscardedToShort + clippingDiscardedToLong) + "\t")
      sb.append(trimmingDiscarded + "\t")
      sb.append(beforeTotal - afterTotal - trimmingDiscarded - clippingDiscardedToShort - clippingDiscardedToLong)
      sb.toString
    }

116
    for (
Peter van 't Hof's avatar
Peter van 't Hof committed
117
      sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
118 119
      lib <- summary.libraries(sample)
    ) {
120 121 122 123 124 125 126 127 128
      tsvWriter.println(getLine(summary, sample, lib))
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
129
    plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
130
    plot.title = Some("QC summary on " + read + " reads")
131 132 133
    plot.runLocal()
  }

134 135 136 137 138 139 140 141
  /**
   * Generated a stacked bar plot for bases QC
   * @param outputDir OutputDir for plot
   * @param prefix prefix for tsv and png file
   * @param read Must give "R1" or "R2"
   * @param summary Summary class
   * @param sampleId Default selects all samples, when given plot is limits on given sample
   */
142 143 144 145 146 147 148 149 150 151 152 153 154
  def baseSummaryPlot(outputDir: File,
                      prefix: String,
                      read: String,
                      summary: Summary,
                      sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    tsvWriter.println("Library\tAfter_QC\tDiscarded")

    def getLine(summary: Summary, sample: String, lib: String): String = {
      val beforeTotal = new SummaryValue(List("flexiprep", "stats", "seqstat_" + read, "bases", "num_total"),
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      val afterTotal = new SummaryValue(List("flexiprep", "stats", "seqstat_" + read + "_qc", "bases", "num_total"),
156 157 158 159 160 161 162 163 164
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong

      val sb = new StringBuffer()
      sb.append(sample + "-" + lib + "\t")
      sb.append(afterTotal + "\t")
      sb.append(beforeTotal - afterTotal)
      sb.toString
    }

165
    for (
Peter van 't Hof's avatar
Peter van 't Hof committed
166
      sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
167 168
      lib <- summary.libraries(sample)
    ) {
169 170 171 172 173 174 175 176 177
      tsvWriter.println(getLine(summary, sample, lib))
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
178
    plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
179
    plot.title = Some("QC summary on " + read + " bases")
180 181
    plot.runLocal()
  }
182
}