FlexiprepReport.scala 7.11 KB
Newer Older
1 2
package nl.lumc.sasc.biopet.pipelines.flexiprep

3
import java.io.{ PrintWriter, File }
4

Peter van 't Hof's avatar
Peter van 't Hof committed
5 6
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection, ReportPage, ReportBuilder }
7
import nl.lumc.sasc.biopet.core.summary.{ SummaryValue, Summary }
8
import nl.lumc.sasc.biopet.extensions.rscript.StackedBarPlot
9

Peter van 't Hof's avatar
Peter van 't Hof committed
10 11 12 13
/**
 * Report extension for Flexiprep that uses the [[FlexiprepReport]] companion object as its builder.
 *
 * @param root Configurable this extension is created with; how it is used is defined by
 *             ReportBuilderExtension, which is not visible in this file
 */
class FlexiprepReport(val root: Configurable) extends ReportBuilderExtension {
  // The companion object below holds the actual page and plot definitions
  val builder = FlexiprepReport
}

14 15 16 17 18 19
/**
 * Created by pjvan_thof on 3/30/15.
 */
object FlexiprepReport extends ReportBuilder {
  /** Name of this report; presumably consumed by ReportBuilder for titles/output naming — confirm there */
  val reportName = "Flexiprep"

Peter van 't Hof's avatar
Peter van 't Hof committed
20 21
  /** Extra page arguments for this report: marks it as not being a multisample report */
  override def pageArgs = Map("multisample" -> false)

22 23 24
  /**
   * Front page of a flexiprep report.
   *
   * Has two sub pages ("Versions" and "Files") and starts with the "Report" front section,
   * followed by all sections of [[flexiprepPage]].
   */
  def indexPage = {
    // QC sections are shared with the per-library page and appended after the front section
    val qcSections = this.flexiprepPage.sections

    val versionsPage = ReportPage(
      List(),
      List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")),
      Map()
    )

    val filesPage = ReportPage(
      List(),
      List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"),
        "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp")
      ),
      Map()
    )

    ReportPage(
      List("Versions" -> versionsPage, "Files" -> filesPage),
      List("Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFront.ssp")) ::: qcSections,
      Map()
    )
  }
38

39 40
  /**
   * Generates the QC report page for one single library.
   *
   * The page has no sub pages and no page arguments of its own; sampleId and libId must be
   * defined in the arguments when the page is rendered. It contains the read and base summary
   * sections followed by one section per fastqc plot (see [[fastqcPlotSection]]).
   *
   * @return Page holding all flexiprep QC sections
   */
  def flexiprepPage: ReportPage = ReportPage(
    List(),
    List(
      "Read Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
      "Base Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp"),
      fastqcPlotSection("Base quality", "plot_per_base_quality"),
      fastqcPlotSection("Sequence quality", "plot_per_sequence_quality"),
      fastqcPlotSection("Base GC content", "plot_per_base_gc_content"),
      fastqcPlotSection("Sequence GC content", "plot_per_sequence_gc_content"),
      // Title spelling corrected (was "seqeunce"); the plot tag itself is unchanged
      fastqcPlotSection("Base sequence content", "plot_per_base_sequence_content"),
      fastqcPlotSection("Duplication", "plot_duplication_levels"),
      fastqcPlotSection("Kmers", "plot_kmer_profiles"),
      fastqcPlotSection("Length distribution", "plot_sequence_length_distribution")
    ),
    Map()
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
56

57
  /**
   * Creates a named section backed by the flexiprepFastaqcPlot.ssp template.
   *
   * @param name Title of the section
   * @param tag Value passed to the template as its "plot" argument
   * @return Pair of the section title and the section itself
   */
  protected def fastqcPlotSection(name: String, tag: String) = {
    val plotSection = ReportSection(
      "/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastaqcPlot.ssp",
      Map("plot" -> tag)
    )
    (name, plotSection)
  }

61 62 63 64 65 66 67 68
  /**
   * Generates a stacked bar plot for reads QC.
   *
   * Writes `prefix`.tsv with one row per library (reads left after QC, reads discarded by
   * clipping, reads discarded by trimming and the remaining difference, labelled "Synced")
   * and then runs a [[StackedBarPlot]] on that file to produce `prefix`.png.
   *
   * @param outputDir OutputDir for plot
   * @param prefix prefix for tsv and png file
   * @param read Must give "R1" or "R2"
   * @param summary Summary class
   * @param sampleId Default selects all samples; when given, the plot is limited to the given sample
   */
  def readSummaryPlot(outputDir: File,
                      prefix: String,
                      read: String,
                      summary: Summary,
                      sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    /** True when no sample filter is given or when the sample is the requested one */
    def isSelected(sample: String): Boolean = sampleId.forall(_ == sample)

    /** Looks up a count below flexiprep -> stats for one library; a missing value counts as 0 */
    def stat(sample: String, lib: String, path: String*): Long =
      new SummaryValue("flexiprep" :: "stats" :: path.toList,
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong

    /** Builds the tab separated row of one library */
    def getLine(sample: String, lib: String): String = {
      val beforeTotal = stat(sample, lib, "seqstat_" + read, "reads", "num_total")
      val afterTotal = stat(sample, lib, "seqstat_" + read + "_after", "reads", "num_total")
      val clippingDiscardedToShort = stat(sample, lib, "clipping_" + read, "num_reads_discarded_too_short")
      val clippingDiscardedToLong = stat(sample, lib, "clipping_" + read, "num_reads_discarded_too_long")
      val trimmingDiscarded = stat(sample, lib, "trimming", "num_reads_discarded_" + read)

      val clippingDiscarded = clippingDiscardedToShort + clippingDiscardedToLong
      // Whatever is not explained by clipping or trimming ends up in the "Synced" column
      val synced = beforeTotal - afterTotal - trimmingDiscarded - clippingDiscarded

      Seq(sample + "-" + lib, afterTotal.toString, clippingDiscarded.toString,
        trimmingDiscarded.toString, synced.toString).mkString("\t")
    }

    val tsvWriter = new PrintWriter(tsvFile)
    // Close in finally so the file handle is released even when a lookup or number parse throws
    try {
      tsvWriter.println("Library\tAfter_QC\tClipping\tTrimming\tSynced")
      for (
        sample <- summary.samples if isSelected(sample);
        lib <- summary.libraries(sample)
      ) {
        tsvWriter.println(getLine(sample, lib))
      }
    } finally {
      tsvWriter.close()
    }

    // Number of libraries that ended up in the plot; used to scale the width of the image
    val libraryCount = summary.libraries
      .filter { case (sample, _) => isSelected(sample) }
      .foldLeft(0)(_ + _._2.size)

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
    plot.width = Some(200 + (libraryCount * 10))
    plot.title = Some("QC summary on " + read + " reads")
    plot.runLocal()
  }

118 119 120 121 122 123 124 125
  /**
   * Generates a stacked bar plot for bases QC.
   *
   * Writes `prefix`.tsv with one row per library (bases left after QC and bases discarded)
   * and then runs a [[StackedBarPlot]] on that file to produce `prefix`.png.
   *
   * @param outputDir OutputDir for plot
   * @param prefix prefix for tsv and png file
   * @param read Must give "R1" or "R2"
   * @param summary Summary class
   * @param sampleId Default selects all samples; when given, the plot is limited to the given sample
   */
  def baseSummaryPlot(outputDir: File,
                      prefix: String,
                      read: String,
                      summary: Summary,
                      sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    /** True when no sample filter is given or when the sample is the requested one */
    def isSelected(sample: String): Boolean = sampleId.forall(_ == sample)

    /** Looks up the total number of bases of one seqstat entry; a missing value counts as 0 */
    def baseCount(sample: String, lib: String, seqstat: String): Long =
      new SummaryValue(List("flexiprep", "stats", seqstat, "bases", "num_total"),
        summary, Some(sample), Some(lib)).value.getOrElse(0).toString.toLong

    /** Builds the tab separated row of one library */
    def getLine(sample: String, lib: String): String = {
      val beforeTotal = baseCount(sample, lib, "seqstat_" + read)
      val afterTotal = baseCount(sample, lib, "seqstat_" + read + "_after")

      Seq(sample + "-" + lib, afterTotal.toString, (beforeTotal - afterTotal).toString).mkString("\t")
    }

    val tsvWriter = new PrintWriter(tsvFile)
    // Close in finally so the file handle is released even when a lookup or number parse throws
    try {
      tsvWriter.println("Library\tAfter_QC\tDiscarded")
      for (
        sample <- summary.samples if isSelected(sample);
        lib <- summary.libraries(sample)
      ) {
        tsvWriter.println(getLine(sample, lib))
      }
    } finally {
      tsvWriter.close()
    }

    // Number of libraries that ended up in the plot; used to scale the width of the image
    val libraryCount = summary.libraries
      .filter { case (sample, _) => isSelected(sample) }
      .foldLeft(0)(_ + _._2.size)

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Bases")
    plot.width = Some(200 + (libraryCount * 10))
    plot.title = Some("QC summary on " + read + " bases")
    plot.runLocal()
  }
166
}