ShivaReport.scala 9.74 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.shiva

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

20
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report._
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.summary.{ Summary, SummaryValue }
23
import nl.lumc.sasc.biopet.extensions.rscript.StackedBarPlot
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport
25
26
27
import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport

/**
 * Pipeline extension that executes the Shiva report as a step within a pipeline.
 *
 * The actual report layout is defined by the `ShivaReport` object, which this
 * extension exposes as its `builder`.
 *
 * Created by pjvan_thof on 3/30/15.
 *
 * @param root configurable this extension is constructed with
 */
class ShivaReport(val root: Configurable) extends ReportBuilderExtension {
  val builder = ShivaReport
}

36
/** Object for report generation for Shiva pipeline */
37
object ShivaReport extends MultisampleReportBuilder {
38

39
  /**
   * Root page for the shiva report.
   *
   * Sub pages appear in this order: "Samples", the optional "Regions" page
   * (only when `regionsPage` yields one), "Reference", "Files" and "Versions".
   * Every front-page section except "Report" is rendered with `showPlot`
   * enabled and `showTable` disabled.
   */
  def indexPage = {
    // Evaluated first, as in the original layout of this method.
    val regions: List[(String, ReportPage)] = regionsPage.toList

    // Template arguments shared by the front-page sections.
    val plotOnly = Map("showPlot" -> true, "showTable" -> false)
    val samplePlotOnly = Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)

    val leadingPages: List[(String, ReportPage)] =
      List("Samples" -> generateSamplesPage(pageArgs))

    val trailingPages: List[(String, ReportPage)] = List(
      "Reference" -> ReportPage(
        List(),
        List("Reference" -> ReportSection("/nl/lumc/sasc/biopet/core/report/reference.ssp", Map("pipeline" -> "shiva"))),
        Map()
      ),
      "Files" -> filesPage,
      "Versions" -> ReportPage(
        List(),
        List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")),
        Map()
      )
    )

    val frontSections: List[(String, ReportSection)] = List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp"),
      "Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", plotOnly),
      "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", samplePlotOnly),
      "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", samplePlotOnly),
      "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", samplePlotOnly),
      "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", plotOnly),
      "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", plotOnly)
    )

    ReportPage(leadingPages ++ regions ++ trailingPages, frontSections, pageArgs)
  }
72

73
74
75
  //TODO: Add variants per target
  /**
   * Generate a page with all target coverage stats.
   *
   * Targets are read from the summary settings of the "shiva" pipeline:
   *  - `amplicon_bed`: a single name, shown with the title suffix " (Amplicon)"
   *  - `regions_of_interest`: either a single name or a list of names
   *
   * Each target becomes one coverage section (covstatsMultiTable.ssp) whose
   * `target` argument is the original target name. Sections are sorted by title.
   *
   * @return `Some("Regions" -> page)` when at least one target is configured,
   *         `None` otherwise
   */
  def regionsPage: Option[(String, ReportPage)] = {
    val ampliconSuffix = " (Amplicon)"

    // (section title -> target name) pairs. The target name is kept next to the
    // title instead of being recovered from it later, so a region whose real
    // name happens to end in the amplicon suffix is not truncated.
    val roiTargets: List[(String, String)] =
      summary.getValue("shiva", "settings", "regions_of_interest") match {
        case Some(name: String)   => List(name -> name)
        case Some(names: List[_]) => names.map(n => n.toString -> n.toString)
        case _                    => Nil
      }

    val ampliconTargets: List[(String, String)] =
      summary.getValue("shiva", "settings", "amplicon_bed") match {
        case Some(name: String) => List((name + ampliconSuffix) -> name)
        case _                  => Nil
      }

    // Going through a Map collapses duplicate titles, matching the previous behaviour.
    val targetByTitle: Map[String, String] = (ampliconTargets ++ roiTargets).toMap

    if (targetByTitle.nonEmpty) {
      val sections: List[(String, ReportSection)] =
        targetByTitle.toList.sortBy(_._1).map {
          case (title, target) =>
            title -> ReportSection(
              "/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp",
              Map("target" -> target)
            )
        }
      Some("Regions" -> ReportPage(List(), sections, Map()))
    } else None
  }

111
112
  /**
   * Files page, can be used general or at sample level.
   *
   * Lists, in order: input fastq files, fastq files after QC, bam files per
   * library, preprocessed bam files and VCF files.
   */
  def filesPage: ReportPage = {
    val fileSections: List[(String, ReportSection)] = List(
      "Input fastq files" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"),
      "After QC fastq files" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"),
      "Bam files per lib" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)),
      "Preprocessed bam files" ->
        ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp",
          Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam")
        ),
      "VCF files" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", Map("sampleId" -> None))
    )

    ReportPage(List(), fileSections, Map())
  }

121
122
  /**
   * Single sample page.
   *
   * Sub pages: "Libraries", "Alignment" (bam metrics for this sample) and "Files".
   * The "Alignment" section only receives `showPlot` when the sample has more
   * than one library.
   *
   * @param sampleId id of the sample the page is built for
   * @param args arguments passed on to the library page generator and to the page itself
   */
  def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = {
    val subPages: List[(String, ReportPage)] = List(
      "Libraries" -> generateLibraryPage(args),
      "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None),
      "Files" -> filesPage
    )

    val hasMultipleLibraries = summary.libraries(sampleId).size > 1
    val alignmentArgs: Map[String, Any] =
      if (hasMultipleLibraries) Map("showPlot" -> true) else Map()

    val sections: List[(String, ReportSection)] = List(
      "Alignment" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", alignmentArgs),
      "Preprocessing" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true)),
      "Variantcalling" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp"),
      "QC reads" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
      "QC bases" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")
    )

    ReportPage(subPages, sections, args)
  }

137
138
  /**
   * Library page.
   *
   * Sub pages: "Alignment" (bam metrics for this sample/library pair) and "QC"
   * (the flexiprep page). Sections: alignment summary, QC reads and QC bases.
   *
   * @param sampleId id of the sample the library belongs to
   * @param libId id of the library the page is built for
   * @param args arguments attached to the resulting page
   */
  def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = {
    val subPages: List[(String, ReportPage)] = List(
      "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)),
      "QC" -> FlexiprepReport.flexiprepPage
    )

    val sections: List[(String, ReportSection)] = List(
      "Alignment" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
      "QC reads" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
      "QC bases" ->
        ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")
    )

    ReportPage(subPages, sections, args)
  }

149
  /**
   * Name of the report.
   *
   * @return the fixed title "Shiva Report"
   */
  def reportName = "Shiva Report"
151

152
153
154
155
156
157
158
159
  /**
   * Generate a stacked bar plot for found variants.
   *
   * Writes `<prefix>.tsv` into `outputDir` with a header line and one row per
   * selected sample (or per library when `libraryLevel` is set). Each row holds
   * the HomVar, Het, HomRef and NoCall genotype counts read from the summary
   * (a missing value counts as 0). A [[StackedBarPlot]] is then run locally to
   * render `<prefix>.png`; its width is 200 plus 10 per row.
   *
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true the plot is based on library stats instead of sample stats
   * @param sampleId By default all samples are selected; when a sample is given the plot is limited to that sample
   */
  def variantSummaryPlot(outputDir: File,
                         prefix: String,
                         summary: Summary,
                         libraryLevel: Boolean = false,
                         sampleId: Option[String] = None): Unit = {
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    /** True when no sample filter is set or when the filter matches `sample`. */
    def isSelected(sample: String): Boolean = sampleId.forall(_ == sample)

    /** Count for one genotype class of a sample (or library), 0 when absent from the summary. */
    def genotypeCount(genotype: String, sample: String, lib: Option[String]): Long =
      new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", genotype),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong

    /** One tsv row: label followed by the counts in header order. */
    def getLine(sample: String, lib: Option[String] = None): String = {
      val label = lib.map(l => sample + "-" + l).getOrElse(sample)
      val counts = List("HomVar", "Het", "HomRef", "NoCall").map(genotypeCount(_, sample, lib))
      (label :: counts.map(_.toString)).mkString("\t")
    }

    val tsvWriter = new PrintWriter(tsvFile)
    // try/finally so the file handle is released even when reading a summary value fails.
    try {
      tsvWriter.println((if (libraryLevel) "Library" else "Sample") + "\tHomVar\tHet\tHomRef\tNoCall")
      for (sample <- summary.samples if isSelected(sample)) {
        if (libraryLevel) {
          for (lib <- summary.libraries(sample)) tsvWriter.println(getLine(sample, Some(lib)))
        } else tsvWriter.println(getLine(sample))
      }
    } finally {
      tsvWriter.close()
    }

    // Number of bars in the plot, used to scale the image width.
    val barCount =
      if (libraryLevel) summary.libraries.filter(s => isSelected(s._1)).foldLeft(0)(_ + _._2.size)
      else summary.samples.count(s => isSelected(s))

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("VCF records")
    plot.width = Some(200 + (barCount * 10))
    plot.runLocal()
  }
213
}