ShivaReport.scala 9.99 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.shiva

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.report._
22
import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue }
23
import nl.lumc.sasc.biopet.utils.rscript.StackedBarPlot
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport
25
26
27
import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport

/**
28
29
 * With this extension the report is executed within a pipeline
 *
30
31
 * Created by pjvan_thof on 3/30/15.
 */
32
33
34
35
class ShivaReport(val root: Configurable) extends ReportBuilderExtension {
  val builder = ShivaReport
}

36
/** Object for report generation for Shiva pipeline */
37
object ShivaReport extends MultisampleReportBuilder {
38

39
40
41
42
43
  def variantcallingExecuted = summary.getValue("shiva", "settings", "multisample_variantcalling") match {
    case true => true
    case _ => false
  }

44
  /** Root page for the shiva report */
45
  def indexPage = {
46
    val regions = regionsPage
47
    ReportPage(
48
      List("Samples" -> generateSamplesPage(pageArgs)) ++
49
        (if (regions.isDefined) Map(regions.get) else Map()) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
52
53
        Map("Reference" -> ReportPage(List(), List(
          "Reference" -> ReportSection("/nl/lumc/sasc/biopet/core/report/reference.ssp", Map("pipeline" -> "shiva"))
        ), Map()),
          "Files" -> filesPage,
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
56
          "Versions" -> ReportPage(List(), List(
            "Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp")
          ), Map())
57
        ),
58
      List(
59
60
61
62
        "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp")) ++
        (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp",
          Map("showPlot" -> true, "showTable" -> false))) else Nil) ++
        List("Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
63
          Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)
64
        ),
65
        "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
66
          Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)),
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
        "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
          Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)),
69
        "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
70
          Map("showPlot" -> true, "showTable" -> false)),
71
        "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
72
          Map("showPlot" -> true, "showTable" -> false))
73
74
75
76
      ),
      pageArgs
    )
  }
77

78
79
80
  //TODO: Add variants per target
  /** Generate a page with all target coverage stats */
  def regionsPage: Option[(String, ReportPage)] = {
81
82
83
84
85
86
    val roi = summary.getValue("shiva", "settings", "regions_of_interest")
    val amplicon = summary.getValue("shiva", "settings", "amplicon_bed")

    var regionPages: Map[String, ReportPage] = Map()

    def createPage(name: String, amplicon: Boolean = false): ReportPage = {
Peter van 't Hof's avatar
Peter van 't Hof committed
87
      ReportPage(
88
        List(),
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91
        List("Coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp")),
        Map("target" -> name)
      )
92
93
94
    }

    amplicon match {
95
      case Some(x: String) => regionPages += (x + " (Amplicon)") -> createPage(x, amplicon = true)
96
97
98
99
      case _               =>
    }

    roi match {
100
101
      case Some(x: String)  => regionPages += x -> createPage(x, amplicon = false)
      case Some(x: List[_]) => x.foreach(x => regionPages += x.toString -> createPage(x.toString, amplicon = false))
102
103
104
      case _                =>
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
105
    if (regionPages.nonEmpty) Some("Regions" -> ReportPage(
106
      List(),
107
      regionPages.map(p => p._1 -> ReportSection(
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
        "/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp",
        Map("target" -> p._1.stripSuffix(" (Amplicon)"))
110
      )).toList.sortBy(_._1),
Peter van 't Hof's avatar
Peter van 't Hof committed
111
      Map())
112
113
    )
    else None
114
115
  }

116
117
  /** Files page, can be used general or at sample level */
  def filesPage: ReportPage = ReportPage(List(), List(
118
119
    "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"),
    "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"),
120
121
122
    "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)),
    "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp",
      Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam")),
123
124
125
    "VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", Map("sampleId" -> None))
  ), Map())

126
127
  /** Single sample page */
  def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = {
128
    ReportPage(List(
129
      "Libraries" -> generateLibraryPage(args),
130
      "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None),
131
      "Files" -> filesPage
132
    ), List(
133
134
      "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp",
        if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()),
135
136
137
      "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++
      (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil) ++
      List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
138
      "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")
139
140
141
    ), args)
  }

142
143
  /** Library page */
  def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = {
144
    ReportPage(List(
145
      "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)),
146
147
148
      "QC" -> FlexiprepReport.flexiprepPage
    ), List(
      "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
149
150
151
      "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
      "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")
    ), args)
152
153
  }

154
  /** Name of the report */
155
  def reportName = "Shiva Report"
156

157
158
159
160
161
162
163
164
  /**
   * Generate a stackbar plot for found variants
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
165
  def variantSummaryPlot(outputDir: File,
166
167
168
169
                         prefix: String,
                         summary: Summary,
                         libraryLevel: Boolean = false,
                         sampleId: Option[String] = None): Unit = {
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tHomVar\tHet\tHomRef\tNoCall")

    def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = {
      val homVar = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "HomVar"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val homRef = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "HomRef"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val noCall = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "NoCall"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val het = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "Het"),
        summary, Some(sample), lib).value.getOrElse(0).toString.toLong
      val sb = new StringBuffer()
      if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t")
      sb.append(homVar + "\t")
      sb.append(het + "\t")
      sb.append(homRef + "\t")
      sb.append(noCall)
      sb.toString
    }

    if (libraryLevel) {
195
      for (
196
        sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get;
197
198
        lib <- summary.libraries(sample)
      ) {
199
200
201
        tsvWriter.println(getLine(summary, sample, Some(lib)))
      }
    } else {
202
      for (sample <- summary.samples if sampleId.isEmpty || sample == sampleId.get) {
203
204
205
206
207
208
209
210
211
212
        tsvWriter.println(getLine(summary, sample))
      }
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("VCF records")
213
214
    if (libraryLevel) {
      plot.width = Some(200 + (summary.libraries.filter(s => sampleId.getOrElse(s._1) == s._1).foldLeft(0)(_ + _._2.size) * 10))
215
    } else plot.width = Some(200 + (summary.samples.count(s => sampleId.getOrElse(s) == s) * 10))
216
217
    plot.runLocal()
  }
218
}