BamMetrics.scala 7.36 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
19
20
import nl.lumc.sasc.biopet.scripts.CoverageStats
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, PipelineCommand }
22
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.tools.BiopetFlagstat
24
25
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.picard._
27
28
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat

Peter van 't Hof's avatar
Peter van 't Hof committed
29
class BamMetrics(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
30
31
32
33
34
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
  /** Bed files for region of interests */
  var roiBedFiles: List[File] = config("regions_of_interest", Nil)
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
  /** Bed of amplicon that is used */
  var ampliconBedFile: Option[File] = config("amplicon_bed")
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
  var rnaMetrics: Boolean = config("rna_metrcis", default = false)

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  /** return location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
46
47
48
49
  def summaryFile = (sampleId, libId) match {
    case (Some(sampleId), Some(libId)) => new File(outputDir, sampleId + "-" + libId + ".BamMetrics.summary.json")
    case (Some(sampleId), _)           => new File(outputDir, sampleId + ".BamMetrics.summary.json")
    case _                             => new File(outputDir, "BamMetrics.summary.json")
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  /** returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
53
  def summaryFiles = Map("input_bam" -> inputBam) ++
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
54

Peter van 't Hof's avatar
Peter van 't Hof committed
55
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
  /** executed before script */
60
61
62
  def init() {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
63
  /** Script to add jobs */
64
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
65
    add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat")))
66

67
    val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
68
69
70
    add(biopetFlagstat)
    addSummarizable(biopetFlagstat, "biopet_flagstat")

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
75
    addSummarizable(multiMetrics, "multi_metrics")
76

77
78
79
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
    val wgsMetrics = new CollectWgsMetrics(this)
    wgsMetrics.input = inputBam
Peter van 't Hof's avatar
Peter van 't Hof committed
83
    wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
84
    add(wgsMetrics)
85
    addSummarizable(wgsMetrics, "wgs")
Peter van 't Hof's avatar
Peter van 't Hof committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
91
92
    if (rnaMetrics) {
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
      add(rnaMetrics)
93
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
      case ampliconBedFile => {
        val ampIntervals = swapExt(outputDir, ampliconBedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, ampliconBedFile, ampIntervals)
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

        val chsMetrics = CalculateHsMetrics(this, inputBam,
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
118
        addSummarizable(chsMetrics, "hs_metrics")
119

Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
123
        addSummarizable(chsMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
126
127
128
129
130

        Intervals(ampliconBedFile, ampIntervals)
      }
    }

    // Create stats and coverage plot for each bed/interval file
    for (intervals <- roiIntervals ++ ampIntervals) {
131
132
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
136
        minOverlap = config("strict_intersect_overlap", default = 1.0))
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
139
      biStrict.isIntermediate = true
      add(biStrict)
      add(SamtoolsFlagstat(this, biStrict.output, targetDir))
140
141
142
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
      add(biopetFlagstatStrict)
      addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
145

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
146
        minOverlap = config("loose_intersect_overlap", default = 0.01))
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
149
      biLoose.isIntermediate = true
      add(biLoose)
      add(SamtoolsFlagstat(this, biLoose.output, targetDir))
150
151
152
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
      add(biopetFlagstatLoose)
      addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
153
154

      val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
155
156

      //FIXME:should use piping
Peter van 't Hof's avatar
Peter van 't Hof committed
157
      add(BedtoolsCoverage(this, inputBam, intervals.bed, coverageFile, depth = true), true)
158
      val covStats = CoverageStats(this, coverageFile, targetDir)
159
      covStats.title = Some("Coverage for " + targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
160
      covStats.subTitle = Some(".")
161
      add(covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
162
      addSummarizable(covStats, targetName + "_cov_stats")
163
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165

    addSummaryJobs
166
167
168
169
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
170
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
171
  def apply(root: Configurable, bamFile: File, outputDir: File): BamMetrics = {
172
173
174
175
    val bamMetrics = new BamMetrics(root)
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
178
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
179
180
  }
}