BamMetrics.scala 9.27 KB
Newer Older
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16 17
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

19
import nl.lumc.sasc.biopet.core.annotations.{AnnotationRefFlat, RibosomalRefFlat}
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
21 22
import nl.lumc.sasc.biopet.core.{BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort}
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard._
24
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
25
import nl.lumc.sasc.biopet.extensions.tools.{BamStats, BiopetFlagstat}
Sander Bollen's avatar
Sander Bollen committed
26 27
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import org.broadinstitute.gatk.queue.QScript
30

31 32 33 34 35 36 37 38
class BamMetrics(val parent: Configurable)
    extends QScript
    with SummaryQScript
    with SampleLibraryTag
    with Reference
    with TargetRegions
    with AnnotationRefFlat
    with RibosomalRefFlat {
39

40 41 42 43 44
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

45 46 47
  @Argument(required = false)
  var paired: Boolean = true

48 49
  override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  /** returns files to store in summary */
51
  def summaryFiles: Map[String, File] =
52 53 54
    Map("reference" -> referenceFasta(), "input_bam" -> inputBam) ++
      ampliconBedFile.map("amplicon" -> _).toMap ++
      ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
55

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  /** return settings */
57 58 59
  def summarySettings =
    Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
        "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
60

61
  override def reportClass: Some[BammetricsReport] = {
62 63
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
64
    bammetricsReport.summaryDbFile = summaryDbFile
65 66 67 68
    bammetricsReport.args =
      if (libId.isDefined)
        Map("sampleId" -> sampleId.getOrElse("."), "libId" -> libId.getOrElse("."))
      else Map("sampleId" -> sampleId.getOrElse("."))
69 70 71
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
72
  /** executed before script */
73
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    inputFiles :+= new InputFile(inputBam)
75 76
    ampliconBedFile.foreach(
      BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
77
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
78 79
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
80
  /** Script to add jobs */
81
  def biopetScript() {
82
    add(SamtoolsFlagstat(this, inputBam, outputDir))
83

84 85 86 87
    val bamStats = new BamStats(this)
    bamStats.bamFile = inputBam
    bamStats.outputDir = new File(outputDir, "bamstats")
    add(bamStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    addSummarizable(bamStats, "bamstats")
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90 91 92
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
93 94 95
    if (!paired)
      multiMetrics.program = multiMetrics.program
        .filter(_ != CollectMultipleMetrics.Programs.CollectInsertSizeMetrics)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    add(multiMetrics)
97
    addSummarizable(multiMetrics, "multi_metrics")
98

99 100 101
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
102

103
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
104 105 106 107 108 109
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
110

111
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
112 113 114 115
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
116 117
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
118
      add(rnaMetrics)
119
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
120 121
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
122 123 124 125 126 127 128 129 130 131 132 133 134
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
135 136 137
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
138 139 140
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

141 142 143 144 145
        val chsMetrics = CollectHsMetrics(this,
                                          inputBam,
                                          List(ampIntervals),
                                          ampIntervals :: roiIntervals.map(_.intervals),
                                          outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
        add(chsMetrics)
147
        addSummarizable(chsMetrics, "hs_metrics")
148

149 150 151 152 153
        val pcrMetrics = CollectTargetedPcrMetrics(this,
                                                   inputBam,
                                                   ampIntervals,
                                                   ampIntervals :: roiIntervals.map(_.intervals),
                                                   outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
154
        add(pcrMetrics)
bow's avatar
bow committed
155
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
156

Peter van 't Hof's avatar
Peter van 't Hof committed
157
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
158 159 160
    }

    // Create stats and coverage plot for each bed/interval file
161 162
    val allIntervalNames = (roiIntervals ++ ampIntervals).map(_.bed.getName)
    if (allIntervalNames.size != allIntervalNames.toSet.size) {
163 164
      logger.warn(
        "There are multiple region files with the same name. Metric values might get overwritten")
165
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
166
    for (intervals <- roiIntervals ++ ampIntervals) {
167 168
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
169

170 171 172 173
      val biStrict = BedtoolsIntersect(
        this,
        inputBam,
        intervals.bed,
174
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
175 176
        minOverlap = config("strict_intersect_overlap", default = 1.0)
      )
177
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
178
      addSummarizable(biopetFlagstatStrict, targetName + "_flagstats_strict")
179
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
180

181 182 183 184
      val biLoose = BedtoolsIntersect(
        this,
        inputBam,
        intervals.bed,
185
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
186 187
        minOverlap = config("loose_intersect_overlap", default = 0.01)
      )
188
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
189
      addSummarizable(biopetFlagstatLoose, targetName + "_flagstats_loose")
190
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
191

192 193 194 195 196 197 198 199 200 201
      val sortedBed = BamMetrics.sortedbedCache.getOrElse(
        intervals.bed, {
          val sorter = new BedtoolsSort(this)
          sorter.input = intervals.bed
          sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
          add(sorter)
          BamMetrics.sortedbedCache += intervals.bed -> sorter.output
          sorter.output
        }
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
202
      val bedCov = BedtoolsCoverage(this, sortedBed, inputBam, depth = true)
203 204
      val covStats =
        CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
205 206
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
207
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
208
      addSummarizable(covStats, targetName + "_cov_stats")
209
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
210

Peter van 't Hof's avatar
Peter van 't Hof committed
211
    addSummaryJobs()
212 213 214 215
  }
}

object BamMetrics extends PipelineCommand {
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
218
  def apply(root: Configurable,
219 220
            bamFile: File,
            outputDir: File,
221
            paired: Boolean,
Peter van 't Hof's avatar
Peter van 't Hof committed
222 223
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
224
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
225 226
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
227 228
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir
229
    bamMetrics.paired = paired
230

Peter van 't Hof's avatar
Peter van 't Hof committed
231 232 233
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
234
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
235 236

  private var sortedbedCache: Map[File, File] = Map()
237
}