BamMetrics.scala 4.53 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
package nl.lumc.sasc.biopet.pipelines.bammetrics

import nl.lumc.sasc.biopet.scripts.CoverageStats
import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import java.io.File
import nl.lumc.sasc.biopet.tools.{ BedToInterval, BiopetFlagstat }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
import nl.lumc.sasc.biopet.extensions.picard.{ CollectInsertSizeMetrics, CollectGcBiasMetrics, CalculateHsMetrics, CollectAlignmentSummaryMetrics }
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat

class BamMetrics(val root: Configurable) extends QScript with BiopetQScript {
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

  @Input(doc = "Bed tracks targets", shortName = "target", required = false)
  var bedFiles: List[File] = Nil

  @Input(doc = "Bed tracks bait", shortName = "bait", required = false)
  var baitBedFile: File = _

  @Argument(doc = "", required = false)
  var wholeGenome = false

  def init() {
    if (config.contains("target_bed")) {
      for (file <- config("target_bed").asList) {
        bedFiles +:= new File(file.toString)
      }
    }
    if (baitBedFile == null && config.contains("target_bait")) baitBedFile = config("target_bait")
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
    add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat")))
    add(BiopetFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".biopetflagstat")))
55
56
57
58
59
60
61
62
63
    add(CollectGcBiasMetrics(this, inputBam, outputDir))
    add(CollectInsertSizeMetrics(this, inputBam, outputDir))
    add(CollectAlignmentSummaryMetrics(this, inputBam, outputDir))

    val baitIntervalFile = if (baitBedFile != null) new File(outputDir, baitBedFile.getName.stripSuffix(".bed") + ".interval") else null
    if (baitIntervalFile != null)
      add(BedToInterval(this, baitBedFile, inputBam, outputDir), true)

    for (bedFile <- bedFiles) {
Peter van 't Hof's avatar
Peter van 't Hof committed
64
      val targetDir = new File(outputDir, bedFile.getName.stripSuffix(".bed"))
65
66
67
68
69
70
71
      val targetInterval = BedToInterval(this, bedFile, inputBam, targetDir)
      add(targetInterval, true)
      add(CalculateHsMetrics(this, inputBam, if (baitIntervalFile != null) baitIntervalFile
      else targetInterval.output, targetInterval.output, targetDir))

      val strictOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam")
      add(BedtoolsIntersect(this, inputBam, bedFile, strictOutputBam, minOverlap = config("strictintersectoverlap", default = 1.0)), true)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
      add(SamtoolsFlagstat(this, strictOutputBam, swapExt(targetDir, strictOutputBam, ".bam", ".flagstat")))
      add(BiopetFlagstat(this, strictOutputBam, swapExt(targetDir, strictOutputBam, ".bam", ".biopetflagstat")))
74
75
76

      val looseOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam")
      add(BedtoolsIntersect(this, inputBam, bedFile, looseOutputBam, minOverlap = config("looseintersectoverlap", default = 0.01)), true)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
      add(SamtoolsFlagstat(this, looseOutputBam, swapExt(targetDir, looseOutputBam, ".bam", ".biopet")))
      add(BiopetFlagstat(this, looseOutputBam, swapExt(targetDir, looseOutputBam, ".bam", ".biopetflagstat")))
79
80
81
82
83
84
85
86
87

      val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
      add(BedtoolsCoverage(this, inputBam, bedFile, coverageFile, true), true)
      add(CoverageStats(this, coverageFile, targetDir))
    }
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
  def apply(root: Configurable, bamFile: File, outputDir: File): BamMetrics = {
89
90
91
92
93
94
95
96
97
    val bamMetrics = new BamMetrics(root)
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

    bamMetrics.init
    bamMetrics.biopetScript
    return bamMetrics
  }
}