Carp.scala 5.13 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
15
16
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.carp

17
18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
21
import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak
22
23
import nl.lumc.sasc.biopet.extensions.picard.BuildBamIndex
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
26
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
27
import nl.lumc.sasc.biopet.utils.Logging
28
import nl.lumc.sasc.biopet.utils.config._
29
30
31
32
33
34
35
import org.broadinstitute.gatk.queue.QScript

/**
 * Carp pipeline
 * Chip-Seq analysis pipeline
 * This pipeline performs QC,mapping and peak calling
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Carp(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference {
37
  qscript =>
38
39
  def this() = this(null)

40
  override def defaults = super.defaults ++ Map(
41
    "mapping" -> Map(
42
      "skip_markduplicates" -> false,
Peter van 't Hof's avatar
Peter van 't Hof committed
43
      "aligner" -> "bwa-mem"
44
    ),
Peter van 't Hof's avatar
Peter van 't Hof committed
45
    "merge_strategy" -> "preprocessmergesam",
46
    "samtoolsview" -> Map("q" -> 10)
47
  )
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  override def fixedValues = super.fixedValues ++ Map(
50
51
52
    "samtoolsview" -> Map(
      "h" -> true,
      "b" -> true
53
54
    ),
    "macs2callpeak" -> Map("fileformat" -> "")
55
56
  )

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  def summaryFile = new File(outputDir, "Carp.summary.json")
58

59
60
  override def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends super.Sample(sampleId) {
61

Peter van 't Hof's avatar
Peter van 't Hof committed
62
    override def preProcessBam = Some(createFile("filter.bam"))
63

64
65
    override def metricsPreprogressBam = false

66
67
    val controls: List[String] = config("control", default = Nil)

Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
    override def summarySettings = super.summarySettings ++ Map("controls" -> controls)

70
71
    override def addJobs(): Unit = {
      super.addJobs()
72

73
      add(Bam2Wig(qscript, bamFile.get))
Peter van 't Hof's avatar
Peter van 't Hof committed
74

75
      val samtoolsView = new SamtoolsView(qscript)
76
77
      samtoolsView.input = bamFile.get
      samtoolsView.output = preProcessBam.get
78
79
      samtoolsView.b = true
      samtoolsView.h = true
80
81
      add(samtoolsView)

82
83
84
85
86
87
88
      val bamMetricsFilter = BamMetrics(qscript, preProcessBam.get, new File(sampleDir, "metrics-filter"), sampleId = Some(sampleId))
      addAll(bamMetricsFilter.functions)
      bamMetricsFilter.summaryName = "bammetrics-filter"
      addSummaryQScript(bamMetricsFilter)

      add(Bam2Wig(qscript, preProcessBam.get))

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
89
      val buildBamIndex = new BuildBamIndex(qscript)
90
91
      buildBamIndex.input = preProcessBam.get
      buildBamIndex.output = swapExt(preProcessBam.get.getParentFile, preProcessBam.get, ".bam", ".bai")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
92
93
      add(buildBamIndex)

94
      val macs2 = new Macs2CallPeak(qscript)
95
      macs2.treatment = preProcessBam.get
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      macs2.name = Some(sampleId)
97
      macs2.outputdir = sampleDir + File.separator + "macs2" + File.separator + sampleId + File.separator
98
      macs2.fileformat = if (paired) Some("BAMPE") else Some("BAM")
99
100
      add(macs2)
    }
101
102
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
103
  override def reportClass: Option[ReportBuilderExtension] = {
104
105
    val carp = new CarpReport(this)
    carp.outputDir = new File(outputDir, "report")
106
    carp.summaryDbFile = summaryDbFile
107
108
109
    Some(carp)
  }

110
111
112
113
114
115
116
  lazy val paired: Boolean = {
    val notPaired = samples.forall(_._2.libraries.forall(_._2.inputR2.isEmpty))
    val p = samples.forall(_._2.libraries.forall(_._2.inputR2.isDefined))
    if (!notPaired && !p) Logging.addError("Combination of Paired-end and Single-end detected, this is not allowed in Carp")
    p
  }

117
118
  override def init() = {
    super.init()
119
120
121
    // ensure that no samples are called 'control' since that is our reserved keyword
    require(!sampleIds.contains("control"),
      "No sample should be named 'control' since it is a reserved for the Carp pipeline")
122
123
  }

124
125
  override def addMultiSampleJobs(): Unit = {
    super.addMultiSampleJobs()
126
    for ((sampleId, sample) <- samples) {
127
128
129
      for (controlId <- sample.controls) {
        if (!samples.contains(controlId))
          throw new IllegalStateException("For sample: " + sampleId + " this control: " + controlId + " does not exist")
130
        val macs2 = new Macs2CallPeak(this)
131
132
        macs2.treatment = sample.preProcessBam.get
        macs2.control = samples(controlId).preProcessBam.get
Peter van 't Hof's avatar
Peter van 't Hof committed
133
        macs2.name = Some(sampleId + "_VS_" + controlId)
134
        macs2.fileformat = if (paired) Some("BAMPE") else Some("BAM")
135
        macs2.outputdir = sample.sampleDir + File.separator + "macs2" + File.separator + macs2.name.get + File.separator
136
137
138
        add(macs2)
      }
    }
139
140
141
142
  }
}

object Carp extends PipelineCommand