Carp.scala 4.68 KB
Newer Older
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15
16
package nl.lumc.sasc.biopet.pipelines.carp

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
19
20
import nl.lumc.sasc.biopet.extensions.picard.BuildBamIndex
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
21
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
23
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
24
import nl.lumc.sasc.biopet.utils.Logging
25
import nl.lumc.sasc.biopet.utils.config._
26
27
28
import org.broadinstitute.gatk.queue.QScript

/**
29
30
31
32
  * Carp pipeline
  * Chip-Seq analysis pipeline
  * This pipeline performs QC,mapping and peak calling
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
33
class Carp(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference {
34
  qscript =>
35
36
  def this() = this(null)

37
  override def defaults: Map[String, Any] = super.defaults ++ Map(
38
    "mapping" -> Map(
39
      "skip_markduplicates" -> false,
Peter van 't Hof's avatar
Peter van 't Hof committed
40
      "aligner" -> "bwa-mem"
41
    ),
Peter van 't Hof's avatar
Peter van 't Hof committed
42
    "merge_strategy" -> "preprocessmergesam",
43
    "samtoolsview" -> Map("q" -> 10)
44
  )
45

46
  override def fixedValues: Map[String, Any] = super.fixedValues ++ Map(
47
48
49
    "samtoolsview" -> Map(
      "h" -> true,
      "b" -> true
50
51
    ),
    "macs2callpeak" -> Map("fileformat" -> "")
52
53
  )

54
55
  override def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends super.Sample(sampleId) {
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
    override def bamFile: Option[File] =
      if (libraries.size == 1) libraries.head._2.mapping.map(_.finalBamFile) else super.bamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
    override def preProcessBam = Some(createFile("filter.bam"))
61

62
63
    override def metricsPreprogressBam = false

64
65
    val controls: List[String] = config("control", default = Nil)

66
67
    override def summarySettings: Map[String, Any] =
      super.summarySettings ++ Map("controls" -> controls)
Peter van 't Hof's avatar
Peter van 't Hof committed
68

69
70
    override def addJobs(): Unit = {
      super.addJobs()
71

72
      add(Bam2Wig(qscript, bamFile.get))
Peter van 't Hof's avatar
Peter van 't Hof committed
73

74
      val samtoolsView = new SamtoolsView(qscript)
75
76
      samtoolsView.input = bamFile.get
      samtoolsView.output = preProcessBam.get
77
78
      samtoolsView.b = true
      samtoolsView.h = true
79
80
      add(samtoolsView)

81
82
83
      val bamMetricsFilter = BamMetrics(qscript,
                                        preProcessBam.get,
                                        new File(sampleDir, "metrics-filter"),
84
                                        paired,
85
                                        sampleId = Some(sampleId))
86
87
88
89
      addAll(bamMetricsFilter.functions)
      bamMetricsFilter.summaryName = "bammetrics-filter"
      addSummaryQScript(bamMetricsFilter)

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
90
      val buildBamIndex = new BuildBamIndex(qscript)
91
      buildBamIndex.input = preProcessBam.get
92
93
      buildBamIndex.output =
        swapExt(preProcessBam.get.getParentFile, preProcessBam.get, ".bam", ".bai")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
94
      add(buildBamIndex)
95
    }
96
97
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  override def reportClass: Option[ReportBuilderExtension] = {
99
100
    val carp = new CarpReport(this)
    carp.outputDir = new File(outputDir, "report")
101
    carp.summaryDbFile = summaryDbFile
102
103
104
    Some(carp)
  }

105
106
107
  lazy val paired: Boolean = {
    val notPaired = samples.forall(_._2.libraries.forall(_._2.inputR2.isEmpty))
    val p = samples.forall(_._2.libraries.forall(_._2.inputR2.isDefined))
108
109
110
    if (!notPaired && !p)
      Logging.addError(
        "Combination of Paired-end and Single-end detected, this is not allowed in Carp")
111
112
113
    p
  }

114
  override def init(): Unit = {
115
    super.init()
116
117
    // ensure that no samples are called 'control' since that is our reserved keyword
    require(!sampleIds.contains("control"),
118
            "No sample should be named 'control' since it is a reserved for the Carp pipeline")
119
120
    peakCalling.controls = samples.map(x => x._1 -> x._2.controls)
    peakCalling.outputDir = new File(outputDir, "peak_calling")
121
122
  }

123
124
  lazy val peakCalling = new PeakCalling(this)

125
126
  override def addMultiSampleJobs(): Unit = {
    super.addMultiSampleJobs()
127
128
129
    peakCalling.inputBams = samples.map(x => x._1 -> x._2.preProcessBam.get)
    peakCalling.paired = paired
    add(peakCalling)
130
131
132
133
  }
}

object Carp extends PipelineCommand