Gentrap.scala 9.67 KB
Newer Older
bow's avatar
bow committed
1
/**
2
3
4
5
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
bow's avatar
bow committed
6
 *
7
8
9
10
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
bow's avatar
bow committed
14
 */
bow's avatar
bow committed
15
16
17
package nl.lumc.sasc.biopet.pipelines.gentrap

import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.annotations.{ RibosomalRefFlat, AnnotationRefFlat }
19
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
20
import nl.lumc.sasc.biopet.extensions.tools.WipeReads
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap.{ StrandProtocol, ExpMeasures }
22
import nl.lumc.sasc.biopet.pipelines.gentrap.measures._
23
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
24
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.utils.{ LazyCheck, Logging }
26
import nl.lumc.sasc.biopet.utils.config._
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.camelize
Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
import org.broadinstitute.gatk.queue.QScript
import picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity
30
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32

import scala.language.reflectiveCalls
bow's avatar
bow committed
33

bow's avatar
bow committed
34
35
36
/**
 * Gentrap pipeline
 * Generic transcriptome analysis pipeline
bow's avatar
bow committed
37
 *
38
 * @author Peter van 't Hof <p.j.van_t_hof@lumc.nl>
bow's avatar
bow committed
39
 * @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
bow's avatar
bow committed
40
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
41
class Gentrap(val parent: Configurable) extends QScript
42
  with MultisampleMappingTrait with AnnotationRefFlat with RibosomalRefFlat { qscript =>
bow's avatar
bow committed
43

44
  // alternative constructor for initialization with empty configuration
bow's avatar
bow committed
45
46
  def this() = this(null)

47
48
49
  override def reportClass: Option[ReportBuilderExtension] = {
    val report = new GentrapReport(this)
    report.outputDir = new File(outputDir, "report")
50
    report.summaryDbFile = summaryDbFile
51
52
53
    Some(report)
  }

54
  /** Expression measurement modes */
bow's avatar
bow committed
55
  // see the enumeration below for valid modes
Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
  lazy val expMeasures = new LazyCheck({
    config("expression_measures", default = Nil).asStringList.map(value =>
Peter van 't Hof's avatar
Peter van 't Hof committed
58
      ExpMeasures.values.find(_.toString == camelize(value)) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
63
        case Some(v) => v
        case _       => throw new IllegalArgumentException(s"'$value' is not a valid Expression measurement")
      }
    ).toSet
  })
64

65
  /** Strandedness modes */
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  lazy val strandProtocol = new LazyCheck({
67
    val value: String = config("strand_protocol")
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    StrandProtocol.values.find(_.toString == camelize(value)) match {
69
      case Some(v) => v
Peter van 't Hof's avatar
Peter van 't Hof committed
70
      case other =>
71
72
        Logging.addError(s"'$other' is no strand_protocol or strand_protocol is not given")
        StrandProtocol.NonSpecific
73
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
74
  })
75

76
  /** Whether to remove rRNA regions or not */
77
  lazy val removeRibosomalReads: Boolean = config("remove_ribosomal_reads", default = false)
78

bow's avatar
bow committed
79
  /** Default pipeline config */
80
  override def defaults = super.defaults ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
81
    "htseqcount" -> (if (strandProtocol.isSet) Map("stranded" -> (strandProtocol() match {
82
83
      case StrandProtocol.NonSpecific => "no"
      case StrandProtocol.Dutp        => "reverse"
Peter van 't Hof's avatar
Peter van 't Hof committed
84
      case otherwise                  => throw new IllegalStateException(otherwise.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
87
    }))
    else Map()),
    "cufflinks" -> (if (strandProtocol.isSet) Map("library_type" -> (strandProtocol() match {
88
89
      case StrandProtocol.NonSpecific => "fr-unstranded"
      case StrandProtocol.Dutp        => "fr-firststrand"
Peter van 't Hof's avatar
Peter van 't Hof committed
90
      case otherwise                  => throw new IllegalStateException(otherwise.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
    }))
    else Map()),
93
    "merge_strategy" -> "preprocessmergesam",
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
    "gsnap" -> Map(
      "novelsplicing" -> 1,
96
      "batch" -> 4
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    ),
98
99
100
101
    "shivavariantcalling" -> Map(
      "variantcallers" -> List("varscan_cns_singlesample"),
      "name_prefix" -> "multisample"
    ),
Peter van 't Hof's avatar
Peter van 't Hof committed
102
    "bammetrics" -> Map(
103
104
      "wgs_metrics" -> false,
      "rna_metrics" -> true,
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
      "collectrnaseqmetrics" -> ((if (strandProtocol.isSet) Map(
        "strand_specificity" -> (strandProtocol() match {
107
108
          case StrandProtocol.NonSpecific => StrandSpecificity.NONE.toString
          case StrandProtocol.Dutp        => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString
Peter van 't Hof's avatar
Peter van 't Hof committed
109
          case otherwise                  => throw new IllegalStateException(otherwise.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
        })
      )
112
      else Map()))
Peter van 't Hof's avatar
Peter van 't Hof committed
113
    ),
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
116
117
118
119
120
    "cutadapt" -> Map("minimum_length" -> 20),
    // avoid conflicts when merging since the MarkDuplicate tags often cause merges to fail
    "picard" -> Map(
      "programrecordid" -> "null"
    ),
    // disable markduplicates since it may not play well with all aligners (this can still be overriden via config)
    "mapping" -> Map(
121
      "aligner" -> "gsnap",
Peter van 't Hof's avatar
Peter van 't Hof committed
122
      "skip_markduplicates" -> true
Wai Yi Leung's avatar
Wai Yi Leung committed
123
124
125
126
    ),
    "wipereads" -> Map(
      "limit_removal" -> true,
      "no_make_index" -> false
127
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
128
  )
129

Peter van 't Hof's avatar
Peter van 't Hof committed
130
  lazy val fragmentsPerGene = if (expMeasures().contains(ExpMeasures.FragmentsPerGene))
131
    Some(new FragmentsPerGene(this)) else None
132

Peter van 't Hof's avatar
Peter van 't Hof committed
133
  lazy val baseCounts = if (expMeasures().contains(ExpMeasures.BaseCounts))
Peter van 't Hof's avatar
Peter van 't Hof committed
134
    Some(new BaseCounts(this)) else None
135

Peter van 't Hof's avatar
Peter van 't Hof committed
136
  lazy val cufflinksBlind = if (expMeasures().contains(ExpMeasures.CufflinksBlind))
137
138
    Some(new CufflinksBlind(this)) else None

Peter van 't Hof's avatar
Peter van 't Hof committed
139
  lazy val cufflinksGuided = if (expMeasures().contains(ExpMeasures.CufflinksGuided))
140
141
    Some(new CufflinksGuided(this)) else None

Peter van 't Hof's avatar
Peter van 't Hof committed
142
  lazy val cufflinksStrict = if (expMeasures().contains(ExpMeasures.CufflinksStrict))
143
144
    Some(new CufflinksStrict(this)) else None

Peter van 't Hof's avatar
Peter van 't Hof committed
145
  def executedMeasures = (fragmentsPerGene :: baseCounts :: cufflinksBlind ::
146
147
148
149
150
151
152
153
    cufflinksGuided :: cufflinksStrict :: Nil).flatten

  /** Whether to do simple variant calling on RNA or not */
  lazy val shivaVariantcalling = if (config("call_variants", default = false)) {
    val pipeline = new ShivaVariantcalling(this)
    pipeline.outputDir = new File(outputDir, "variantcalling")
    Some(pipeline)
  } else None
bow's avatar
bow committed
154

bow's avatar
bow committed
155
156
157
158
  /** Output summary file */
  def summaryFile: File = new File(outputDir, "gentrap.summary.json")

  /** Files that will be listed in the summary file */
159
  override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    "annotation_refflat" -> annotationRefFlat()
bow's avatar
bow committed
161
  ) ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
162
      "ribosome_refflat" -> ribosomalRefFlat()
163
    ).collect { case (key, Some(value)) => key -> value }
bow's avatar
bow committed
164
165

  /** Pipeline settings shown in the summary file */
166
  override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
167
    "expression_measures" -> expMeasures().toList.map(_.toString),
Peter van 't Hof's avatar
Peter van 't Hof committed
168
    "strand_protocol" -> strandProtocol().toString,
169
170
    "call_variants" -> shivaVariantcalling.isDefined,
    "remove_ribosomal_reads" -> removeRibosomalReads
bow's avatar
bow committed
171
  )
bow's avatar
bow committed
172

bow's avatar
bow committed
173
  /** Steps to run before biopetScript */
174
175
176
  override def init(): Unit = {
    super.init()

Peter van 't Hof's avatar
Peter van 't Hof committed
177
    if (expMeasures().isEmpty) Logging.addError("'expression_measures' is missing in the config")
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
    require(Gentrap.StrandProtocol.values.contains(strandProtocol()))
    if (removeRibosomalReads && ribosomalRefFlat().isEmpty)
      Logging.addError("removeRibosomalReads is enabled but no ribosomalRefFlat is given")
181

Peter van 't Hof's avatar
Peter van 't Hof committed
182
    executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expression_measures" + File.separator + x.name))
bow's avatar
bow committed
183
  }
184

bow's avatar
bow committed
185
  /** Pipeline run for multiple samples */
186
  override def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
187
    super.addMultiSampleJobs()
188
    // merge expression tables
189
190
    executedMeasures.foreach(add)
    shivaVariantcalling.foreach(add)
191
  }
192

bow's avatar
bow committed
193
  /** Returns a [[Sample]] object */
194
  override def makeSample(sampleId: String): Sample = new Sample(sampleId)
bow's avatar
bow committed
195

bow's avatar
bow committed
196
197
198
199
200
  /**
   * Gentrap sample
   *
   * @param sampleId Unique identifier of the sample
   */
201
  class Sample(sampleId: String) extends super.Sample(sampleId) {
bow's avatar
bow committed
202

203
    /** Summary stats of the sample */
204
    override def summaryStats: Map[String, Any] = super.summaryStats ++ Map(
205
206
207
      "all_paired" -> allPaired,
      "all_single" -> allSingle
    )
208

Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
211
    override lazy val preProcessBam = if (removeRibosomalReads) {
      val job = new WipeReads(qscript)
      job.inputBam = bamFile.get
Peter van 't Hof's avatar
Peter van 't Hof committed
212
      ribosomalRefFlat().foreach(job.intervalFile = _)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
      job.outputBam = createFile("cleaned.bam")
Wai Yi Leung's avatar
Wai Yi Leung committed
214
      job.discardedBam = Some(createFile("rrna.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
217
      add(job)
      Some(job.outputBam)
    } else bamFile
218

219
    /** Whether all libraries are paired or not */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
220
    def allPaired: Boolean = libraries.values.forall(_.mapping.forall(_.inputR2.isDefined))
221
222

    /** Whether all libraries are single or not */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
223
    def allSingle: Boolean = libraries.values.forall(_.mapping.forall(_.inputR2.isEmpty))
224

bow's avatar
bow committed
225
    /** Adds all jobs for the sample */
226
227
    override def addJobs(): Unit = {
      super.addJobs()
228
229
      // TODO: this is our requirement since it's easier to calculate base counts when all libraries are either paired or single
      require(allPaired || allSingle, s"Sample $sampleId contains only single-end or paired-end libraries")
bow's avatar
bow committed
230
      // add bigwig output, also per-strand when possible
231
232
233
234
235

      preProcessBam.foreach { file =>
        executedMeasures.foreach(_.addBamfile(sampleId, file))
        shivaVariantcalling.foreach(_.inputBams += sampleId -> file)
      }
236
    }
bow's avatar
bow committed
237
  }
bow's avatar
bow committed
238
}
bow's avatar
bow committed
239

bow's avatar
bow committed
240
object Gentrap extends PipelineCommand {
241

bow's avatar
bow committed
242
243
  /** Enumeration of available expression measures */
  object ExpMeasures extends Enumeration {
Peter van 't Hof's avatar
Peter van 't Hof committed
244
    val FragmentsPerGene, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value
245
  }
bow's avatar
bow committed
246

bow's avatar
bow committed
247
  /** Enumeration of available strandedness */
248
  object StrandProtocol extends Enumeration {
bow's avatar
bow committed
249
250
251
    // for now, only non-strand specific and dUTP stranded protocol is supported
    val NonSpecific, Dutp = Value
  }
252
}