Gentrap.scala 10 KB
Newer Older
bow's avatar
bow committed
1
/**
2 3 4 5
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
bow's avatar
bow committed
6
 *
7 8 9 10
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
bow's avatar
bow committed
14
 */
bow's avatar
bow committed
15 16 17
package nl.lumc.sasc.biopet.pipelines.gentrap

import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.annotations.{ RibosomalRefFlat, AnnotationRefFlat }
19
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
20
import nl.lumc.sasc.biopet.extensions.tools.WipeReads
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap.{ StrandProtocol, ExpMeasures }
22
import nl.lumc.sasc.biopet.pipelines.gentrap.measures._
23
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
24
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.utils.{ LazyCheck, Logging }
26
import nl.lumc.sasc.biopet.utils.config._
Peter van 't Hof's avatar
Peter van 't Hof committed
27 28
import org.broadinstitute.gatk.queue.QScript
import picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity
29
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
30 31

import scala.language.reflectiveCalls
bow's avatar
bow committed
32

bow's avatar
bow committed
33 34 35
/**
 * Gentrap pipeline
 * Generic transcriptome analysis pipeline
bow's avatar
bow committed
36
 *
37
 * @author Peter van 't Hof <p.j.van_t_hof@lumc.nl>
bow's avatar
bow committed
38
 * @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
bow's avatar
bow committed
39
 */
40
class Gentrap(val root: Configurable) extends QScript
41
  with MultisampleMappingTrait with AnnotationRefFlat with RibosomalRefFlat { qscript =>
bow's avatar
bow committed
42

43
  // alternative constructor for initialization with empty configuration
bow's avatar
bow committed
44 45
  def this() = this(null)

46 47 48 49 50 51 52
  override def reportClass: Option[ReportBuilderExtension] = {
    val report = new GentrapReport(this)
    report.outputDir = new File(outputDir, "report")
    report.summaryFile = summaryFile
    Some(report)
  }

53
  /** Expression measurement modes */
bow's avatar
bow committed
54
  // see the enumeration below for valid modes
Peter van 't Hof's avatar
Peter van 't Hof committed
55 56 57 58 59 60 61 62
  lazy val expMeasures = new LazyCheck({
    config("expression_measures", default = Nil).asStringList.map(value =>
      ExpMeasures.values.find(_.toString == Gentrap.camelize(value)) match {
        case Some(v) => v
        case _       => throw new IllegalArgumentException(s"'$value' is not a valid Expression measurement")
      }
    ).toSet
  })
63

64
  /** Strandedness modes */
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  lazy val strandProtocol = new LazyCheck({
66
    val value: String = config("strand_protocol")
Peter van 't Hof's avatar
Peter van 't Hof committed
67
    StrandProtocol.values.find(_.toString == Gentrap.camelize(value)) match {
68
      case Some(v) => v
Peter van 't Hof's avatar
Peter van 't Hof committed
69
      case other =>
70 71
        Logging.addError(s"'$other' is no strand_protocol or strand_protocol is not given")
        StrandProtocol.NonSpecific
72
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
73
  })
74

75
  /** Whether to remove rRNA regions or not */
76
  lazy val removeRibosomalReads: Boolean = config("remove_ribosomal_reads", default = false)
77

bow's avatar
bow committed
78
  /** Default pipeline config */
79
  override def defaults = super.defaults ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
80
    "htseqcount" -> (if (strandProtocol.isSet) Map("stranded" -> (strandProtocol() match {
81 82
      case StrandProtocol.NonSpecific => "no"
      case StrandProtocol.Dutp        => "reverse"
Peter van 't Hof's avatar
Peter van 't Hof committed
83
      case otherwise                  => throw new IllegalStateException(otherwise.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
84 85 86
    }))
    else Map()),
    "cufflinks" -> (if (strandProtocol.isSet) Map("library_type" -> (strandProtocol() match {
87 88
      case StrandProtocol.NonSpecific => "fr-unstranded"
      case StrandProtocol.Dutp        => "fr-firststrand"
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      case otherwise                  => throw new IllegalStateException(otherwise.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
90 91
    }))
    else Map()),
92
    "merge_strategy" -> "preprocessmergesam",
Peter van 't Hof's avatar
Peter van 't Hof committed
93 94
    "gsnap" -> Map(
      "novelsplicing" -> 1,
95
      "batch" -> 4
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    ),
97 98 99 100
    "shivavariantcalling" -> Map(
      "variantcallers" -> List("varscan_cns_singlesample"),
      "name_prefix" -> "multisample"
    ),
Peter van 't Hof's avatar
Peter van 't Hof committed
101
    "bammetrics" -> Map(
102 103
      "wgs_metrics" -> false,
      "rna_metrics" -> true,
Peter van 't Hof's avatar
Peter van 't Hof committed
104 105
      "collectrnaseqmetrics" -> ((if (strandProtocol.isSet) Map(
        "strand_specificity" -> (strandProtocol() match {
106 107
          case StrandProtocol.NonSpecific => StrandSpecificity.NONE.toString
          case StrandProtocol.Dutp        => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString
Peter van 't Hof's avatar
Peter van 't Hof committed
108
          case otherwise                  => throw new IllegalStateException(otherwise.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
109 110
        })
      )
111
      else Map()))
Peter van 't Hof's avatar
Peter van 't Hof committed
112
    ),
Peter van 't Hof's avatar
Peter van 't Hof committed
113 114 115 116 117 118 119
    "cutadapt" -> Map("minimum_length" -> 20),
    // avoid conflicts when merging since the MarkDuplicate tags often cause merges to fail
    "picard" -> Map(
      "programrecordid" -> "null"
    ),
    // disable markduplicates since it may not play well with all aligners (this can still be overriden via config)
    "mapping" -> Map(
120
      "aligner" -> "gsnap",
Peter van 't Hof's avatar
Peter van 't Hof committed
121
      "skip_markduplicates" -> true
Wai Yi Leung's avatar
Wai Yi Leung committed
122 123 124 125
    ),
    "wipereads" -> Map(
      "limit_removal" -> true,
      "no_make_index" -> false
126
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
127
  )
128

Peter van 't Hof's avatar
Peter van 't Hof committed
129
  lazy val fragmentsPerGene = if (expMeasures().contains(ExpMeasures.FragmentsPerGene))
130
    Some(new FragmentsPerGene(this)) else None
131

Peter van 't Hof's avatar
Peter van 't Hof committed
132
  lazy val fragmentsPerExon = if (expMeasures().contains(ExpMeasures.FragmentsPerExon))
133
    Some(new FragmentsPerExon(this)) else None
bow's avatar
bow committed
134

Peter van 't Hof's avatar
Peter van 't Hof committed
135
  lazy val baseCounts = if (expMeasures().contains(ExpMeasures.BaseCounts))
Peter van 't Hof's avatar
Peter van 't Hof committed
136
    Some(new BaseCounts(this)) else None
137

Peter van 't Hof's avatar
Peter van 't Hof committed
138
  lazy val cufflinksBlind = if (expMeasures().contains(ExpMeasures.CufflinksBlind))
139 140
    Some(new CufflinksBlind(this)) else None

Peter van 't Hof's avatar
Peter van 't Hof committed
141
  lazy val cufflinksGuided = if (expMeasures().contains(ExpMeasures.CufflinksGuided))
142 143
    Some(new CufflinksGuided(this)) else None

Peter van 't Hof's avatar
Peter van 't Hof committed
144
  lazy val cufflinksStrict = if (expMeasures().contains(ExpMeasures.CufflinksStrict))
145 146
    Some(new CufflinksStrict(this)) else None

147
  def executedMeasures = (fragmentsPerGene :: fragmentsPerExon :: baseCounts :: cufflinksBlind ::
148 149 150 151 152 153 154 155
    cufflinksGuided :: cufflinksStrict :: Nil).flatten

  /** Whether to do simple variant calling on RNA or not */
  lazy val shivaVariantcalling = if (config("call_variants", default = false)) {
    val pipeline = new ShivaVariantcalling(this)
    pipeline.outputDir = new File(outputDir, "variantcalling")
    Some(pipeline)
  } else None
bow's avatar
bow committed
156

bow's avatar
bow committed
157 158 159 160
  /** Output summary file */
  def summaryFile: File = new File(outputDir, "gentrap.summary.json")

  /** Files that will be listed in the summary file */
161
  override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
162
    "annotation_refflat" -> annotationRefFlat()
bow's avatar
bow committed
163
  ) ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
164
      "ribosome_refflat" -> ribosomalRefFlat()
165
    ).collect { case (key, Some(value)) => key -> value }
bow's avatar
bow committed
166 167

  /** Pipeline settings shown in the summary file */
168
  override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
169
    "expression_measures" -> expMeasures().toList.map(_.toString),
Peter van 't Hof's avatar
Peter van 't Hof committed
170
    "strand_protocol" -> strandProtocol().toString,
171 172
    "call_variants" -> shivaVariantcalling.isDefined,
    "remove_ribosomal_reads" -> removeRibosomalReads
bow's avatar
bow committed
173
  )
bow's avatar
bow committed
174

bow's avatar
bow committed
175
  /** Steps to run before biopetScript */
176 177 178
  override def init(): Unit = {
    super.init()

Peter van 't Hof's avatar
Peter van 't Hof committed
179
    if (expMeasures().isEmpty) Logging.addError("'expression_measures' is missing in the config")
Peter van 't Hof's avatar
Peter van 't Hof committed
180 181 182
    require(Gentrap.StrandProtocol.values.contains(strandProtocol()))
    if (removeRibosomalReads && ribosomalRefFlat().isEmpty)
      Logging.addError("removeRibosomalReads is enabled but no ribosomalRefFlat is given")
183

Peter van 't Hof's avatar
Peter van 't Hof committed
184
    executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expression_measures" + File.separator + x.name))
bow's avatar
bow committed
185
  }
186

bow's avatar
bow committed
187
  /** Pipeline run for multiple samples */
188
  override def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    super.addMultiSampleJobs()
190
    // merge expression tables
191 192
    executedMeasures.foreach(add)
    shivaVariantcalling.foreach(add)
193
  }
194

bow's avatar
bow committed
195
  /** Returns a [[Sample]] object */
196
  override def makeSample(sampleId: String): Sample = new Sample(sampleId)
bow's avatar
bow committed
197

bow's avatar
bow committed
198 199 200 201 202
  /**
   * Gentrap sample
   *
   * @param sampleId Unique identifier of the sample
   */
203
  class Sample(sampleId: String) extends super.Sample(sampleId) {
bow's avatar
bow committed
204

205
    /** Summary stats of the sample */
206
    override def summaryStats: Map[String, Any] = super.summaryStats ++ Map(
207 208 209
      "all_paired" -> allPaired,
      "all_single" -> allSingle
    )
210

Peter van 't Hof's avatar
Peter van 't Hof committed
211 212 213
    override lazy val preProcessBam = if (removeRibosomalReads) {
      val job = new WipeReads(qscript)
      job.inputBam = bamFile.get
Peter van 't Hof's avatar
Peter van 't Hof committed
214
      ribosomalRefFlat().foreach(job.intervalFile = _)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      job.outputBam = createFile("cleaned.bam")
Wai Yi Leung's avatar
Wai Yi Leung committed
216
      job.discardedBam = Some(createFile("rrna.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
217 218 219
      add(job)
      Some(job.outputBam)
    } else bamFile
220

221
    /** Whether all libraries are paired or not */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
222
    def allPaired: Boolean = libraries.values.forall(_.mapping.forall(_.inputR2.isDefined))
223 224

    /** Whether all libraries are single or not */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
225
    def allSingle: Boolean = libraries.values.forall(_.mapping.forall(_.inputR2.isEmpty))
226

bow's avatar
bow committed
227
    /** Adds all jobs for the sample */
228 229
    override def addJobs(): Unit = {
      super.addJobs()
230 231
      // TODO: this is our requirement since it's easier to calculate base counts when all libraries are either paired or single
      require(allPaired || allSingle, s"Sample $sampleId contains only single-end or paired-end libraries")
bow's avatar
bow committed
232
      // add bigwig output, also per-strand when possible
233 234 235 236 237

      preProcessBam.foreach { file =>
        executedMeasures.foreach(_.addBamfile(sampleId, file))
        shivaVariantcalling.foreach(_.inputBams += sampleId -> file)
      }
238
    }
bow's avatar
bow committed
239
  }
bow's avatar
bow committed
240
}
bow's avatar
bow committed
241

bow's avatar
bow committed
242
object Gentrap extends PipelineCommand {
243

bow's avatar
bow committed
244 245
  /** Enumeration of available expression measures */
  object ExpMeasures extends Enumeration {
246
    val FragmentsPerGene, FragmentsPerExon, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value
247
  }
bow's avatar
bow committed
248

bow's avatar
bow committed
249
  /** Enumeration of available strandedness */
250
  object StrandProtocol extends Enumeration {
bow's avatar
bow committed
251 252 253
    // for now, only non-strand specific and dUTP stranded protocol is supported
    val NonSpecific, Dutp = Value
  }
254 255

  /** Converts string with underscores into camel-case strings */
256
  private[gentrap] def camelize(ustring: String): String = ustring
257 258 259
    .split("_")
    .map(_.toLowerCase.capitalize)
    .mkString("")
260
}