ShivaTrait.scala 14.2 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
package nl.lumc.sasc.biopet.pipelines.shiva

import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
25
import nl.lumc.sasc.biopet.utils.Logging
26
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
 * Created by pjvan_thof on 2/26/15.
 */
35
trait ShivaTrait extends MultiSampleQScript with Reference { qscript: QScript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
36

Peter van 't Hof's avatar
Peter van 't Hof committed
37
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
42
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
    addSamplesJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
45
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
  }

48
49
50
51
52
53
54
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
55
  /** Method to make the variantcalling submodule of shiva */
56
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
63
64
65
66
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
67
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  def makeSample(id: String) = new Sample(id)
69

Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
72
73
74
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
75
76
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
      }
    }
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
    /** Sample specific stats to add to summary */
81
82
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
83
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
    def makeLibrary(id: String) = new Library(id)

86
87
88
    /** Sample specific settings */
    override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined)

Peter van 't Hof's avatar
Peter van 't Hof committed
89
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
93
        ((bamFile, preProcessBam) match {
94
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
          case (Some(b), _)        => Map("bamFile" -> b, "preProcessBam" -> b)
96
          case _                   => Map()
97
98
99
        }) ++ (inputR1.map("input_R1" -> _) ::
          inputR2.map("input_R2" -> _) ::
          inputBam.map("input_bam" -> _) :: Nil).flatten.toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
100
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
101

Peter van 't Hof's avatar
Peter van 't Hof committed
102
      /** Library specific stats to add to summary */
103
104
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
105
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
      def preProcess(input: File): Option[File] = None

108
109
110
      /** Library specific settings */
      override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined)

Peter van 't Hof's avatar
Peter van 't Hof committed
111
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
114
115
116
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
117
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
118
119
120
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

121
122
123
124
125
126
127
128
129
130
131
      def fileMustBeAbsulute(file: Option[File]): Option[File] = {
        if (file.forall(_.isAbsolute)) file
        else {
          Logging.addError(s"$file for $sampleId / $libId should be a absolute file path")
          file.map(_.getAbsoluteFile)
        }
      }

      lazy val inputR1: Option[File] = fileMustBeAbsulute(config("R1"))
      lazy val inputR2: Option[File] = fileMustBeAbsulute(config("R2"))
      lazy val inputBam: Option[File] = fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None)
132

Peter van 't Hof's avatar
Peter van 't Hof committed
133
      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
134
        (inputR1.isDefined, inputBam.isDefined) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
135
136
137
138
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
139
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
143
144
145
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

146
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
149
150
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
151
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
152
      def addJobs(): Unit = {
153
        (inputR1.isDefined, inputBam.isDefined) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
154
          case (true, _) => mapping.foreach(mapping => {
155
156
            mapping.input_R1 = inputR1.get
            mapping.input_R2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
            inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5"))
            mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
Peter van 't Hof's avatar
Peter van 't Hof committed
159
          })
Peter van 't Hof's avatar
Peter van 't Hof committed
160
          case (false, true) => {
161
            inputFiles :+= new InputFile(inputBam.get, config("bam_md5"))
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
            config("bam_to_fastq", default = false).asBoolean match {
              case true =>
164
                val samToFastq = SamToFastq(qscript, inputBam.get,
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
                  new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
                  new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
169
170
171
172
173
                samToFastq.isIntermediate = true
                qscript.add(samToFastq)
                mapping.foreach(mapping => {
                  mapping.input_R1 = samToFastq.fastqR1
                  mapping.input_R2 = Some(samToFastq.fastqR2)
                })
              case false =>
174
                val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
177
178
179
180
181
182
183
184
185
                val readGroups = inputSam.getFileHeader.getReadGroups

                val readGroupOke = readGroups.forall(readGroup => {
                  if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                  if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                  readGroup.getSample == sampleId && readGroup.getLibrary == libId
                })
                inputSam.close()

                if (!readGroupOke) {
                  if (config("correct_readgroups", default = false).asBoolean) {
186
187
                    logger.info("Correcting readgroups, file:" + inputBam.get)
                    val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
                    aorrg.RGID = sampleId + "-" + libId
                    aorrg.RGLB = libId
                    aorrg.RGSM = sampleId
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
                    aorrg.RGPL = "unknown"
                    aorrg.RGPU = "na"
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
195
196
197
                    aorrg.isIntermediate = true
                    qscript.add(aorrg)
                  } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                    "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
                } else {
198
                  val oldBamFile: File = inputBam.get
Peter van 't Hof's avatar
Peter van 't Hof committed
199
                  val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
200
                  val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
204
205
206
                  val baiLn = Ln(qscript, oldIndex, newIndex)
                  add(baiLn)

                  val bamLn = Ln(qscript, oldBamFile, bamFile.get)
                  bamLn.deps :+= baiLn.output
                  add(bamLn)
207
208
209
210
211
212
213
214
215
216

                  val bamMetrics = new BamMetrics(qscript)
                  bamMetrics.sampleId = Some(sampleId)
                  bamMetrics.libId = Some(libId)
                  bamMetrics.inputBam = bamFile.get
                  bamMetrics.outputDir = new File(libDir, "metrics")
                  bamMetrics.init()
                  bamMetrics.biopetScript()
                  addAll(bamMetrics.functions)
                  addSummaryQScript(bamMetrics)
Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
                }
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
223
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
224
225
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
229
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
230
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
          vc.outputDir = new File(libDir, "variantcalling")
234
235
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
238
          addAll(vc.functions)
239
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
240
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
243
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
244
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
245
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
246
247
      if (input == Nil) None
      else if (input.tail == Nil) {
Peter van 't Hof's avatar
Peter van 't Hof committed
248
        val bamFile = new File(sampleDir, s"$sampleId.bam")
Peter van 't Hof's avatar
Peter van 't Hof committed
249
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
250
        val newIndex: File = new File(sampleDir, s"$sampleId.bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
251
252
253
254
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
255
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
256
257
258
259
260
261
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
262
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
263
        //FIXME: making this file intermediate make the pipeline restart unnessery jobs
264
        //md.isIntermediate = isIntermediate
Peter van 't Hof's avatar
Peter van 't Hof committed
265
        add(md)
266
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
267
268
269
270
        Some(md.output)
      }
    }

271
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
274
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
275
276
        case _               => None
      }
277
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
278

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
279
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
280
      Some(makeVariantcalling(multisample = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
281
282
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
283
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
284
285
286
    def addJobs(): Unit = {
      addPerLibJobs()

287
      preProcessBam.foreach { bam =>
288
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
289
        bamMetrics.sampleId = Some(sampleId)
290
        bamMetrics.inputBam = bam
291
        bamMetrics.outputDir = new File(sampleDir, "metrics")
292
293
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
294
295
296
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
297
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
298
299
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
300
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
301
302
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
303
304
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
305
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
306
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
307
308
309
    }
  }

310
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
311
312
313
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
314
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
315
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
316
317
  } else None

318
319
320
321
322
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
323
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
324
  def addMultiSampleJobs(): Unit = {
325
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
326
      vc.outputDir = new File(outputDir, "variantcalling")
327
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
328
329
      vc.init()
      vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
330
      addAll(vc.functions)
331
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
332

333
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
334
335
336
337
338
339
340
        toucan.outputDir = new File(outputDir, "annotation")
        toucan.inputVCF = vc.finalFile
        toucan.init()
        toucan.biopetScript()
        addAll(toucan.functions)
        addSummaryQScript(toucan)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
341
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
342
343

    svCalling.foreach(sv => {
Peter van 't Hof's avatar
Peter van 't Hof committed
344
      sv.outputDir = new File(outputDir, "sv_calling")
345
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
346
347
348
349
350
      sv.init()
      sv.biopetScript()
      addAll(sv.functions)
      addSummaryQScript(sv)
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
351
352
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
353
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
354
355
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
356
  /** Settings of pipeline for summary */
357
  def summarySettings = Map(
358
      "reference" -> referenceSummary,
359
360
361
      "annotation" -> annotation.isDefined,
      "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
      "sv_calling" -> svCalling.isDefined
362
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
363

Peter van 't Hof's avatar
Peter van 't Hof committed
364
  /** Files for the summary */
365
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
366
}