ShivaTrait.scala 14.4 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
27
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
30
31
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
trait ShivaTrait extends MultiSampleQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
37
38
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
44
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
    addSamplesJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
47
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
  }

50
51
52
53
54
55
56
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  /** Method to make the variantcalling submodule of shiva */
58
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
63
64
65
66
67
68
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
69
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
70
  def makeSample(id: String) = new Sample(id)
71

Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
74
75
76
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
77
78
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
      }
    }
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
    /** Sample specific stats to add to summary */
83
84
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
85
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
    def makeLibrary(id: String) = new Library(id)

88
89
90
    /** Sample specific settings */
    override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined)

Peter van 't Hof's avatar
Peter van 't Hof committed
91
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
92
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
95
        ((bamFile, preProcessBam) match {
96
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
Peter van 't Hof's avatar
Peter van 't Hof committed
97
          case (Some(b), _)        => Map("bamFile" -> b, "preProcessBam" -> b)
98
          case _                   => Map()
99
100
101
        }) ++ (inputR1.map("input_R1" -> _) ::
          inputR2.map("input_R2" -> _) ::
          inputBam.map("input_bam" -> _) :: Nil).flatten.toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
102
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
      /** Library specific stats to add to summary */
105
106
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
107
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
      def preProcess(input: File): Option[File] = None

110
111
112
      /** Library specific settings */
      override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined)

Peter van 't Hof's avatar
Peter van 't Hof committed
113
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
116
117
118
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
119
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

123
124
125
126
127
128
129
130
131
132
133
      def fileMustBeAbsulute(file: Option[File]): Option[File] = {
        if (file.forall(_.isAbsolute)) file
        else {
          Logging.addError(s"$file for $sampleId / $libId should be a absolute file path")
          file.map(_.getAbsoluteFile)
        }
      }

      lazy val inputR1: Option[File] = fileMustBeAbsulute(config("R1"))
      lazy val inputR2: Option[File] = fileMustBeAbsulute(config("R2"))
      lazy val inputBam: Option[File] = fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None)
134

Peter van 't Hof's avatar
Peter van 't Hof committed
135
      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
136
        (inputR1.isDefined, inputBam.isDefined) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
139
140
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
141
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
145
146
147
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

148
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
149
150
151
152
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
153
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
154
      def addJobs(): Unit = {
155
        (inputR1.isDefined, inputBam.isDefined) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
156
          case (true, _) => mapping.foreach(mapping => {
157
158
            mapping.input_R1 = inputR1.get
            mapping.input_R2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
            inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5"))
            mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
Peter van 't Hof's avatar
Peter van 't Hof committed
161
          })
Peter van 't Hof's avatar
Peter van 't Hof committed
162
          case (false, true) => {
163
            inputFiles :+= new InputFile(inputBam.get, config("bam_md5"))
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
            config("bam_to_fastq", default = false).asBoolean match {
              case true =>
166
                val samToFastq = SamToFastq(qscript, inputBam.get,
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
                  new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
                  new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
172
173
174
175
                samToFastq.isIntermediate = true
                qscript.add(samToFastq)
                mapping.foreach(mapping => {
                  mapping.input_R1 = samToFastq.fastqR1
                  mapping.input_R2 = Some(samToFastq.fastqR2)
                })
              case false =>
176
                val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
177
178
179
180
181
182
183
184
185
186
187
                val readGroups = inputSam.getFileHeader.getReadGroups

                val readGroupOke = readGroups.forall(readGroup => {
                  if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                  if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                  readGroup.getSample == sampleId && readGroup.getLibrary == libId
                })
                inputSam.close()

                if (!readGroupOke) {
                  if (config("correct_readgroups", default = false).asBoolean) {
188
189
                    logger.info("Correcting readgroups, file:" + inputBam.get)
                    val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
                    aorrg.RGID = sampleId + "-" + libId
                    aorrg.RGLB = libId
                    aorrg.RGSM = sampleId
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
                    aorrg.RGPL = "unknown"
                    aorrg.RGPU = "na"
Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
197
198
199
                    aorrg.isIntermediate = true
                    qscript.add(aorrg)
                  } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                    "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
                } else {
200
                  val oldBamFile: File = inputBam.get
Peter van 't Hof's avatar
Peter van 't Hof committed
201
                  val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
202
                  val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
205
206
207
208
                  val baiLn = Ln(qscript, oldIndex, newIndex)
                  add(baiLn)

                  val bamLn = Ln(qscript, oldBamFile, bamFile.get)
                  bamLn.deps :+= baiLn.output
                  add(bamLn)
209
210
211
212
213
214
215
216
217
218

                  val bamMetrics = new BamMetrics(qscript)
                  bamMetrics.sampleId = Some(sampleId)
                  bamMetrics.libId = Some(libId)
                  bamMetrics.inputBam = bamFile.get
                  bamMetrics.outputDir = new File(libDir, "metrics")
                  bamMetrics.init()
                  bamMetrics.biopetScript()
                  addAll(bamMetrics.functions)
                  addSummaryQScript(bamMetrics)
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
                }
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
223
224
225
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
226
227
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
228
229
230
231
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
232
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
235
236
237
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
240
          addAll(vc.functions)
241
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
242
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
246
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
247
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
248
249
      if (input == Nil) None
      else if (input.tail == Nil) {
Peter van 't Hof's avatar
Peter van 't Hof committed
250
        val bamFile = new File(sampleDir, s"$sampleId.bam")
Peter van 't Hof's avatar
Peter van 't Hof committed
251
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
252
        val newIndex: File = new File(sampleDir, s"$sampleId.bai")
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
256
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
257
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
258
259
260
261
262
263
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
264
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
265
        //FIXME: making this file intermediate make the pipeline restart unnessery jobs
266
        //md.isIntermediate = isIntermediate
Peter van 't Hof's avatar
Peter van 't Hof committed
267
        add(md)
268
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
269
270
271
272
        Some(md.output)
      }
    }

273
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
276
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
        case _               => None
      }
279
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
280

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
281
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
282
      Some(makeVariantcalling(multisample = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
285
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
286
287
288
    def addJobs(): Unit = {
      addPerLibJobs()

289
      preProcessBam.foreach { bam =>
290
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
291
        bamMetrics.sampleId = Some(sampleId)
292
        bamMetrics.inputBam = bam
293
        bamMetrics.outputDir = new File(sampleDir, "metrics")
294
295
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
296
297
298
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
299
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
300
301
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
302
          vc.inputBams = bam :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
303
304
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
305
306
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
307
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
308
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
309
310
311
    }
  }

312
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
313
314
315
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
316
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
317
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
318
319
  } else None

320
321
322
323
324
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
325
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
326
  def addMultiSampleJobs(): Unit = {
327
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
328
      vc.outputDir = new File(outputDir, "variantcalling")
329
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
330
331
      vc.init()
      vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
332
      addAll(vc.functions)
333
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
334

335
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
336
337
338
339
340
341
342
        toucan.outputDir = new File(outputDir, "annotation")
        toucan.inputVCF = vc.finalFile
        toucan.init()
        toucan.biopetScript()
        addAll(toucan.functions)
        addSummaryQScript(toucan)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
343
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
344
345

    svCalling.foreach(sv => {
Peter van 't Hof's avatar
Peter van 't Hof committed
346
      sv.outputDir = new File(outputDir, "sv_calling")
Peter van 't Hof's avatar
Peter van 't Hof committed
347
      samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1))))
Peter van 't Hof's avatar
Peter van 't Hof committed
348
349
350
351
352
      sv.init()
      sv.biopetScript()
      addAll(sv.functions)
      addSummaryQScript(sv)
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
353
354
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
355
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
356
357
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
358
  /** Settings of pipeline for summary */
359
360
361
362
363
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
364
      "reference" -> referenceSummary,
365
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
366
367
368
369
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")),
      "annotation" -> annotation.isDefined,
      "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
      "sv_calling" -> svCalling.isDefined
370
371
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
372

Peter van 't Hof's avatar
Peter van 't Hof committed
373
  /** Files for the summary */
374
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
375
}