ShivaTrait.scala 12.9 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
30
31
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
36
trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
37
38
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
44
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
    addSamplesJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
47
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
  }

50
51
52
53
54
55
56
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  /** Method to make the variantcalling submodule of shiva */
58
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
63
64
65
66
67
68
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
69
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
70
  def makeSample(id: String) = new Sample(id)
71

Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
74
75
76
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
77
78
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
      }
    }
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
    /** Sample specific stats to add to summary */
83
84
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
85
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
88
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
92
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
93
94
95
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
          case (Some(b), _)        => Map("bamFile" -> b)
          case _                   => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
98

Peter van 't Hof's avatar
Peter van 't Hof committed
99
      /** Library specific stats to add to summary */
100
101
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
102
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
105
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
108
109
110
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
111
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
114
115
116
117
118
119
120
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
121
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
122
123
124
125
126
127
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

128
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
131
132
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
133
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
136
137
138
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
            inputFiles :+= InputFile(mapping.input_R1, config("R1_md5"))
            mapping.input_R2.foreach(inputFiles :+= InputFile(_, config("R2_md5")))
Peter van 't Hof's avatar
Peter van 't Hof committed
141
          })
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
          case (false, true) => {
            inputFiles :+= InputFile(config("bam"), config("bam_md5"))
            config("bam_to_fastq", default = false).asBoolean match {
              case true =>
                val samToFastq = SamToFastq(qscript, config("bam"),
                  new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                  new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
                samToFastq.isIntermediate = true
                qscript.add(samToFastq)
                mapping.foreach(mapping => {
                  mapping.input_R1 = samToFastq.fastqR1
                  mapping.input_R2 = Some(samToFastq.fastqR2)
                })
              case false =>
                val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
                val readGroups = inputSam.getFileHeader.getReadGroups

                val readGroupOke = readGroups.forall(readGroup => {
                  if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                  if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                  readGroup.getSample == sampleId && readGroup.getLibrary == libId
                })
                inputSam.close()

                if (!readGroupOke) {
                  if (config("correct_readgroups", default = false).asBoolean) {
                    logger.info("Correcting readgroups, file:" + config("bam"))
                    val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                    aorrg.RGID = sampleId + "-" + libId
                    aorrg.RGLB = libId
                    aorrg.RGSM = sampleId
                    aorrg.isIntermediate = true
                    qscript.add(aorrg)
                  } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                    "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
                } else {
                  val oldBamFile: File = config("bam")
                  val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                  val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                  val baiLn = Ln(qscript, oldIndex, newIndex)
                  add(baiLn)

                  val bamLn = Ln(qscript, oldBamFile, bamFile.get)
                  bamLn.deps :+= baiLn.output
                  add(bamLn)
                }
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191
192
193
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
194
195
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
200
        variantcalling.foreach(vc => {
201
202
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
205
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
206
207
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
208
          addAll(vc.functions)
209
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
214
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
215
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
216
217
218
219
220
221
222
223
224
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
225
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
229
230
231
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
232
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
        md.isIntermediate = isIntermediate
        add(md)
235
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
        Some(md.output)
      }
    }

240
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
243
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
244
245
        case _               => None
      }
246
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
247

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
248
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
252
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
    def addJobs(): Unit = {
      addPerLibJobs()

256
      preProcessBam.foreach { bam =>
257
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
258
        bamMetrics.sampleId = Some(sampleId)
259
        bamMetrics.inputBam = bam
260
        bamMetrics.outputDir = new File(sampleDir, "metrics")
261
262
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
263
264
265
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

266
267
268
269
270
        val oldIndex: File = new File(bam.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(bam + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

Peter van 't Hof's avatar
Peter van 't Hof committed
271
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
274
          vc.inputBams = bam :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
275
276
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
279
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
280
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
281
282
283
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
284
  lazy val variantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
285
286
287
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
288
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
289
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
290
291
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
292
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
293
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
294
    variantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      vc.outputDir = new File(outputDir, "variantcalling")
296
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
297
298
      vc.init()
      vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
299
      addAll(vc.functions)
300
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
301
302
303
304
305
306
307
308
309
310

      if (config("annotation", default = true).asBoolean) {
        val toucan = new Toucan(this)
        toucan.outputDir = new File(outputDir, "annotation")
        toucan.inputVCF = vc.finalFile
        toucan.init()
        toucan.biopetScript()
        addAll(toucan.functions)
        addSummaryQScript(toucan)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
311
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
312
313

    svCalling.foreach(sv => {
Peter van 't Hof's avatar
Peter van 't Hof committed
314
      sv.outputDir = new File(outputDir, "sv_calling")
Peter van 't Hof's avatar
Peter van 't Hof committed
315
      samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1))))
Peter van 't Hof's avatar
Peter van 't Hof committed
316
317
318
319
320
      sv.init()
      sv.biopetScript()
      addAll(sv.functions)
      addSummaryQScript(sv)
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
321
322
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
323
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
324
325
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
326
  /** Settings of pipeline for summary */
327
328
329
330
331
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
332
      "reference" -> referenceSummary,
333
334
335
336
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
337

Peter van 't Hof's avatar
Peter van 't Hof committed
338
  /** Files for the summary */
339
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
340
}