ShivaTrait.scala 12.8 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
35
trait ShivaTrait extends MultiSampleQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
42
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
    addSamplesJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
46
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
  }

49
50
51
52
53
54
55
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  /** Method to make the variantcalling submodule of shiva */
57
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
65
66
67
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  def makeSample(id: String) = new Sample(id)
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
76
77
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
      }
    }
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
    /** Sample specific stats to add to summary */
82
83
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
84
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
87
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
92
93
94
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
          case (Some(b), _)        => Map("bamFile" -> b)
          case _                   => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
      /** Library specific stats to add to summary */
99
100
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
101
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
104
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
108
109
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
110
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
113
114
115
116
117
118
119
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
120
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
124
125
126
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

127
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
131
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
132
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
138
139
            inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5"))
            mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
Peter van 't Hof's avatar
Peter van 't Hof committed
140
          })
Peter van 't Hof's avatar
Peter van 't Hof committed
141
          case (false, true) => {
Peter van 't Hof's avatar
Peter van 't Hof committed
142
            inputFiles :+= new InputFile(config("bam"), config("bam_md5"))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
145
            config("bam_to_fastq", default = false).asBoolean match {
              case true =>
                val samToFastq = SamToFastq(qscript, config("bam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
                  new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
                  new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
                samToFastq.isIntermediate = true
                qscript.add(samToFastq)
                mapping.foreach(mapping => {
                  mapping.input_R1 = samToFastq.fastqR1
                  mapping.input_R2 = Some(samToFastq.fastqR2)
                })
              case false =>
                val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
                val readGroups = inputSam.getFileHeader.getReadGroups

                val readGroupOke = readGroups.forall(readGroup => {
                  if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                  if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                  readGroup.getSample == sampleId && readGroup.getLibrary == libId
                })
                inputSam.close()

                if (!readGroupOke) {
                  if (config("correct_readgroups", default = false).asBoolean) {
                    logger.info("Correcting readgroups, file:" + config("bam"))
                    val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                    aorrg.RGID = sampleId + "-" + libId
                    aorrg.RGLB = libId
                    aorrg.RGSM = sampleId
                    aorrg.isIntermediate = true
                    qscript.add(aorrg)
                  } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                    "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
                } else {
                  val oldBamFile: File = config("bam")
                  val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                  val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                  val baiLn = Ln(qscript, oldIndex, newIndex)
                  add(baiLn)

                  val bamLn = Ln(qscript, oldBamFile, bamFile.get)
                  bamLn.deps :+= baiLn.output
                  add(bamLn)
                }
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
191
192
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
193
194
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
197
198
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
199
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
204
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
207
          addAll(vc.functions)
208
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
213
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
214
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
217
218
219
220
221
222
223
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
224
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
227
228
229
230
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
231
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
232
233
        md.isIntermediate = isIntermediate
        add(md)
234
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
235
236
237
238
        Some(md.output)
      }
    }

239
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
242
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
        case _               => None
      }
245
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
246

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
247
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
248
249
250
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
251
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
254
    def addJobs(): Unit = {
      addPerLibJobs()

255
      preProcessBam.foreach { bam =>
256
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
257
        bamMetrics.sampleId = Some(sampleId)
258
        bamMetrics.inputBam = bam
259
        bamMetrics.outputDir = new File(sampleDir, "metrics")
260
261
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
264
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

265
266
267
268
269
        val oldIndex: File = new File(bam.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(bam + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

Peter van 't Hof's avatar
Peter van 't Hof committed
270
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
271
272
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
273
          vc.inputBams = bam :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
276
277
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
278
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
279
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
280
281
282
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
283
  lazy val variantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
284
285
286
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
287
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
288
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
289
290
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
291
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
292
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
293
    variantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
294
      vc.outputDir = new File(outputDir, "variantcalling")
295
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
296
297
      vc.init()
      vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
298
      addAll(vc.functions)
299
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
300

Peter van 't Hof's avatar
Peter van 't Hof committed
301
      if (config("annotation", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
302
303
304
305
306
307
308
309
        val toucan = new Toucan(this)
        toucan.outputDir = new File(outputDir, "annotation")
        toucan.inputVCF = vc.finalFile
        toucan.init()
        toucan.biopetScript()
        addAll(toucan.functions)
        addSummaryQScript(toucan)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
310
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
311
312

    svCalling.foreach(sv => {
Peter van 't Hof's avatar
Peter van 't Hof committed
313
      sv.outputDir = new File(outputDir, "sv_calling")
Peter van 't Hof's avatar
Peter van 't Hof committed
314
      samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1))))
Peter van 't Hof's avatar
Peter van 't Hof committed
315
316
317
318
319
      sv.init()
      sv.biopetScript()
      addAll(sv.functions)
      addSummaryQScript(sv)
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
320
321
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
322
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
323
324
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
325
  /** Settings of pipeline for summary */
326
327
328
329
330
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
331
      "reference" -> referenceSummary,
332
333
334
335
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
336

Peter van 't Hof's avatar
Peter van 't Hof committed
337
  /** Files for the summary */
338
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
339
}