ShivaTrait.scala 12 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
 * Created by pjvan_thof on 2/26/15.
 */
35
trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
42
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
    addSamplesJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
46
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
  }

49
50
51
52
53
54
55
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  /** Method to make the variantcalling submodule of shiva */
57
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
65
66
67
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  def makeSample(id: String) = new Sample(id)
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
76
77
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
      }
    }
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
    /** Sample specific stats to add to summary */
82
83
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
84
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
87
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
92
93
94
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
          case (Some(b), _)        => Map("bamFile" -> b)
          case _                   => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
      /** Library specific stats to add to summary */
99
100
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
101
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
104
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
108
109
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
110
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
113
114
115
116
117
118
119
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
120
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
124
125
126
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

127
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
131
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
132
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
138
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
139
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
140
            case true =>
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
143
144
145
146
147
148
149
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
150
            case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
151
152
153
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

154
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
157
158
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
159
              inputSam.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bow's avatar
bow committed
180
                bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
184
185
186
187
                add(bamLn)
              }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
188
189
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
193
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
194
        variantcalling.foreach(vc => {
195
196
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
199
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
202
          addAll(vc.functions)
203
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
208
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
213
214
215
216
217
218
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
219
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
220
221
222
223
224
225
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
226
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
227
228
        md.isIntermediate = isIntermediate
        add(md)
229
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
230
231
232
233
        Some(md.output)
      }
    }

234
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
235
236
237
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
        case _               => None
      }
240
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
241

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
242
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
246
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
249
    def addJobs(): Unit = {
      addPerLibJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
250
      if (preProcessBam.isDefined) {
251
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
254
        bamMetrics.outputDir = new File(sampleDir, "metrics")
255
256
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
260
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
261
262
263
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
          vc.inputBams = preProcessBam.get :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
264
265
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
268
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
269
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
270
271
272
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
273
  lazy val variantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
276
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
279
280
281
282
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
    val svCalling = new ShivaSvCalling(this)
    samples.foreach(x => x._2.preProcessBam.foreach(bam => svCalling.addBamFile(bam, Some(x._1))))
    Some(svCalling)
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
283
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
284
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
285
    variantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
286
      vc.outputDir = new File(outputDir, "variantcalling")
287
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
288
289
      vc.init()
      vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
290
      addAll(vc.functions)
291
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
292
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
293
294

    svCalling.foreach(sv => {
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      sv.outputDir = new File(outputDir, "sv_calling")
Peter van 't Hof's avatar
Peter van 't Hof committed
296
297
298
299
300
      sv.init()
      sv.biopetScript()
      addAll(sv.functions)
      addSummaryQScript(sv)
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
301
302
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
303
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
304
305
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
306
  /** Settings of pipeline for summary */
307
308
309
310
311
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
312
      "reference" -> referenceSummary,
313
314
315
316
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
317

Peter van 't Hof's avatar
Peter van 't Hof committed
318
  /** Files for the summary */
319
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
320
}