ShivaTrait.scala 11.6 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
 * Created by pjvan_thof on 2/26/15.
 */
35
trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
41
  def init: Unit = {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
42
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
45
46
47
48
  def biopetScript: Unit = {
    addSamplesJobs()

    addSummaryJobs
  }

49
50
51
52
53
54
55
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  /** Method to make the variantcalling submodule of shiva */
57
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
65
66
67
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  def makeSample(id: String) = new Sample(id)
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
76
77
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
      }
    }
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
    /** Sample specific stats to add to summary */
82
83
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
84
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
87
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
92
93
94
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
          case (Some(b), _)        => Map("bamFile" -> b)
          case _                   => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
      /** Library specific stats to add to summary */
99
100
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
101
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
104
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
108
109
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
110
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
113
114
115
116
117
118
119
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
120
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
124
125
126
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

127
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
131
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
132
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
138
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
139
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
140
            case true =>
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
143
144
145
146
147
148
149
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
150
            case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
151
152
153
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

154
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
157
158
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
159
              inputSam.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bow's avatar
bow committed
180
                bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
184
185
186
187
                add(bamLn)
              }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
188
189
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
193
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
194
        variantcalling.foreach(vc => {
195
196
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
199
200
201
202
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
203
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
208
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
213
214
215
216
217
218
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
219
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
220
221
222
223
224
225
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
226
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
227
228
        md.isIntermediate = isIntermediate
        add(md)
229
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
230
231
232
233
        Some(md.output)
      }
    }

234
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
235
236
237
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
        case _               => None
      }
240
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
241

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
242
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
246
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
249
    def addJobs(): Unit = {
      addPerLibJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
250
      if (preProcessBam.isDefined) {
251
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
254
        bamMetrics.outputDir = new File(sampleDir, "metrics")
255
256
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
260
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
261
262
263
264
265
266
267
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
          vc.inputBams = preProcessBam.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
268
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
269
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
270
271
272
    }
  }

Sander Bollen's avatar
Sander Bollen committed
273
  lazy val variantcalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
276
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
277
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
278
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
279
    variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
280
      vc.outputDir = new File(outputDir, "variantcalling")
281
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
282
283
284
      vc.init
      vc.biopetScript
      addAll(vc.functions)
285
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
286
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
287
288
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
289
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
290
291
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
292
  /** Settings of pipeline for summary */
293
294
295
296
297
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
298
      "reference" -> referenceSummary,
299
300
301
302
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
303

Peter van 't Hof's avatar
Peter van 't Hof committed
304
  /** Files for the summary */
305
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
306
}