ShivaTrait.scala 11.5 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
22
import nl.lumc.sasc.biopet.core.{ Reference, MultiSampleQScript }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, SamToFastq, MarkDuplicates }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
29
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * Created by pjvan_thof on 2/26/15.
 */
34
trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
37
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
40
  def init: Unit = {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
44
45
46
47
  def biopetScript: Unit = {
    addSamplesJobs()

    addSummaryJobs
  }

48
49
50
51
52
53
54
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
55
  /** Method to make the variantcalling submodule of shiva */
56
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
63
64
65
66
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
67
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  def makeSample(id: String) = new Sample(id)
69

Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
72
73
74
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
75
76
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
      }
    }
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
    /** Sample specific stats to add to summary */
81
82
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
83
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
86
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
87
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
91
92
93
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
          case (Some(b), _)        => Map("bamFile" -> b)
          case _                   => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
      /** Library specific stats to add to summary */
98
99
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
100
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
103
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
108
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
109
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
112
113
114
115
116
117
118
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
119
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
123
124
125
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

126
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
127
128
129
130
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
131
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
132
133
134
135
136
137
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
138
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
139
            case true =>
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
143
144
145
146
147
148
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
149
            case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

153
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
154
155
156
157
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
158
              inputSam.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bow's avatar
bow committed
179
                bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
180
181
182
183
184
185
186
                add(bamLn)
              }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
187
188
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191
192
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
193
        variantcalling.foreach(vc => {
194
195
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199
200
201
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
202
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
206
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
207
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
208
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
211
212
213
214
215
216
217
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
218
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
223
224
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
225
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
        md.isIntermediate = isIntermediate
        add(md)
228
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
231
232
        Some(md.output)
      }
    }

233
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
234
235
236
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
        case _               => None
      }
239
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
240

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
241
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
242
243
244
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
245
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
246
247
248
    def addJobs(): Unit = {
      addPerLibJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
249
      if (preProcessBam.isDefined) {
250
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
251
252
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
253
        bamMetrics.outputDir = sampleDir
254
255
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
256
257
258
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
259
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
262
263
264
265
266
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
          vc.inputBams = preProcessBam.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
267
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
268
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
269
270
271
    }
  }

272
  lazy val variantcalling = if (config("multisample_sample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
273
274
275
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
276
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
277
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
278
    variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
279
      vc.outputDir = new File(outputDir, "variantcalling")
280
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
281
282
283
      vc.init
      vc.biopetScript
      addAll(vc.functions)
284
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
285
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
286
287
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
288
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
289
290
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
291
  /** Settings of pipeline for summary */
292
293
294
295
296
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
297
      "reference" -> referenceSummary,
298
299
300
301
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
302

Peter van 't Hof's avatar
Peter van 't Hof committed
303
  /** Files for the summary */
304
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
305
}