ShivaTrait.scala 11.3 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
22
import nl.lumc.sasc.biopet.core.MultiSampleQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, SamToFastq, MarkDuplicates }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
29
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
34
35
36
 * Created by pjvan_thof on 2/26/15.
 */
trait ShivaTrait extends MultiSampleQScript with SummaryQScript {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
37
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
40
  def init: Unit = {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
44
45
46
47
  def biopetScript: Unit = {
    addSamplesJobs()

    addSummaryJobs
  }

48
49
50
51
52
53
54
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
55
  /** Method to make the variantcalling submodule of shiva */
56
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
63
64
65
66
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
67
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  def makeSample(id: String) = new Sample(id)
69

Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
72
73
74
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
75
        case Some(preProcessBam) => Map("preProcessBam" -> preProcessBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
78
        case _                   => Map()
      }
    }
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
    /** Sample specific stats to add to summary */
81
82
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
83
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
86
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
87
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
91
92
93
94
95
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
          case (Some(bamFile), Some(preProcessBam)) => Map("bamFile" -> bamFile, "preProcessBam" -> preProcessBam)
          case (Some(bamFile), _)                   => Map("bamFile" -> bamFile)
          case _                                    => Map()
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
      /** Library specific stats to add to summary */
98
99
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
100
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
103
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
108
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
109
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
              case false => {
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
              }
            }
          case _ => (None, None, None)
        }

127
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
131
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
132
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
138
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
139
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
            case true => {
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
            }
            case false => {
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

155
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
              inputSam.close

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bow's avatar
bow committed
181
                bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
                add(bamLn)
              }

            }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
          mapping.init
          mapping.biopetScript
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
197
        variantcalling.foreach(vc => {
198
199
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202
203
204
205
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
206
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
207
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
210
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
211
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
212
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
215
216
217
218
219
220
221
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
222
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
223
224
225
226
227
228
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
229
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
230
231
        md.isIntermediate = isIntermediate
        add(md)
232
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
235
236
        Some(md.output)
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
237
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.map(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
240
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
243
        case _               => None
      }
    }).flatten.toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
244

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
245
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
246
247
248
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
249
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
250
251
252
    def addJobs(): Unit = {
      addPerLibJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
253
      if (preProcessBam.isDefined) {
254
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
257
        bamMetrics.outputDir = sampleDir
Peter van 't Hof's avatar
Peter van 't Hof committed
258
259
260
261
262
        bamMetrics.init
        bamMetrics.biopetScript
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
263
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
264
265
266
267
268
269
270
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
          vc.inputBams = preProcessBam.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
271
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
272
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
273
274
275
    }
  }

276
  lazy val variantcalling = if (config("multisample_sample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
279
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
280
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
281
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
282
    variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
285
286
287
      vc.outputDir = new File(outputDir, "variantcalling")
      vc.inputBams = samples.map(_._2.preProcessBam).flatten.toList
      vc.init
      vc.biopetScript
      addAll(vc.functions)
288
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
289
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
290
291
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
292
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
293
294
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
295
  /** Settings of pipeline for summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
296
297
  def summarySettings = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
298
  /** Files for the summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
299
300
  def summaryFiles = Map()
}