ShivaTrait.scala 11.1 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
22
import nl.lumc.sasc.biopet.core.MultiSampleQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, SamToFastq, MarkDuplicates }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
29
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
34
35
36
 * Created by pjvan_thof on 2/26/15.
 */
trait ShivaTrait extends MultiSampleQScript with SummaryQScript {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
37
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
40
  def init: Unit = {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
44
45
46
47
  def biopetScript: Unit = {
    addSamplesJobs()

    addSummaryJobs
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Method to make the variantcalling submodule of shiva */
49
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
50
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
51
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
54
55
56
57
58
59
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
60
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
61
  def makeSample(id: String) = new Sample(id)
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
65
66
67
68
69
70
71
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
        case Some(preProcessBam) => Map("bamFile" -> preProcessBam)
        case _                   => Map()
      }
    }
72

Peter van 't Hof's avatar
Peter van 't Hof committed
73
    /** Sample specific stats to add to summary */
74
75
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
76
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
79
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
80
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83
84
85
86
87
88
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
          case (Some(bamFile), Some(preProcessBam)) => Map("bamFile" -> bamFile, "preProcessBam" -> preProcessBam)
          case (Some(bamFile), _)                   => Map("bamFile" -> bamFile)
          case _                                    => Map()
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
      /** Library specific stats to add to summary */
91
92
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
93
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
96
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
99
100
101
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
102
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
              case false => {
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
              }
            }
          case _ => (None, None, None)
        }

120
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
124
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
125
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
128
129
130
131
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
132
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
            case true => {
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
            }
            case false => {
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

148
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
              inputSam.close

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bow's avatar
bow committed
174
                bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
                add(bamLn)
              }

            }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
          mapping.init
          mapping.biopetScript
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
190
        variantcalling.foreach(vc => {
191
192
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
195
196
197
198
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
199
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
200
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
204
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
205
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
207
208
209
210
211
212
213
214
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
215
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
216
217
218
219
220
221
222
223
224
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.bam")
        md.isIntermediate = isIntermediate
        add(md)
225
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
229
        Some(md.output)
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
230
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.map(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
233
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
234
235
236
        case _               => None
      }
    }).flatten.toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
237

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
238
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
239
240
241
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
242
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
    def addJobs(): Unit = {
      addPerLibJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
246
      if (preProcessBam.isDefined) {
247
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
248
249
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
250
        bamMetrics.outputDir = sampleDir
Peter van 't Hof's avatar
Peter van 't Hof committed
251
252
253
254
255
        bamMetrics.init
        bamMetrics.biopetScript
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
256
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
260
261
262
263
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
          vc.inputBams = preProcessBam.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
264
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
265
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
    }
  }

269
  lazy val variantcalling = if (config("multisample_sample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
270
271
272
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
273
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
274
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
275
    variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
276
277
278
279
280
      vc.outputDir = new File(outputDir, "variantcalling")
      vc.inputBams = samples.map(_._2.preProcessBam).flatten.toList
      vc.init
      vc.biopetScript
      addAll(vc.functions)
281
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
282
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
285
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
286
287
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
288
  /** Settings of pipeline for summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
289
290
  def summarySettings = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
291
  /** Files for the summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
292
293
  def summaryFiles = Map()
}