ShivaTrait.scala 10.4 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
7
import nl.lumc.sasc.biopet.core.MultiSampleQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.extensions.Ln
9
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, SamToFastq, MarkDuplicates }
Peter van 't Hof's avatar
Peter van 't Hof committed
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
12
13
14
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
17
18
19
20
21
 * Created by pjvan_thof on 2/26/15.
 */
trait ShivaTrait extends MultiSampleQScript with SummaryQScript {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
22
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
  def init: Unit = {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
26
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
27
28
29
30
31
32
  def biopetScript: Unit = {
    addSamplesJobs()

    addSummaryJobs
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
33
  /** Method to make the variantcalling submodule of shiva */
34
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
35
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
37
38
39
40
41
42
43
44
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  def makeSample(id: String) = new Sample(id)
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
51
52
53
54
55
56
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
        case Some(preProcessBam) => Map("bamFile" -> preProcessBam)
        case _                   => Map()
      }
    }
57

Peter van 't Hof's avatar
Peter van 't Hof committed
58
    /** Sample specific stats to add to summary */
59
60
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
61
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
64
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
65
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
68
69
70
71
72
73
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
          case (Some(bamFile), Some(preProcessBam)) => Map("bamFile" -> bamFile, "preProcessBam" -> preProcessBam)
          case (Some(bamFile), _)                   => Map("bamFile" -> bamFile)
          case _                                    => Map()
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
74

Peter van 't Hof's avatar
Peter van 't Hof committed
75
      /** Library specific stats to add to summary */
76
77
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
78
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
81
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
84
85
86
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
87
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
              case false => {
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
              }
            }
          case _ => (None, None, None)
        }

105
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
108
109
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
110
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
113
114
115
116
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
117
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
Peter van 't Hof's avatar
Peter van 't Hof committed
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
            case true => {
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
            }
            case false => {
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

133
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
              inputSam.close

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
                bamLn.deps :+= baiLn.out
                add(bamLn)
              }

            }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
          mapping.init
          mapping.biopetScript
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
175
        variantcalling.foreach(vc => {
176
177
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
181
182
183
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
184
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
185
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
189
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
190
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
        bamLn.deps :+= baiLn.out
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.bam")
        md.isIntermediate = isIntermediate
        add(md)
210
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
214
        Some(md.output)
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
215
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.map(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
216
217
218
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
        case _               => None
      }
    }).flatten.toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
222

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
223
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
226
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
227
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
228
229
230
    def addJobs(): Unit = {
      addPerLibJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
231
      if (preProcessBam.isDefined) {
232
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
235
        bamMetrics.outputDir = sampleDir
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
240
        bamMetrics.init
        bamMetrics.biopetScript
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

Peter van 't Hof's avatar
Peter van 't Hof committed
241
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
242
243
244
245
246
247
248
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
          vc.inputBams = preProcessBam.get :: Nil
          vc.init
          vc.biopetScript
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
249
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
250
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
251
252
253
    }
  }

254
  lazy val variantcalling = if (config("multisample_sample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
257
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
258
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
259
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
260
    variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
261
262
263
264
265
      vc.outputDir = new File(outputDir, "variantcalling")
      vc.inputBams = samples.map(_._2.preProcessBam).flatten.toList
      vc.init
      vc.biopetScript
      addAll(vc.functions)
266
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
267
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
268
269
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
270
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
271
272
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
273
  /** Settings of pipeline for summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
  def summarySettings = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
276
  /** Files for the summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
  def summaryFiles = Map()
}