Mapping.scala 18.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.pipelines.mapping

18
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import java.util.Date
20

21
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.config.Configurable
23
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat, _ }
27
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
31
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
32
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
import org.broadinstitute.gatk.queue.QScript

import scala.math._
36

37
// TODO: documentation
38
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  def this() = this(null)
bow's avatar
bow committed
41
42

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var input_R1: File = _
bow's avatar
bow committed
44
45

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  var input_R2: Option[File] = None
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var outputName: String = _
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
61
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
68

69
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
72

73
74
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93
  protected var paired: Boolean = false
94
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
  /** location of summary file */
98
99
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

100
101
102
103
104
  override def defaults = Map("gsnap" -> Map("batch" -> 4))

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
  )
106

Peter van 't Hof's avatar
Peter van 't Hof committed
107
  /** File to add to the summary */
108
109
  def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1,
    "reference" -> referenceFasta()) ++
110
    (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map())
111

Peter van 't Hof's avatar
Peter van 't Hof committed
112
  /** Settings to add to summary */
113
114
115
116
117
118
119
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
    "numberChunks" -> numberChunks.getOrElse(1)
120
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
121

122
123
124
125
126
127
128
129
130
131
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
132
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
  def init() {
134
135
    require(outputDir != null, "Missing output directory on mapping module")
    require(input_R1 != null, "Missing output directory on mapping module")
136
137
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
138

Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    paired = input_R2.isDefined
143

144
    if (readgroupId == null) readgroupId = sampleId.get + "-" + libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
145
    else if (readgroupId == null) readgroupId = config("readgroup_id")
bow's avatar
bow committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
148

Peter van 't Hof's avatar
Peter van 't Hof committed
149
    if (chunking) {
150
      if (numberChunks.isEmpty) {
151
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
152
        else {
153
          val chunkSize: Int = config("chunksize", 1 << 30)
154
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
155
          else input_R1.length
156
157
158
159
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
161
  }
bow's avatar
bow committed
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
164
  def biopetScript() {
165
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
166
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      flexiprep.input_R1 = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      flexiprep.input_R2 = input_R2
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      flexiprep.sampleId = this.sampleId
170
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
      flexiprep.init()
      flexiprep.runInitialJobs()
173
    }
bow's avatar
bow committed
174
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
177

Peter van 't Hof's avatar
Peter van 't Hof committed
178
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
183
184
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
        chunkDir -> (new File(chunkDir, input_R1.getName),
          if (paired) Some(new File(chunkDir, input_R2.get.getName)) else None)
      }).toMap
      else if (skipFlexiprep) Map(outputDir -> (input_R1, if (paired) input_R2 else None))
Peter van 't Hof's avatar
Peter van 't Hof committed
185
      else Map(outputDir -> (flexiprep.input_R1, flexiprep.input_R2))
186
    }
bow's avatar
bow committed
187

Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      fastSplitter_R1.input = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
192
193
      fastSplitter_R1.isIntermediate = true
      add(fastSplitter_R1)
bow's avatar
bow committed
194

Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
        fastSplitter_R2.input = input_R2.get
Peter van 't Hof's avatar
Peter van 't Hof committed
198
        for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2.get
199
200
        fastSplitter_R2.isIntermediate = true
        add(fastSplitter_R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
      }
    }
bow's avatar
bow committed
203

Peter van 't Hof's avatar
Peter van 't Hof committed
204
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
208
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
        fastq_R1_output :+= R1
Peter van 't Hof's avatar
Peter van 't Hof committed
213
        R2.foreach(R2 => fastq_R2_output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
      }
215

Peter van 't Hof's avatar
Peter van 't Hof committed
216
      val outputBam = new File(chunkDir, outputName + ".bam")
217
218
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
223
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
226
227
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
228
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
229
      }
230
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
231
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
232
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
233
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
234
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
235
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
236
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
    }
bow's avatar
bow committed
238

239
240
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
241
      bamFile = new File(outputDir, outputName + ".dedup.bam")
242
243
244
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
245
246
247
248
249
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
250

Peter van 't Hof's avatar
Peter van 't Hof committed
251
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
252
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
256

Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
259
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
262

    if (config("generate_wig", default = false).asBoolean)
      addAll(Bam2Wig(this, finalBamFile).functions)
263

Peter van 't Hof's avatar
Peter van 't Hof committed
264
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
265
  }
bow's avatar
bow committed
266

Peter van 't Hof's avatar
Peter van 't Hof committed
267
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
268
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
269
270
271
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
272
    bwaAlnR1.isIntermediate = true
273
274
275
    add(bwaAlnR1)

    val samFile: File = if (paired) {
276
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
277
      bwaAlnR2.fastq = R2.get
278
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
279
      bwaAlnR2.isIntermediate = true
280
281
282
283
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
284
      bwaSampe.fastqR2 = R2.get
285
286
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
287
      bwaSampe.r = getReadGroupBwa
288
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
289
      bwaSampe.isIntermediate = true
290
291
292
293
294
295
296
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
297
      bwaSamse.r = getReadGroupBwa
298
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
299
      bwaSamse.isIntermediate = true
300
301
302
303
304
305
306
307
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
308
    sortSam.output
309
310
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
311
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
312
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
313
    val bwaCommand = new BwaMem(this)
314
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
315
    if (paired) bwaCommand.R2 = R2.get
316
    bwaCommand.R = Some(getReadGroupBwa)
317
318
    val sortSam = new SortSam(this)
    sortSam.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
319
    add(bwaCommand | sortSam, chunking || !skipMarkduplicates)
Peter van 't Hof's avatar
Peter van 't Hof committed
320
    output
321
  }
322

Peter van 't Hof's avatar
Peter van 't Hof committed
323
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
324
    val gsnapCommand = new Gsnap(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
325
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
bow's avatar
bow committed
326
327
328
329
    gsnapCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
    gsnapCommand.isIntermediate = true
    add(gsnapCommand)

330
    val reorderSam = new ReorderSam(this)
331
    reorderSam.input = gsnapCommand.output
332
333
334
335
    reorderSam.output = swapExt(output.getParent, output, ".sorted.bam", ".reordered.bam")
    add(reorderSam)

    addAddOrReplaceReadGroups(reorderSam.output, output)
bow's avatar
bow committed
336
337
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
338
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
339
340
341
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
342
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
bow's avatar
bow committed
343
344
345
    tophat.output_dir = new File(outputDir, "tophat_out")
    // always output BAM
    tophat.no_convert_bam = false
346
347
    // and always keep input ordering
    tophat.keep_fasta_order = true
bow's avatar
bow committed
348
349
    add(tophat)

350
351
352
353
    // fix unmapped file coordinates
    val fixedUnmapped = new File(tophat.output_dir, "unmapped_fixup.sam")
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
354
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
355
356
357
358
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
bow's avatar
bow committed
359
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.output_dir, "unmapped_fixup.sorted.bam"))
360
361
362
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
363

364
365
366
367
368
369
370
371
372
373
374
375
376
377
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
      tophat.output_dir, "coordinate")
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

    addAddOrReplaceReadGroups(reorderSam.output, output)
bow's avatar
bow committed
378
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
379
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
380
  def addStampy(R1: File, R2: Option[File], output: File): File = {
381

Peter van 't Hof's avatar
Peter van 't Hof committed
382
    var RG: String = "ID:" + readgroupId + ","
383
384
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
385
386
387
388
389
390
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
391
392
393

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
394
    if (paired) stampyCmd.R2 = R2.get
395
396
397
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
    stampyCmd.output = this.swapExt(output.getParent, output, ".bam", ".sam")
398
399
    stampyCmd.isIntermediate = true
    add(stampyCmd)
400
401
402
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
403
    sortSam.output
404
  }
405

Peter van 't Hof's avatar
Peter van 't Hof committed
406
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
407
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
408
    val bowtie = new Bowtie(this)
409
    bowtie.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
410
    if (paired) bowtie.R2 = R2
411
    bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
412
413
    bowtie.isIntermediate = true
    add(bowtie)
Peter van 't Hof's avatar
Peter van 't Hof committed
414
    addAddOrReplaceReadGroups(bowtie.output, output)
415
  }
416

Peter van 't Hof's avatar
Peter van 't Hof committed
417
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
418
419
  def addStar(R1: File, R2: Option[File], output: File): File = {
    val starCommand = Star(this, R1, R2, outputDir, isIntermediate = true)
420
    add(starCommand)
421
    addAddOrReplaceReadGroups(starCommand.outputSam, output)
422
  }
423

Peter van 't Hof's avatar
Peter van 't Hof committed
424
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
425
426
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
    val starCommand = Star._2pass(this, R1, R2, outputDir, isIntermediate = true)
427
    addAll(starCommand._2)
428
    addAddOrReplaceReadGroups(starCommand._1, output)
429
  }
430

Peter van 't Hof's avatar
Peter van 't Hof committed
431
  /** Adds AddOrReplaceReadGroups */
432
433
  def addAddOrReplaceReadGroups(input: File, output: File): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
434
435
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
436
    addOrReplaceReadGroups.RGID = readgroupId
437
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
438
439
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
440
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
441
442
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
443
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
444
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
445

446
    addOrReplaceReadGroups.output
Peter van 't Hof's avatar
Peter van 't Hof committed
447
  }
bow's avatar
bow committed
448

Peter van 't Hof's avatar
Peter van 't Hof committed
449
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
450
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
451
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
452
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
453
454
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
455
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
456
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
457
458
459
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
460

461
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
462
  }
463
464
465
466

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
467
468
   * @param file input file
   * @param runDir directory to extract when needed
469
470
471
   * @return returns extracted file
   */
  def extractIfNeeded(file: File, runDir: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
472
473
    if (file == null) file
    else if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
474
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
475
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
476
477
478
      val zcatCommand = Zcat(this, file, newFile)
      zcatCommand.isIntermediate = true
      add(zcatCommand)
Peter van 't Hof's avatar
Peter van 't Hof committed
479
480
      newFile
    } else if (file.getName.endsWith(".bz2")) {
481
482
483
484
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
      pbzip2.isIntermediate = true
      add(pbzip2)
Peter van 't Hof's avatar
Peter van 't Hof committed
485
486
      newFile
    } else file
487
488
  }

489
490
}

491
object Mapping extends PipelineCommand