Mapping.scala 19.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.pipelines.mapping

18
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import java.util.Date
20

21
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.config.Configurable
23
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat, _ }
27
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
31
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
32
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
import org.broadinstitute.gatk.queue.QScript

import scala.math._
36

37
// TODO: documentation
38
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  def this() = this(null)
bow's avatar
bow committed
41
42

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var input_R1: File = _
bow's avatar
bow committed
44
45

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  var input_R2: Option[File] = None
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var outputName: String = _
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
61
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
68

69
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
72

73
74
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93
  protected var paired: Boolean = false
94
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
  /** location of summary file */
98
99
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

100
101
102
103
104
  override def defaults = Map("gsnap" -> Map("batch" -> 4))

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
  )
106

Peter van 't Hof's avatar
Peter van 't Hof committed
107
  /** File to add to the summary */
108
109
  def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1,
    "reference" -> referenceFasta()) ++
110
    (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map())
111

Peter van 't Hof's avatar
Peter van 't Hof committed
112
  /** Settings to add to summary */
113
114
115
116
117
118
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
119
    "numberChunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
120
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
121

122
123
124
125
126
127
128
129
130
131
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
132
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
  def init() {
134
135
    require(outputDir != null, "Missing output directory on mapping module")
    require(input_R1 != null, "Missing output directory on mapping module")
136
137
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
138

Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    paired = input_R2.isDefined
143

Peter van 't Hof's avatar
Peter van 't Hof committed
144
145
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
148

Peter van 't Hof's avatar
Peter van 't Hof committed
149
    if (chunking) {
150
      if (numberChunks.isEmpty) {
151
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
152
        else {
153
          val chunkSize: Int = config("chunksize", 1 << 30)
154
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
155
          else input_R1.length
156
157
158
159
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
161
  }
bow's avatar
bow committed
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
164
  def biopetScript() {
165
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
166
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      flexiprep.input_R1 = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      flexiprep.input_R2 = input_R2
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      flexiprep.sampleId = this.sampleId
170
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
      flexiprep.init()
      flexiprep.runInitialJobs()
173
    }
bow's avatar
bow committed
174
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
177

Peter van 't Hof's avatar
Peter van 't Hof committed
178
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
183
184
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
        chunkDir -> (new File(chunkDir, input_R1.getName),
          if (paired) Some(new File(chunkDir, input_R2.get.getName)) else None)
      }).toMap
      else if (skipFlexiprep) Map(outputDir -> (input_R1, if (paired) input_R2 else None))
Peter van 't Hof's avatar
Peter van 't Hof committed
185
      else Map(outputDir -> (flexiprep.input_R1, flexiprep.input_R2))
186
    }
bow's avatar
bow committed
187

Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      fastSplitter_R1.input = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
192
193
      fastSplitter_R1.isIntermediate = true
      add(fastSplitter_R1)
bow's avatar
bow committed
194

Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
        fastSplitter_R2.input = input_R2.get
Peter van 't Hof's avatar
Peter van 't Hof committed
198
        for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2.get
199
200
        fastSplitter_R2.isIntermediate = true
        add(fastSplitter_R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
      }
    }
bow's avatar
bow committed
203

Peter van 't Hof's avatar
Peter van 't Hof committed
204
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
208
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
        fastq_R1_output :+= R1
Peter van 't Hof's avatar
Peter van 't Hof committed
213
        R2.foreach(R2 => fastq_R2_output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
      }
215

Peter van 't Hof's avatar
Peter van 't Hof committed
216
      val outputBam = new File(chunkDir, outputName + ".bam")
217
218
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
223
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
226
227
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
228
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
229
      }
230
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
231
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
232
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
233
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
234
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
235
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
236
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
    }
bow's avatar
bow committed
238

239
240
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
241
      bamFile = new File(outputDir, outputName + ".dedup.bam")
242
243
244
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
245
246
247
248
249
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
250

Peter van 't Hof's avatar
Peter van 't Hof committed
251
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
252
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
256

Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
259
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
262

    if (config("generate_wig", default = false).asBoolean)
      addAll(Bam2Wig(this, finalBamFile).functions)
263

Peter van 't Hof's avatar
Peter van 't Hof committed
264
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
265
  }
bow's avatar
bow committed
266

Peter van 't Hof's avatar
Peter van 't Hof committed
267
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
268
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
269
270
271
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
272
    bwaAlnR1.isIntermediate = true
273
274
275
    add(bwaAlnR1)

    val samFile: File = if (paired) {
276
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
277
      bwaAlnR2.fastq = R2.get
278
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
279
      bwaAlnR2.isIntermediate = true
280
281
282
283
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
284
      bwaSampe.fastqR2 = R2.get
285
286
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
287
      bwaSampe.r = getReadGroupBwa
288
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
289
      bwaSampe.isIntermediate = true
290
291
292
293
294
295
296
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
297
      bwaSamse.r = getReadGroupBwa
298
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
299
      bwaSamse.isIntermediate = true
300
301
302
303
304
305
306
307
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
308
    sortSam.output
309
310
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
311
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
312
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
313
    val bwaCommand = new BwaMem(this)
314
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
315
    if (paired) bwaCommand.R2 = R2.get
316
    bwaCommand.R = Some(getReadGroupBwa)
317
318
    val sortSam = new SortSam(this)
    sortSam.output = output
319
320
321
322
    val pipe = bwaCommand | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
323
    output
324
  }
325

Peter van 't Hof's avatar
Peter van 't Hof committed
326
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
327
    val gsnapCommand = new Gsnap(this)
328
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
329
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
330

331
    val reorderSam = new ReorderSam(this)
332
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
333
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
334

Peter van 't Hof's avatar
Peter van 't Hof committed
335
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
336
337
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
Peter van 't Hof's avatar
Peter van 't Hof committed
338
339
    add(pipe)
    ar._2
bow's avatar
bow committed
340
341
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
342
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
343
344
345
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
346
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
bow's avatar
bow committed
347
348
349
    tophat.output_dir = new File(outputDir, "tophat_out")
    // always output BAM
    tophat.no_convert_bam = false
350
351
    // and always keep input ordering
    tophat.keep_fasta_order = true
bow's avatar
bow committed
352
353
    add(tophat)

354
355
356
357
    // fix unmapped file coordinates
    val fixedUnmapped = new File(tophat.output_dir, "unmapped_fixup.sam")
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
358
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
359
360
361
362
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
bow's avatar
bow committed
363
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.output_dir, "unmapped_fixup.sorted.bam"))
364
365
366
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
367

368
369
370
371
372
373
374
375
376
377
378
379
380
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
      tophat.output_dir, "coordinate")
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
381
382
383
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
    add(ar._1)
    ar._2
bow's avatar
bow committed
384
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
385
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
386
  def addStampy(R1: File, R2: Option[File], output: File): File = {
387

Peter van 't Hof's avatar
Peter van 't Hof committed
388
    var RG: String = "ID:" + readgroupId + ","
389
390
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
391
392
393
394
395
396
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
397
398
399

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
400
    if (paired) stampyCmd.R2 = R2.get
401
402
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
403
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
404
405
    stampyCmd.isIntermediate = true
    add(stampyCmd)
406
407
408
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
409
    sortSam.output
410
  }
411

Peter van 't Hof's avatar
Peter van 't Hof committed
412
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
413
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
414
415
416
417
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
418
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
419
420
421
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
422
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
423
424
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
425
    pipe.threadsCorrection = -1
Peter van 't Hof's avatar
Peter van 't Hof committed
426
427
    add(pipe)
    ar._2
428
  }
429

Peter van 't Hof's avatar
Peter van 't Hof committed
430
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
431
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
432
433
434
435
436
437
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, output)
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) ::
      Some(starCommand) :: Some(ar._1) :: Nil).flatten)
438
439
440
    pipe.threadsCorrection = -1
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
441
442
    add(pipe)
    ar._2
443
  }
444

Peter van 't Hof's avatar
Peter van 't Hof committed
445
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
446
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
447
448
449
450
451
452
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
453
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
454
455
456
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
    add(ar._1)
    ar._2
457
  }
458

Peter van 't Hof's avatar
Peter van 't Hof committed
459
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
460
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
461
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
462
463
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
464
    addOrReplaceReadGroups.RGID = readgroupId
465
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
466
467
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
468
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
469
470
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
471
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
bow's avatar
bow committed
472

Peter van 't Hof's avatar
Peter van 't Hof committed
473
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
474
  }
bow's avatar
bow committed
475

Peter van 't Hof's avatar
Peter van 't Hof committed
476
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
477
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
478
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
479
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
480
481
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
482
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
483
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
484
485
486
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
487

488
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
489
  }
490
491
492
493

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
494
495
   * @param file input file
   * @param runDir directory to extract when needed
496
497
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
498
499
500
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
501
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
502
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
503
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
504
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
505
    } else if (file.getName.endsWith(".bz2")) {
506
507
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
508
509
      (Some(pbzip2), newFile)
    } else (None, file)
510
511
  }

512
513
}

514
object Mapping extends PipelineCommand