Mapping.scala 20.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.pipelines.mapping

18
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import java.util.Date
20

21
import nl.lumc.sasc.biopet.core._
22
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2, Bowtie }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
27
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
28
import nl.lumc.sasc.biopet.extensions._
29
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
31
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
32
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
34
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
import org.broadinstitute.gatk.queue.QScript

import scala.math._
38

39
// TODO: documentation
40
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
41

Peter van 't Hof's avatar
Peter van 't Hof committed
42
  def this() = this(null)
bow's avatar
bow committed
43
44

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
45
  var inputR1: File = _
bow's avatar
bow committed
46
47

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
48
  var inputR2: Option[File] = None
bow's avatar
bow committed
49

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  var outputName: String = _
bow's avatar
bow committed
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
55

Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
61

Peter van 't Hof's avatar
Peter van 't Hof committed
62
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
63
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65
66
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
70

71
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
74

75
76
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
82

Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
85

Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
88

Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
93
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
94

Peter van 't Hof's avatar
Peter van 't Hof committed
95
  protected var paired: Boolean = false
96
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
97
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
98

Peter van 't Hof's avatar
Peter van 't Hof committed
99
  /** location of summary file */
100
101
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

102
103
104
105
106
  override def defaults = Map("gsnap" -> Map("batch" -> 4))

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
107
  )
108

Peter van 't Hof's avatar
Peter van 't Hof committed
109
  /** File to add to the summary */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
110
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
111
    "reference" -> referenceFasta()) ++
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
112
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map())
113

Peter van 't Hof's avatar
Peter van 't Hof committed
114
  /** Settings to add to summary */
115
116
117
118
119
120
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
121
    "numberChunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
122
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
123

124
125
126
127
128
129
130
131
132
133
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
134
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
135
  def init() {
136
    require(outputDir != null, "Missing output directory on mapping module")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
137
    require(inputR1 != null, "Missing output directory on mapping module")
138
139
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
140

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
141
142
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
143

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
144
    paired = inputR2.isDefined
145

Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
148

Peter van 't Hof's avatar
Peter van 't Hof committed
149
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
150

Peter van 't Hof's avatar
Peter van 't Hof committed
151
    if (chunking) {
152
      if (numberChunks.isEmpty) {
153
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
154
        else {
155
          val chunkSize: Int = config("chunksize", 1 << 30)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
156
157
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
158
159
160
161
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
162
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
163
  }
bow's avatar
bow committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
166
  def biopetScript() {
167
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
169
      flexiprep.inputR1 = inputR1
170
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
171
      flexiprep.sampleId = this.sampleId
172
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
      flexiprep.init()
      flexiprep.runInitialJobs()
175
    }
bow's avatar
bow committed
176
    var bamFiles: List[File] = Nil
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
177
178
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
179

Peter van 't Hof's avatar
Peter van 't Hof committed
180
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
183
184
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
185
      }).toMap
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
186
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
187
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
188
    }
bow's avatar
bow committed
189

Peter van 't Hof's avatar
Peter van 't Hof committed
190
    if (chunking) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
191
192
193
194
195
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
196

Peter van 't Hof's avatar
Peter van 't Hof committed
197
      if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
198
199
200
201
202
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
      }
    }
bow's avatar
bow committed
205

Peter van 't Hof's avatar
Peter van 't Hof committed
206
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
209
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
210
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
211
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
214
215
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
216
      }
217

Peter van 't Hof's avatar
Peter van 't Hof committed
218
      val outputBam = new File(chunkDir, outputName + ".bam")
219
220
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
223
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
224
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
225
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
226
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
227
228
229
230
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
231
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
232
      }
233
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
234
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
235
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
236
    if (!skipFlexiprep) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
237
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
238
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
239
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
240
    }
bow's avatar
bow committed
241

242
243
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
244
      bamFile = new File(outputDir, outputName + ".dedup.bam")
245
246
247
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
248
    } else if (skipMarkduplicates && chunking) {
Peter van 't Hof's avatar
Peter van 't Hof committed
249
      val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam"))
250
251
252
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
253

Peter van 't Hof's avatar
Peter van 't Hof committed
254
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
255
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
256
257
258
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
259

Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
262
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
263

264
    if (config("unmapped_to_gears", default = false).asBoolean) {
265
      val gears = new GearsSingle(this)
266
      gears.bamFile = Some(finalBamFile)
267
268
      gears.sampleId = sampleId
      gears.libId = libId
269
      gears.outputDir = new File(outputDir, "gears")
270
      add(gears)
271
272
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
273
274
    if (config("generate_wig", default = false).asBoolean)
      addAll(Bam2Wig(this, finalBamFile).functions)
275

Peter van 't Hof's avatar
Peter van 't Hof committed
276
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
277
  }
bow's avatar
bow committed
278

Peter van 't Hof's avatar
Peter van 't Hof committed
279
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
280
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
281
282
283
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
284
    bwaAlnR1.isIntermediate = true
285
286
287
    add(bwaAlnR1)

    val samFile: File = if (paired) {
288
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
289
      bwaAlnR2.fastq = R2.get
290
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
291
      bwaAlnR2.isIntermediate = true
292
293
294
295
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
296
      bwaSampe.fastqR2 = R2.get
297
298
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
299
      bwaSampe.r = getReadGroupBwa
300
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
301
      bwaSampe.isIntermediate = true
302
303
304
305
306
307
308
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
309
      bwaSamse.r = getReadGroupBwa
310
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
311
      bwaSamse.isIntermediate = true
312
313
314
315
316
317
318
319
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
320
    sortSam.output
321
322
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
323
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
324
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
325
    val bwaCommand = new BwaMem(this)
326
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
327
    if (paired) bwaCommand.R2 = R2.get
328
    bwaCommand.R = Some(getReadGroupBwa)
329
330
    val sortSam = new SortSam(this)
    sortSam.output = output
331
332
333
334
    val pipe = bwaCommand | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
335
    output
336
  }
337

Peter van 't Hof's avatar
Peter van 't Hof committed
338
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
339
    val gsnapCommand = new Gsnap(this)
340
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
341
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
342

343
    val reorderSam = new ReorderSam(this)
344
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
345
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
346

Peter van 't Hof's avatar
Peter van 't Hof committed
347
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
348
349
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
Peter van 't Hof's avatar
Peter van 't Hof committed
350
351
    add(pipe)
    ar._2
bow's avatar
bow committed
352
353
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
354
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
355
356
357
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
358
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
359
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
360
    // always output BAM
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
361
    tophat.noConvertBam = false
362
    // and always keep input ordering
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
363
    tophat.keepFastaOrder = true
bow's avatar
bow committed
364
365
    add(tophat)

366
    // fix unmapped file coordinates
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
367
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
368
369
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
370
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
371
372
373
374
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
375
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
376
377
378
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
379

380
381
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
382
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
383
384
385
386
387
388
389
390
391
392
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
393
394
395
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
    add(ar._1)
    ar._2
bow's avatar
bow committed
396
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
397

Peter van 't Hof's avatar
Peter van 't Hof committed
398
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
399
  def addStampy(R1: File, R2: Option[File], output: File): File = {
400

Peter van 't Hof's avatar
Peter van 't Hof committed
401
    var RG: String = "ID:" + readgroupId + ","
402
403
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
404
405
406
407
408
409
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
410
411
412

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
413
    if (paired) stampyCmd.R2 = R2.get
414
415
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
416
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
417
418
    stampyCmd.isIntermediate = true
    add(stampyCmd)
419
420
421
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
422
    sortSam.output
423
  }
424

Peter van 't Hof's avatar
Peter van 't Hof committed
425
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
426
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
427
428
429
430
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
431
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
432
433
434
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
435
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
436
437
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
438
    pipe.threadsCorrection = -1
Peter van 't Hof's avatar
Peter van 't Hof committed
439
440
    add(pipe)
    ar._2
441
  }
442

443
444
445
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
446
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
447
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
448
449
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
Peter van 't Hof's avatar
Peter van 't Hof committed
450
    bowtie2.rg +:= ("SM:" + sampleId.get)
451
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
452
    bowtie2.R2 = R2
453
454
455
456
457
458
459
460
461
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
462
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
463
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
464
465
466
467
468
469
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, output)
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) ::
      Some(starCommand) :: Some(ar._1) :: Nil).flatten)
470
471
472
    pipe.threadsCorrection = -1
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
473
474
    add(pipe)
    ar._2
475
  }
476

Peter van 't Hof's avatar
Peter van 't Hof committed
477
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
478
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
479
480
481
482
483
484
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
485
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
486
487
488
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
    add(ar._1)
    ar._2
489
  }
490

Peter van 't Hof's avatar
Peter van 't Hof committed
491
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
492
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
493
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
494
495
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
496
    addOrReplaceReadGroups.RGID = readgroupId
497
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
498
499
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
500
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
501
502
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
503
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
bow's avatar
bow committed
504

Peter van 't Hof's avatar
Peter van 't Hof committed
505
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
506
  }
bow's avatar
bow committed
507

Peter van 't Hof's avatar
Peter van 't Hof committed
508
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
509
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
510
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
511
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
512
513
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
514
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
515
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
516
517
518
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
519

520
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
521
  }
522
523
524
525

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
526
527
   * @param file input file
   * @param runDir directory to extract when needed
528
529
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
530
531
532
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
533
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
534
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
535
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
536
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
537
    } else if (file.getName.endsWith(".bz2")) {
538
539
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
540
541
      (Some(pbzip2), newFile)
    } else (None, file)
542
543
  }

544
545
}

546
object Mapping extends PipelineCommand