Mapping.scala 21.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.mapping

17
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.util.Date
19

20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2, Bowtie }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
26
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
27
import nl.lumc.sasc.biopet.extensions._
28
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
31
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
32
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
33
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
36
import org.broadinstitute.gatk.queue.QScript

import scala.math._
37

38
// TODO: documentation
39
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  def this() = this(null)
bow's avatar
bow committed
42

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  @Input(doc = "R1 fastq file", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
44
  var inputR1: File = _
bow's avatar
bow committed
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  @Input(doc = "R2 fastq file", shortName = "R2", fullName = "inputR2", required = false)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
47
  var inputR2: Option[File] = None
bow's avatar
bow committed
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
50
  var outputName: String = _
bow's avatar
bow committed
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
54

Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
57

Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
60

Peter van 't Hof's avatar
Peter van 't Hof committed
61
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
63

Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
69

70
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
73

74
75
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
93

Peter van 't Hof's avatar
Peter van 't Hof committed
94
  protected var paired: Boolean = false
95
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  /** location of summary file */
99
100
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

101
102
103
104
  override def defaults = Map(
    "gsnap" -> Map("batch" -> 4),
    "star" -> Map("outSAMunmapped" -> "Within")
  )
105
106
107
108

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
109
  )
110

Peter van 't Hof's avatar
Peter van 't Hof committed
111
  /** File to add to the summary */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
112
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
113
    "reference" -> referenceFasta()) ++
114
115
116
117
118
119
120
121
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map()) ++
    (bam2wig match {
      case Some(b) => Map(
        "output_wigle" -> b.outputWigleFile,
        "output_tdf" -> b.outputTdfFile,
        "output_bigwig" -> b.outputBwFile)
      case _ => Map()
    })
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  /** Settings to add to summary */
124
125
126
127
128
129
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
130
    "numberChunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
131
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
132

133
134
135
136
137
138
139
140
141
142
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
143
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
144
  def init() {
145
    require(outputDir != null, "Missing output directory on mapping module")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
146
    require(inputR1 != null, "Missing output directory on mapping module")
147
148
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
149

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
150
151
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
152

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
153
    paired = inputR2.isDefined
154

Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
157

Peter van 't Hof's avatar
Peter van 't Hof committed
158
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
    if (chunking) {
161
      if (numberChunks.isEmpty) {
162
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
163
        else {
164
          val chunkSize: Int = config("chunksize", 1 << 30)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
165
166
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
167
168
169
170
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
171
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
172
  }
bow's avatar
bow committed
173

Peter van 't Hof's avatar
Peter van 't Hof committed
174
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
175
  def biopetScript() {
176
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
177
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
178
      flexiprep.inputR1 = inputR1
179
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
180
      flexiprep.sampleId = this.sampleId
181
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
      flexiprep.init()
      flexiprep.runInitialJobs()
184
    }
bow's avatar
bow committed
185
    var bamFiles: List[File] = Nil
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
186
187
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
188

Peter van 't Hof's avatar
Peter van 't Hof committed
189
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
192
193
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
194
      }).toMap
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
195
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
196
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
197
    }
bow's avatar
bow committed
198

Peter van 't Hof's avatar
Peter van 't Hof committed
199
    if (chunking) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
200
201
202
203
204
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
205

Peter van 't Hof's avatar
Peter van 't Hof committed
206
      if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
207
208
209
210
211
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
      }
    }
bow's avatar
bow committed
214

Peter van 't Hof's avatar
Peter van 't Hof committed
215
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
216
217
218
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
219
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
220
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
223
224
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
225
      }
226

Peter van 't Hof's avatar
Peter van 't Hof committed
227
      val outputBam = new File(chunkDir, outputName + ".bam")
228
229
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
230
231
232
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
233
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
234
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
235
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
240
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
241
      }
242
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
243
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
244
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
245
    if (!skipFlexiprep) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
246
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
247
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
248
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
249
    }
bow's avatar
bow committed
250

251
252
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
253
      bamFile = new File(outputDir, outputName + ".dedup.bam")
254
255
256
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
257
    } else if (skipMarkduplicates && chunking) {
Peter van 't Hof's avatar
Peter van 't Hof committed
258
      val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam"))
259
260
261
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
262

Peter van 't Hof's avatar
Peter van 't Hof committed
263
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
264
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
265
266
267
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
268

Peter van 't Hof's avatar
Peter van 't Hof committed
269
270
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
271
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
272

273
    if (config("unmapped_to_gears", default = false).asBoolean) {
274
      val gears = new GearsSingle(this)
275
      gears.bamFile = Some(finalBamFile)
276
277
      gears.sampleId = sampleId
      gears.libId = libId
278
      gears.outputDir = new File(outputDir, "gears")
279
      add(gears)
280
281
    }

282
    bam2wig.foreach(add(_))
283

Peter van 't Hof's avatar
Peter van 't Hof committed
284
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
285
  }
bow's avatar
bow committed
286

287
288
289
290
  protected lazy val bam2wig = if (config("generate_wig", default = false)) {
    Some(Bam2Wig(this, finalBamFile))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
291
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
292
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
293
294
295
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
296
    bwaAlnR1.isIntermediate = true
297
298
299
    add(bwaAlnR1)

    val samFile: File = if (paired) {
300
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
301
      bwaAlnR2.fastq = R2.get
302
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
303
      bwaAlnR2.isIntermediate = true
304
305
306
307
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
308
      bwaSampe.fastqR2 = R2.get
309
310
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
311
      bwaSampe.r = getReadGroupBwa
312
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
313
      bwaSampe.isIntermediate = true
314
315
316
317
318
319
320
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
321
      bwaSamse.r = getReadGroupBwa
322
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
323
      bwaSamse.isIntermediate = true
324
325
326
327
328
329
330
331
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
332
    sortSam.output
333
334
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
335
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
336
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
337
    val bwaCommand = new BwaMem(this)
338
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
339
    if (paired) bwaCommand.R2 = R2.get
340
    bwaCommand.R = Some(getReadGroupBwa)
341
342
    val sortSam = new SortSam(this)
    sortSam.output = output
343
344
345
346
    val pipe = bwaCommand | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
347
    output
348
  }
349

Peter van 't Hof's avatar
Peter van 't Hof committed
350
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
351
    val gsnapCommand = new Gsnap(this)
352
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
353
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
354

355
    val reorderSam = new ReorderSam(this)
356
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
357
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
358

Peter van 't Hof's avatar
Peter van 't Hof committed
359
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
360
361
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
Peter van 't Hof's avatar
Peter van 't Hof committed
362
363
    add(pipe)
    ar._2
bow's avatar
bow committed
364
365
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
366
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
367
368
369
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
370
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
371
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
372
    // always output BAM
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
373
    tophat.noConvertBam = false
374
    // and always keep input ordering
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
375
    tophat.keepFastaOrder = true
bow's avatar
bow committed
376
377
    add(tophat)

378
    // fix unmapped file coordinates
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
379
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
380
381
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
382
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
383
384
385
386
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
387
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
388
389
390
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
391

392
393
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
394
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
395
396
397
398
399
400
401
402
403
404
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
405
406
407
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
    add(ar._1)
    ar._2
bow's avatar
bow committed
408
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
409

Peter van 't Hof's avatar
Peter van 't Hof committed
410
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
411
  def addStampy(R1: File, R2: Option[File], output: File): File = {
412

Peter van 't Hof's avatar
Peter van 't Hof committed
413
    var RG: String = "ID:" + readgroupId + ","
414
415
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
416
417
418
419
420
421
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
422
423
424

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
425
    if (paired) stampyCmd.R2 = R2.get
426
427
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
428
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
429
430
    stampyCmd.isIntermediate = true
    add(stampyCmd)
431
432
433
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
434
    sortSam.output
435
  }
436

Peter van 't Hof's avatar
Peter van 't Hof committed
437
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
438
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
439
440
441
442
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
443
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
444
445
446
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
447
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
448
449
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
450
    pipe.threadsCorrection = -1
Peter van 't Hof's avatar
Peter van 't Hof committed
451
452
    add(pipe)
    ar._2
453
  }
454

455
456
457
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
458
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
459
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
460
461
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
Peter van 't Hof's avatar
Peter van 't Hof committed
462
    bowtie2.rg +:= ("SM:" + sampleId.get)
463
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
464
    bowtie2.R2 = R2
465
466
467
468
469
470
471
472
473
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
474
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
475
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
476
477
478
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
479
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, swapExt(outputDir, output, ".bam", ".addAddOrReplaceReadGroups.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
480
481
482

    val reorderSam = new ReorderSam(this)
    reorderSam.input = ar._2
Peter van 't Hof's avatar
Peter van 't Hof committed
483
    reorderSam.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
484

Peter van 't Hof's avatar
Peter van 't Hof committed
485
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: zcatR2.flatMap(_._1) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
486
487
      Some(starCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
    pipe.threadsCorrection = -3
488
489
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
490
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
491
    reorderSam.output
492
  }
493

Peter van 't Hof's avatar
Peter van 't Hof committed
494
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
495
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
496
497
498
499
500
501
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
502
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
503
504
505
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
    add(ar._1)
    ar._2
506
  }
507

Peter van 't Hof's avatar
Peter van 't Hof committed
508
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
509
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
510
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
511
512
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
513
    addOrReplaceReadGroups.RGID = readgroupId
514
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
515
516
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
517
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
518
519
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
520
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
bow's avatar
bow committed
521

Peter van 't Hof's avatar
Peter van 't Hof committed
522
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
523
  }
bow's avatar
bow committed
524

Peter van 't Hof's avatar
Peter van 't Hof committed
525
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
526
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
527
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
528
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
529
530
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
531
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
532
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
533
534
535
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
536

537
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
538
  }
539
540
541
542

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
543
544
   * @param file input file
   * @param runDir directory to extract when needed
545
546
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
547
548
549
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
550
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
551
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
552
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
553
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
554
    } else if (file.getName.endsWith(".bz2")) {
555
556
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
557
558
      (Some(pbzip2), newFile)
    } else (None, file)
559
560
  }

561
562
}

563
object Mapping extends PipelineCommand