Mapping.scala 23 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.mapping

17
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.util.Date
19

20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
23
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie, Bowtie2 }
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
bow's avatar
bow committed
25
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
27
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
28
import nl.lumc.sasc.biopet.extensions._
29
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
31
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
32
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
Peter van 't Hof's avatar
Peter van 't Hof committed
34
import nl.lumc.sasc.biopet.utils.textToSize
35
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
import org.broadinstitute.gatk.queue.QScript

import scala.math._
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
/**
41
42
 * This pipeline doing a alignment to a given reference genome
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
43
class Mapping(val parent: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  def this() = this(null)
bow's avatar
bow committed
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  @Input(doc = "R1 fastq file", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
48
  var inputR1: File = _
bow's avatar
bow committed
49

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  @Input(doc = "R2 fastq file", shortName = "R2", fullName = "inputR2", required = false)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
51
  var inputR2: Option[File] = None
bow's avatar
bow committed
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  var outputName: String = _
bow's avatar
bow committed
55

Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
61

Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
73

74
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
77

78
79
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
82

Peter van 't Hof's avatar
Peter van 't Hof committed
83
  /** Readgroup platform unit */
Peter van 't Hof's avatar
Peter van 't Hof committed
84
  protected var platformUnit: Option[String] = config("platform_unit")
bow's avatar
bow committed
85

Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
88

Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
93
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
94

Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
97

98
  val keepFinalBamFile: Boolean = config("keep_final_bam_file", default = true)
99

Peter van 't Hof's avatar
Peter van 't Hof committed
100
  protected var paired: Boolean = false
101
  val flexiprep = new Flexiprep(this)
102
103
104
  def mergedBamFile = new File(outputDir, outputName + ".bam")
  def finalBamFile: File = if (skipMarkduplicates) mergedBamFile
  else new File(outputDir, outputName + ".dedup.bam")
105

Peter van 't Hof's avatar
Peter van 't Hof committed
106
  override def defaults: Map[String, Any] = Map(
107
    "gsnap" -> Map("batch" -> 4),
Wai Yi Leung's avatar
Wai Yi Leung committed
108
    "star" -> Map("outsamunmapped" -> "Within")
109
  )
110

Peter van 't Hof's avatar
Peter van 't Hof committed
111
  override def fixedValues: Map[String, Any] = Map(
112
113
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
114
  )
115

Peter van 't Hof's avatar
Peter van 't Hof committed
116
  /** File to add to the summary */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
117
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
118
    "reference" -> referenceFasta()) ++
119
120
121
122
123
124
125
126
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map()) ++
    (bam2wig match {
      case Some(b) => Map(
        "output_wigle" -> b.outputWigleFile,
        "output_tdf" -> b.outputTdfFile,
        "output_bigwig" -> b.outputBwFile)
      case _ => Map()
    })
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
  /** Settings to add to summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
129
  def summarySettings: Map[String, Any] = Map(
130
131
132
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
133
    "paired" -> inputR2.isDefined,
134
135
    "aligner" -> aligner,
    "chunking" -> chunking,
Sander Bollen's avatar
Sander Bollen committed
136
    "number_of_chunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
137
  ) ++ (if (parent == null) Map("reference" -> referenceSummary) else Map())
138

Peter van 't Hof's avatar
Peter van 't Hof committed
139
  override def reportClass: Some[MappingReport] = {
140
141
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
142
    mappingReport.summaryDbFile = summaryDbFile
143
144
145
146
147
148
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
149
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
150
  def init() {
151
    require(outputDir != null, "Missing output directory on mapping module")
152
    require(inputR1 != null, "Missing inputR1 on mapping module")
153
154
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
155
156
    if (inputR1.exists() && inputR1.length() == 0) logger.warn(s"Input R1 is a empty file: $inputR1")
    inputR2.foreach(r => if (r.exists() && r.length() == 0) logger.warn(s"Input R2 is a empty file: $r"))
157

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
158
159
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
160

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
161
    paired = inputR2.isDefined
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
164
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
165

Peter van 't Hof's avatar
Peter van 't Hof committed
166
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
167

Peter van 't Hof's avatar
Peter van 't Hof committed
168
    if (chunking) {
169
      if (numberChunks.isEmpty) {
170
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
171
        else {
Peter van 't Hof's avatar
Peter van 't Hof committed
172
          val chunkSize: String = config("chunksize", default = "5G")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
173
174
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
          numberChunks = Some(ceil(filesize.toDouble / textToSize(chunkSize)).toInt)
          if (numberChunks == Some(0)) numberChunks = Some(1)
177
178
179
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
180
      if (numberChunks.getOrElse(1) <= 1) chunking = false
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
182
  }
bow's avatar
bow committed
183

Peter van 't Hof's avatar
Peter van 't Hof committed
184
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
185
  def biopetScript() {
186
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
188
      flexiprep.inputR1 = inputR1
189
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      flexiprep.sampleId = this.sampleId
191
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
      flexiprep.init()
      flexiprep.runInitialJobs()
194
    }
bow's avatar
bow committed
195
    var bamFiles: List[File] = Nil
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
196
197
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
198

Peter van 't Hof's avatar
Peter van 't Hof committed
199
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
202
203
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
      }).toMap
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
205
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
206
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
207
    }
bow's avatar
bow committed
208

Peter van 't Hof's avatar
Peter van 't Hof committed
209
    if (chunking) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
210
211
212
213
214
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
215

Peter van 't Hof's avatar
Peter van 't Hof committed
216
      if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
217
218
219
220
221
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
222
223
      }
    }
bow's avatar
bow committed
224

Peter van 't Hof's avatar
Peter van 't Hof committed
225
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
229
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
230
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
233
234
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
235
      }
236

Peter van 't Hof's avatar
Peter van 't Hof committed
237
      val outputBam = new File(chunkDir, outputName + ".bam")
238
239
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
242
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
243
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
244
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
245
        case "hisat2"     => addHisat2(R1, R2, outputBam)
bow's avatar
bow committed
246
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
249
250
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
251
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
252
      }
253
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
254
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
255
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
256
    if (!skipFlexiprep) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
257
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
258
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
259
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
260
    }
bow's avatar
bow committed
261

262
    var bamFile = bamFiles.head
263
264
265
266
267
268
269
270
271

    if (!chunking) require(bamFile == mergedBamFile)
    else {
      val mergeSamFile = MergeSamFiles(this, bamFiles, mergedBamFile)
      mergeSamFile.isIntermediate = !keepFinalBamFile || !skipMarkduplicates
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }

272
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
273
      bamFile = new File(outputDir, outputName + ".dedup.bam")
274
      val md = MarkDuplicates(this, mergedBamFile :: Nil, finalBamFile)
275
      md.isIntermediate = !keepFinalBamFile
276
277
      add(md)
      addSummarizable(md, "mark_duplicates")
278
    }
279

Peter van 't Hof's avatar
Peter van 't Hof committed
280
    if (!skipMetrics) {
281
      val bamMetrics = BamMetrics(this, finalBamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
282
283
284
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
285

286
    outputFiles += ("finalBamFile" -> finalBamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
287

288
    if (config("unmapped_to_gears", default = false).asBoolean) {
289
      val gears = new GearsSingle(this)
290
      gears.bamFile = Some(finalBamFile)
291
292
      gears.sampleId = sampleId
      gears.libId = libId
293
      gears.outputDir = new File(outputDir, "gears")
294
      add(gears)
295
296
    }

297
    bam2wig.foreach(add(_))
298

Peter van 't Hof's avatar
Peter van 't Hof committed
299
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
300
  }
bow's avatar
bow committed
301

Peter van 't Hof's avatar
Peter van 't Hof committed
302
  protected lazy val bam2wig: Option[Bam2Wig] = if (config("generate_wig", default = false)) {
303
304
305
    Some(Bam2Wig(this, finalBamFile))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
306
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
307
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
308
309
310
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
311
    bwaAlnR1.isIntermediate = true
312
313
314
    add(bwaAlnR1)

    val samFile: File = if (paired) {
315
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
316
      bwaAlnR2.fastq = R2.get
317
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
318
      bwaAlnR2.isIntermediate = true
319
320
321
322
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
323
      bwaSampe.fastqR2 = R2.get
324
325
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
326
      bwaSampe.r = getReadGroupBwa
327
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
328
      bwaSampe.isIntermediate = true
329
330
331
332
333
334
335
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
336
      bwaSamse.r = getReadGroupBwa
337
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
338
      bwaSamse.isIntermediate = true
339
340
341
342
343
344
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
345
    sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
346
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
347
    sortSam.output
348
349
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
350
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
351
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
352
    val bwaCommand = new BwaMem(this)
353
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
354
    if (paired) bwaCommand.R2 = R2.get
355
    bwaCommand.R = Some(getReadGroupBwa)
356
357
    val sortSam = new SortSam(this)
    sortSam.output = output
358
    val pipe = bwaCommand | sortSam
359
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
360
361
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
362
    output
363
  }
364

Peter van 't Hof's avatar
Peter van 't Hof committed
365
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
366
    val gsnapCommand = new Gsnap(this)
367
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
368
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
369

370
    val reorderSam = new ReorderSam(this)
371
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
372
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
373

Peter van 't Hof's avatar
Peter van 't Hof committed
374
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
375
376
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
377
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
378
379
    add(pipe)
    ar._2
bow's avatar
bow committed
380
381
  }

bow's avatar
bow committed
382
383
384
385
386
387
  def addHisat2(R1: File, R2: Option[File], output: File): File = {
    val hisat2 = new Hisat2(this)
    hisat2.R1 = R1
    hisat2.R2 = R2
    hisat2.rgId = Some(readgroupId)
    hisat2.rg +:= s"PL:$platform"
Peter van 't Hof's avatar
Peter van 't Hof committed
388
    platformUnit.foreach(x => hisat2.rg +:= s"PU:$x")
bow's avatar
bow committed
389
390
391
392
393
394
395
396
397
398
399
400
    libId match {
      case Some(id)  => hisat2.rg +:= s"LB:$id"
      case otherwise => ;
    }
    sampleId match {
      case Some(id)  => hisat2.rg +:= s"SM:$id"
      case otherwise => ;
    }

    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = hisat2 | sortSam
401
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
bow's avatar
bow committed
402
403
404
405
406
407
    pipe.threadsCorrection = 1
    add(pipe)

    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
408
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
409
410
411
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
412
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
413
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
414
    // always output BAM
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
415
    tophat.noConvertBam = false
416
    // and always keep input ordering
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
417
    tophat.keepFastaOrder = true
bow's avatar
bow committed
418
419
    add(tophat)

420
    // fix unmapped file coordinates
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
421
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
422
423
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
424
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
425
426
427
428
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
429
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
430
431
432
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
433

434
435
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
436
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
437
438
439
440
441
442
443
444
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
445
    reorderSam.isIntermediate = true
446
447
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
448
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
449
    ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
450
451
    add(ar._1)
    ar._2
bow's avatar
bow committed
452
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
453

Peter van 't Hof's avatar
Peter van 't Hof committed
454
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
455
  def addStampy(R1: File, R2: Option[File], output: File): File = {
456

Peter van 't Hof's avatar
Peter van 't Hof committed
457
    var RG: String = "ID:" + readgroupId + ","
458
459
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
460
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
461
    platformUnit.foreach(x => RG += "PU:" + x + ",")
Peter van 't Hof's avatar
Peter van 't Hof committed
462
463
464
465
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
466
467
468

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
469
    if (paired) stampyCmd.R2 = R2.get
470
471
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
472
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
473
474
    stampyCmd.isIntermediate = true
    add(stampyCmd)
475
    val sortSam = SortSam(this, stampyCmd.output, output)
476
    sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
477
    add(sortSam)
478
    sortSam.output
479
  }
480

Peter van 't Hof's avatar
Peter van 't Hof committed
481
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
482
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
483
484
485
486
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
487
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
488
489
490
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
491
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
492
493
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
494
    pipe.threadsCorrection = -1
495
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
496
497
    add(pipe)
    ar._2
498
  }
499

500
501
502
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
503
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
504
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
505
    bowtie2.rg +:= ("PL:" + platform)
Peter van 't Hof's avatar
Peter van 't Hof committed
506
    platformUnit.foreach(x => bowtie2.rg +:= ("PU:" + x))
Peter van 't Hof's avatar
Peter van 't Hof committed
507
    bowtie2.rg +:= ("SM:" + sampleId.get)
508
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
509
    bowtie2.R2 = R2
510
511
512
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
513
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
514
515
516
517
518
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
519
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
520
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
521
522
523
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
524
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, swapExt(outputDir, output, ".bam", ".addAddOrReplaceReadGroups.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
525
526
527

    val reorderSam = new ReorderSam(this)
    reorderSam.input = ar._2
Peter van 't Hof's avatar
Peter van 't Hof committed
528
    reorderSam.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
529

Peter van 't Hof's avatar
Peter van 't Hof committed
530
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: zcatR2.flatMap(_._1) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
531
532
      Some(starCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
    pipe.threadsCorrection = -3
533
534
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
535
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
536
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
537
    reorderSam.output
538
  }
539

Peter van 't Hof's avatar
Peter van 't Hof committed
540
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
541
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
542
543
544
545
546
547
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
548
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
549
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
550
    ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
551
552
    add(ar._1)
    ar._2
553
  }
554

Peter van 't Hof's avatar
Peter van 't Hof committed
555
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
556
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
557
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
558
559
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
560
    addOrReplaceReadGroups.RGID = readgroupId
561
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
562
    addOrReplaceReadGroups.RGPL = platform
Peter van 't Hof's avatar
Peter van 't Hof committed
563
    addOrReplaceReadGroups.RGPU = platformUnit.getOrElse(readgroupId)
564
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
565
566
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
567
    addOrReplaceReadGroups.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
bow's avatar
bow committed
568

Peter van 't Hof's avatar
Peter van 't Hof committed
569
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
570
  }
bow's avatar
bow committed
571

Peter van 't Hof's avatar
Peter van 't Hof committed
572
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
573
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
574
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
575
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
576
    RG += "PL:" + platform + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
577
    platformUnit.foreach(x => RG += "PU:" + x + "\\t")
578
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
579
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
580
581
582
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
583

584
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
585
  }
586
587
588
589

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
590
591
   * @param file input file
   * @param runDir directory to extract when needed
592
593
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
594
595
596
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
597
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
598
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
599
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
600
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
601
    } else if (file.getName.endsWith(".bz2")) {
602
603
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
604
605
      (Some(pbzip2), newFile)
    } else (None, file)
606
607
  }

608
609
}

610
object Mapping extends PipelineCommand