Mapping.scala 22.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.mapping

17
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.util.Date
19

20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
22
23
import nl.lumc.sasc.biopet.extensions.bowtie.{Bowtie, Bowtie2}
import nl.lumc.sasc.biopet.extensions.bwa.{BwaAln, BwaMem, BwaSampe, BwaSamse}
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
bow's avatar
bow committed
25
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
26
import nl.lumc.sasc.biopet.extensions.picard.{AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam}
27
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
28
import nl.lumc.sasc.biopet.extensions._
29
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
31
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
32
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
34
import nl.lumc.sasc.biopet.utils.{Logging, textToSize}
35
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
import org.broadinstitute.gatk.queue.QScript

import scala.math._
39

40
// TODO: documentation
41
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
42

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def this() = this(null)
bow's avatar
bow committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  @Input(doc = "R1 fastq file", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
46
  var inputR1: File = _
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  @Input(doc = "R2 fastq file", shortName = "R2", fullName = "inputR2", required = false)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
49
  var inputR2: Option[File] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
52
  var outputName: String = _
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
64
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
68

Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
71

72
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
75

76
77
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
95

96
  val keepFinalBamFile: Boolean = config("keep_final_bam_file", default = true)
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  protected var paired: Boolean = false
99
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
100
  def finalBamFile: File = if (skipMarkduplicates) {
101
102
    new File(outputDir, outputName + ".bam")
  } else new File(outputDir, outputName + ".dedup.bam")
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
  /** location of summary file */
105
106
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
107
  override def defaults: Map[String, Any] = Map(
108
    "gsnap" -> Map("batch" -> 4),
Wai Yi Leung's avatar
Wai Yi Leung committed
109
    "star" -> Map("outsamunmapped" -> "Within")
110
  )
111

Peter van 't Hof's avatar
Peter van 't Hof committed
112
  override def fixedValues: Map[String, Any] = Map(
113
114
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
115
  )
116

Peter van 't Hof's avatar
Peter van 't Hof committed
117
  /** File to add to the summary */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
118
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
119
    "reference" -> referenceFasta()) ++
120
121
122
123
124
125
126
127
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map()) ++
    (bam2wig match {
      case Some(b) => Map(
        "output_wigle" -> b.outputWigleFile,
        "output_tdf" -> b.outputTdfFile,
        "output_bigwig" -> b.outputBwFile)
      case _ => Map()
    })
128

Peter van 't Hof's avatar
Peter van 't Hof committed
129
  /** Settings to add to summary */
130
131
132
133
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
134
    "paired" -> inputR2.isDefined,
135
136
    "aligner" -> aligner,
    "chunking" -> chunking,
Sander Bollen's avatar
Sander Bollen committed
137
    "number_of_chunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
138
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
139

140
141
142
143
144
145
146
147
148
149
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
150
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
151
  def init() {
152
    require(outputDir != null, "Missing output directory on mapping module")
153
    require(inputR1 != null, "Missing inputR1 on mapping module")
154
155
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
156
157
    if (inputR1.exists() && inputR1.length() == 0) logger.warn(s"Input R1 is a empty file: $inputR1")
    inputR2.foreach(r => if (r.exists() && r.length() == 0) logger.warn(s"Input R2 is a empty file: $r"))
158

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
159
160
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
161

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
162
    paired = inputR2.isDefined
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
166

Peter van 't Hof's avatar
Peter van 't Hof committed
167
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
168

Peter van 't Hof's avatar
Peter van 't Hof committed
169
    if (chunking) {
170
      if (numberChunks.isEmpty) {
171
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
172
        else {
Peter van 't Hof's avatar
Peter van 't Hof committed
173
          val chunkSize: String = config("chunksize", default = "5G")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
174
175
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
          numberChunks = Some(ceil(filesize.toDouble / textToSize(chunkSize)).toInt)
          if (numberChunks == Some(0)) numberChunks = Some(1)
178
179
180
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
182
  }
bow's avatar
bow committed
183

Peter van 't Hof's avatar
Peter van 't Hof committed
184
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
185
  def biopetScript() {
186
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
188
      flexiprep.inputR1 = inputR1
189
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      flexiprep.sampleId = this.sampleId
191
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
      flexiprep.init()
      flexiprep.runInitialJobs()
194
    }
bow's avatar
bow committed
195
    var bamFiles: List[File] = Nil
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
196
197
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
198

Peter van 't Hof's avatar
Peter van 't Hof committed
199
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
202
203
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
      }).toMap
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
205
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
206
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
207
    }
bow's avatar
bow committed
208

Peter van 't Hof's avatar
Peter van 't Hof committed
209
    if (chunking) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
210
211
212
213
214
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
215

Peter van 't Hof's avatar
Peter van 't Hof committed
216
      if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
217
218
219
220
221
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
222
223
      }
    }
bow's avatar
bow committed
224

Peter van 't Hof's avatar
Peter van 't Hof committed
225
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
229
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
230
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
233
234
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
235
      }
236

Peter van 't Hof's avatar
Peter van 't Hof committed
237
      val outputBam = new File(chunkDir, outputName + ".bam")
238
239
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
242
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
243
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
244
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
245
        case "hisat2"     => addHisat2(R1, R2, outputBam)
bow's avatar
bow committed
246
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
247
248
249
250
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
251
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
252
      }
253
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
254
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
255
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
256
    if (!skipFlexiprep) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
257
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
258
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
259
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
260
    }
bow's avatar
bow committed
261

262
263
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
264
      bamFile = new File(outputDir, outputName + ".dedup.bam")
265
266
      val md = MarkDuplicates(this, bamFiles, finalBamFile)
      md.isIntermediate = !keepFinalBamFile
267
268
      add(md)
      addSummarizable(md, "mark_duplicates")
269
    } else if (skipMarkduplicates && chunking) {
270
271
      val mergeSamFile = MergeSamFiles(this, bamFiles, finalBamFile)
      mergeSamFile.isIntermediate = !keepFinalBamFile
272
273
274
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
275

Peter van 't Hof's avatar
Peter van 't Hof committed
276
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
277
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
278
279
280
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
281

282
    outputFiles += ("finalBamFile" -> finalBamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
283

284
    if (config("unmapped_to_gears", default = false).asBoolean) {
285
      val gears = new GearsSingle(this)
286
      gears.bamFile = Some(finalBamFile)
287
288
      gears.sampleId = sampleId
      gears.libId = libId
289
      gears.outputDir = new File(outputDir, "gears")
290
      add(gears)
291
292
    }

293
    bam2wig.foreach(add(_))
294

Peter van 't Hof's avatar
Peter van 't Hof committed
295
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
296
  }
bow's avatar
bow committed
297

298
299
300
301
  protected lazy val bam2wig = if (config("generate_wig", default = false)) {
    Some(Bam2Wig(this, finalBamFile))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
302
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
303
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
304
305
306
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
307
    bwaAlnR1.isIntermediate = true
308
309
310
    add(bwaAlnR1)

    val samFile: File = if (paired) {
311
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
312
      bwaAlnR2.fastq = R2.get
313
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
314
      bwaAlnR2.isIntermediate = true
315
316
317
318
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
319
      bwaSampe.fastqR2 = R2.get
320
321
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
322
      bwaSampe.r = getReadGroupBwa
323
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
324
      bwaSampe.isIntermediate = true
325
326
327
328
329
330
331
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
332
      bwaSamse.r = getReadGroupBwa
333
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
334
      bwaSamse.isIntermediate = true
335
336
337
338
339
340
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
341
    sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
342
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
343
    sortSam.output
344
345
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
346
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
347
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
348
    val bwaCommand = new BwaMem(this)
349
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
350
    if (paired) bwaCommand.R2 = R2.get
351
    bwaCommand.R = Some(getReadGroupBwa)
352
353
    val sortSam = new SortSam(this)
    sortSam.output = output
354
    val pipe = bwaCommand | sortSam
355
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
356
357
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
358
    output
359
  }
360

Peter van 't Hof's avatar
Peter van 't Hof committed
361
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
362
    val gsnapCommand = new Gsnap(this)
363
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
364
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
365

366
    val reorderSam = new ReorderSam(this)
367
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
368
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
369

Peter van 't Hof's avatar
Peter van 't Hof committed
370
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
371
372
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
373
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
374
375
    add(pipe)
    ar._2
bow's avatar
bow committed
376
377
  }

bow's avatar
bow committed
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
  def addHisat2(R1: File, R2: Option[File], output: File): File = {
    val hisat2 = new Hisat2(this)
    hisat2.R1 = R1
    hisat2.R2 = R2
    hisat2.rgId = Some(readgroupId)
    hisat2.rg +:= s"PL:$platform"
    hisat2.rg +:= s"PU:$platformUnit"
    libId match {
      case Some(id)  => hisat2.rg +:= s"LB:$id"
      case otherwise => ;
    }
    sampleId match {
      case Some(id)  => hisat2.rg +:= s"SM:$id"
      case otherwise => ;
    }

    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = hisat2 | sortSam
397
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
bow's avatar
bow committed
398
399
400
401
402
403
    pipe.threadsCorrection = 1
    add(pipe)

    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
404
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
405
406
407
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
408
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
409
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
410
    // always output BAM
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
411
    tophat.noConvertBam = false
412
    // and always keep input ordering
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
413
    tophat.keepFastaOrder = true
bow's avatar
bow committed
414
415
    add(tophat)

416
    // fix unmapped file coordinates
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
417
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
418
419
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
420
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
421
422
423
424
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
425
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
426
427
428
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
429

430
431
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
432
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
433
434
435
436
437
438
439
440
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
441
    reorderSam.isIntermediate = true
442
443
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
444
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
445
    ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
446
447
    add(ar._1)
    ar._2
bow's avatar
bow committed
448
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
449

Peter van 't Hof's avatar
Peter van 't Hof committed
450
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
451
  def addStampy(R1: File, R2: Option[File], output: File): File = {
452

Peter van 't Hof's avatar
Peter van 't Hof committed
453
    var RG: String = "ID:" + readgroupId + ","
454
455
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
456
457
458
459
460
461
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
462
463
464

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
465
    if (paired) stampyCmd.R2 = R2.get
466
467
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
468
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
469
470
    stampyCmd.isIntermediate = true
    add(stampyCmd)
471
    val sortSam = SortSam(this, stampyCmd.output, output)
472
    sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
473
    add(sortSam)
474
    sortSam.output
475
  }
476

Peter van 't Hof's avatar
Peter van 't Hof committed
477
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
478
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
479
480
481
482
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
483
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
484
485
486
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
487
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
488
489
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
490
    pipe.threadsCorrection = -1
491
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
492
493
    add(pipe)
    ar._2
494
  }
495

496
497
498
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
499
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
500
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
501
502
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
Peter van 't Hof's avatar
Peter van 't Hof committed
503
    bowtie2.rg +:= ("SM:" + sampleId.get)
504
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
505
    bowtie2.R2 = R2
506
507
508
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
509
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
510
511
512
513
514
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
515
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
516
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
517
518
519
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
520
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, swapExt(outputDir, output, ".bam", ".addAddOrReplaceReadGroups.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
521
522
523

    val reorderSam = new ReorderSam(this)
    reorderSam.input = ar._2
Peter van 't Hof's avatar
Peter van 't Hof committed
524
    reorderSam.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
525

Peter van 't Hof's avatar
Peter van 't Hof committed
526
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: zcatR2.flatMap(_._1) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
527
528
      Some(starCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
    pipe.threadsCorrection = -3
529
530
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
531
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
532
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
533
    reorderSam.output
534
  }
535

Peter van 't Hof's avatar
Peter van 't Hof committed
536
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
537
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
538
539
540
541
542
543
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
544
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
545
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
546
    ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
547
548
    add(ar._1)
    ar._2
549
  }
550

Peter van 't Hof's avatar
Peter van 't Hof committed
551
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
552
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
553
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
554
555
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
556
    addOrReplaceReadGroups.RGID = readgroupId
557
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
558
559
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
560
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
561
562
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
563
    addOrReplaceReadGroups.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
bow's avatar
bow committed
564

Peter van 't Hof's avatar
Peter van 't Hof committed
565
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
566
  }
bow's avatar
bow committed
567

Peter van 't Hof's avatar
Peter van 't Hof committed
568
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
569
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
570
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
571
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
572
573
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
574
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
575
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
576
577
578
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
579

580
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
581
  }
582
583
584
585

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
586
587
   * @param file input file
   * @param runDir directory to extract when needed
588
589
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
590
591
592
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
593
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
594
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
595
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
596
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
597
    } else if (file.getName.endsWith(".bz2")) {
598
599
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
600
601
      (Some(pbzip2), newFile)
    } else (None, file)
602
603
  }

604
605
}

606
object Mapping extends PipelineCommand