Mapping.scala 21 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.mapping

17
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.util.Date
19

20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2, Bowtie }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
26
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
27
import nl.lumc.sasc.biopet.extensions._
28
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
31
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
32
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
33
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
36
import org.broadinstitute.gatk.queue.QScript

import scala.math._
37

38
// TODO: documentation
39
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  def this() = this(null)
bow's avatar
bow committed
42

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  @Input(doc = "R1 fastq file", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
44
  var inputR1: File = _
bow's avatar
bow committed
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  @Input(doc = "R2 fastq file", shortName = "R2", fullName = "inputR2", required = false)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
47
  var inputR2: Option[File] = None
bow's avatar
bow committed
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
50
  var outputName: String = _
bow's avatar
bow committed
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
54

Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
57

Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
60

Peter van 't Hof's avatar
Peter van 't Hof committed
61
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
63

Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
69

70
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
73

74
75
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
93

Peter van 't Hof's avatar
Peter van 't Hof committed
94
  protected var paired: Boolean = false
95
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  /** location of summary file */
99
100
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

101
102
103
104
105
  override def defaults = Map("gsnap" -> Map("batch" -> 4))

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
106
  )
107

Peter van 't Hof's avatar
Peter van 't Hof committed
108
  /** File to add to the summary */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
109
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
110
    "reference" -> referenceFasta()) ++
111
112
113
114
115
116
117
118
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map()) ++
    (bam2wig match {
      case Some(b) => Map(
        "output_wigle" -> b.outputWigleFile,
        "output_tdf" -> b.outputTdfFile,
        "output_bigwig" -> b.outputBwFile)
      case _ => Map()
    })
119

Peter van 't Hof's avatar
Peter van 't Hof committed
120
  /** Settings to add to summary */
121
122
123
124
125
126
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
127
    "numberChunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
128
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
129

130
131
132
133
134
135
136
137
138
139
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
140
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
141
  def init() {
142
    require(outputDir != null, "Missing output directory on mapping module")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
143
    require(inputR1 != null, "Missing output directory on mapping module")
144
145
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
146

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
147
148
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
149

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
150
    paired = inputR2.isDefined
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
154

Peter van 't Hof's avatar
Peter van 't Hof committed
155
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
156

Peter van 't Hof's avatar
Peter van 't Hof committed
157
    if (chunking) {
158
      if (numberChunks.isEmpty) {
159
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
160
        else {
161
          val chunkSize: Int = config("chunksize", 1 << 30)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
162
163
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
164
165
166
167
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
168
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
169
  }
bow's avatar
bow committed
170

Peter van 't Hof's avatar
Peter van 't Hof committed
171
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
172
  def biopetScript() {
173
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
174
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
175
      flexiprep.inputR1 = inputR1
176
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
177
      flexiprep.sampleId = this.sampleId
178
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
      flexiprep.init()
      flexiprep.runInitialJobs()
181
    }
bow's avatar
bow committed
182
    var bamFiles: List[File] = Nil
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
183
184
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
185

Peter van 't Hof's avatar
Peter van 't Hof committed
186
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
187
188
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
189
190
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      }).toMap
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
192
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
193
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
194
    }
bow's avatar
bow committed
195

Peter van 't Hof's avatar
Peter van 't Hof committed
196
    if (chunking) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
197
198
199
200
201
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
202

Peter van 't Hof's avatar
Peter van 't Hof committed
203
      if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
204
205
206
207
208
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
      }
    }
bow's avatar
bow committed
211

Peter van 't Hof's avatar
Peter van 't Hof committed
212
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
215
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
216
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
217
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
218
219
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
220
221
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
222
      }
223

Peter van 't Hof's avatar
Peter van 't Hof committed
224
      val outputBam = new File(chunkDir, outputName + ".bam")
225
226
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
227
228
229
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
230
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
231
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
232
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
235
236
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
237
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
238
      }
239
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
240
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
241
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
242
    if (!skipFlexiprep) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
243
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
244
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
245
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
246
    }
bow's avatar
bow committed
247

248
249
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
250
      bamFile = new File(outputDir, outputName + ".dedup.bam")
251
252
253
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
254
    } else if (skipMarkduplicates && chunking) {
Peter van 't Hof's avatar
Peter van 't Hof committed
255
      val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam"))
256
257
258
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
259

Peter van 't Hof's avatar
Peter van 't Hof committed
260
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
261
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
264
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
268
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
269

270
    if (config("unmapped_to_gears", default = false).asBoolean) {
271
      val gears = new GearsSingle(this)
272
      gears.bamFile = Some(finalBamFile)
273
274
      gears.sampleId = sampleId
      gears.libId = libId
275
      gears.outputDir = new File(outputDir, "gears")
276
      add(gears)
277
278
    }

279
    bam2wig.foreach(add(_))
280

Peter van 't Hof's avatar
Peter van 't Hof committed
281
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
282
  }
bow's avatar
bow committed
283

284
285
286
287
  protected lazy val bam2wig = if (config("generate_wig", default = false)) {
    Some(Bam2Wig(this, finalBamFile))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
288
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
289
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
290
291
292
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
293
    bwaAlnR1.isIntermediate = true
294
295
296
    add(bwaAlnR1)

    val samFile: File = if (paired) {
297
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
298
      bwaAlnR2.fastq = R2.get
299
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
300
      bwaAlnR2.isIntermediate = true
301
302
303
304
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
305
      bwaSampe.fastqR2 = R2.get
306
307
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
308
      bwaSampe.r = getReadGroupBwa
309
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
310
      bwaSampe.isIntermediate = true
311
312
313
314
315
316
317
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
318
      bwaSamse.r = getReadGroupBwa
319
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
320
      bwaSamse.isIntermediate = true
321
322
323
324
325
326
327
328
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
329
    sortSam.output
330
331
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
332
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
333
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
334
    val bwaCommand = new BwaMem(this)
335
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
336
    if (paired) bwaCommand.R2 = R2.get
337
    bwaCommand.R = Some(getReadGroupBwa)
338
339
    val sortSam = new SortSam(this)
    sortSam.output = output
340
341
342
343
    val pipe = bwaCommand | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
344
    output
345
  }
346

Peter van 't Hof's avatar
Peter van 't Hof committed
347
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
348
    val gsnapCommand = new Gsnap(this)
349
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
350
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
351

352
    val reorderSam = new ReorderSam(this)
353
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
354
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
355

Peter van 't Hof's avatar
Peter van 't Hof committed
356
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
357
358
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
Peter van 't Hof's avatar
Peter van 't Hof committed
359
360
    add(pipe)
    ar._2
bow's avatar
bow committed
361
362
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
363
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
364
365
366
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
367
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
368
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
369
    // always output BAM
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
370
    tophat.noConvertBam = false
371
    // and always keep input ordering
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
372
    tophat.keepFastaOrder = true
bow's avatar
bow committed
373
374
    add(tophat)

375
    // fix unmapped file coordinates
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
376
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
377
378
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
379
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
380
381
382
383
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
384
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
385
386
387
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
388

389
390
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
391
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
392
393
394
395
396
397
398
399
400
401
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
402
403
404
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
    add(ar._1)
    ar._2
bow's avatar
bow committed
405
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
406

Peter van 't Hof's avatar
Peter van 't Hof committed
407
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
408
  def addStampy(R1: File, R2: Option[File], output: File): File = {
409

Peter van 't Hof's avatar
Peter van 't Hof committed
410
    var RG: String = "ID:" + readgroupId + ","
411
412
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
413
414
415
416
417
418
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
419
420
421

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
422
    if (paired) stampyCmd.R2 = R2.get
423
424
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
425
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
426
427
    stampyCmd.isIntermediate = true
    add(stampyCmd)
428
429
430
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
431
    sortSam.output
432
  }
433

Peter van 't Hof's avatar
Peter van 't Hof committed
434
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
435
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
436
437
438
439
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
440
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
441
442
443
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
444
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
445
446
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
447
    pipe.threadsCorrection = -1
Peter van 't Hof's avatar
Peter van 't Hof committed
448
449
    add(pipe)
    ar._2
450
  }
451

452
453
454
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
455
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
456
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
457
458
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
Peter van 't Hof's avatar
Peter van 't Hof committed
459
    bowtie2.rg +:= ("SM:" + sampleId.get)
460
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
461
    bowtie2.R2 = R2
462
463
464
465
466
467
468
469
470
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
471
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
472
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
473
474
475
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
476
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, swapExt(outputDir, output, ".bam", ".addAddOrReplaceReadGroups.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
477
478
479

    val reorderSam = new ReorderSam(this)
    reorderSam.input = ar._2
Peter van 't Hof's avatar
Peter van 't Hof committed
480
    reorderSam.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
481

Peter van 't Hof's avatar
Peter van 't Hof committed
482
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: zcatR2.flatMap(_._1) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
483
484
      Some(starCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
    pipe.threadsCorrection = -3
485
486
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
487
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
488
    reorderSam.output
489
  }
490

Peter van 't Hof's avatar
Peter van 't Hof committed
491
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
492
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
493
494
495
496
497
498
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
499
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
500
501
502
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
    add(ar._1)
    ar._2
503
  }
504

Peter van 't Hof's avatar
Peter van 't Hof committed
505
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
506
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
507
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
508
509
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
510
    addOrReplaceReadGroups.RGID = readgroupId
511
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
512
513
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
514
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
515
516
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
517
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
bow's avatar
bow committed
518

Peter van 't Hof's avatar
Peter van 't Hof committed
519
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
520
  }
bow's avatar
bow committed
521

Peter van 't Hof's avatar
Peter van 't Hof committed
522
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
523
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
524
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
525
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
526
527
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
528
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
529
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
530
531
532
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
533

534
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
535
  }
536
537
538
539

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
540
541
   * @param file input file
   * @param runDir directory to extract when needed
542
543
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
544
545
546
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
547
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
548
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
549
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
550
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
551
    } else if (file.getName.endsWith(".bz2")) {
552
553
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
554
555
      (Some(pbzip2), newFile)
    } else (None, file)
556
557
  }

558
559
}

560
object Mapping extends PipelineCommand