Mapping.scala 19.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.pipelines.mapping

18
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import java.util.Date
20

21
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.config.Configurable
23
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat, _ }
27
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
31
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
32
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
import org.broadinstitute.gatk.queue.QScript

import scala.math._
36

37
// TODO: documentation
38
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  def this() = this(null)
bow's avatar
bow committed
41
42

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var input_R1: File = _
bow's avatar
bow committed
44
45

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  var input_R2: Option[File] = None
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var outputName: String = _
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
61
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
68

69
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
72

73
74
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93
  protected var paired: Boolean = false
94
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
  /** location of summary file */
98
99
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

100
101
102
103
104
105
106
107
  override def defaults = ConfigUtils.mergeMaps(
    Map(
      "gsnap" -> Map(
        "batch" -> 4,
        "format" -> "sam"
      )
    ), super.defaults)

Peter van 't Hof's avatar
Peter van 't Hof committed
108
  /** File to add to the summary */
109
110
  def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1,
    "reference" -> referenceFasta()) ++
111
    (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map())
112

Peter van 't Hof's avatar
Peter van 't Hof committed
113
  /** Settings to add to summary */
114
115
116
117
118
119
120
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
    "numberChunks" -> numberChunks.getOrElse(1)
121
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
122

123
124
125
126
127
128
129
130
131
132
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
133
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
134
  def init() {
135
136
    require(outputDir != null, "Missing output directory on mapping module")
    require(input_R1 != null, "Missing output directory on mapping module")
137
138
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
139

Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
    inputFiles :+= InputFile(input_R1)
    input_R2.foreach(inputFiles :+= InputFile(_))

Peter van 't Hof's avatar
Peter van 't Hof committed
143
    paired = input_R2.isDefined
144

145
    if (readgroupId == null) readgroupId = sampleId.get + "-" + libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
146
    else if (readgroupId == null) readgroupId = config("readgroup_id")
bow's avatar
bow committed
147

Peter van 't Hof's avatar
Peter van 't Hof committed
148
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
149

Peter van 't Hof's avatar
Peter van 't Hof committed
150
    if (chunking) {
151
      if (numberChunks.isEmpty) {
152
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
153
        else {
154
          val chunkSize: Int = config("chunksize", 1 << 30)
155
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
156
          else input_R1.length
157
158
159
160
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
161
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
162
  }
bow's avatar
bow committed
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
165
  def biopetScript() {
166
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      flexiprep.input_R1 = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      flexiprep.input_R2 = input_R2
Peter van 't Hof's avatar
Peter van 't Hof committed
170
      flexiprep.sampleId = this.sampleId
171
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
      flexiprep.init()
      flexiprep.runInitialJobs()
174
    }
bow's avatar
bow committed
175
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
178

Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
    def removeGz(file: File): File = {
      val absPath = file.getAbsolutePath
      if (absPath.endsWith(".gz")) new File(absPath.substring(0, absPath.lastIndexOf(".gz")))
      else if (absPath.endsWith(".gzip")) new File(absPath.substring(0, absPath.lastIndexOf(".gzip")))
183
      else file
Peter van 't Hof's avatar
Peter van 't Hof committed
184
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
187
188
189

    val chunks: Map[File, (File, Option[File])] = {
      if (chunking) {
        (for (t <- 1 to numberChunks.getOrElse(1)) yield {
          val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
          chunkDir -> (removeGz(new File(chunkDir, input_R1.getName)),
            if (paired) Some(removeGz(new File(chunkDir, input_R2.get.getName))) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194
195
196
197
        }).toMap
      } else if (skipFlexiprep) {
        Map(outputDir -> (
          extractIfNeeded(input_R1, flexiprep.outputDir),
          if (paired) Some(extractIfNeeded(input_R2.get, outputDir)) else None)
        )
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
198
      } else Map(outputDir -> (flexiprep.outputFiles("fastq_input_R1"), flexiprep.outputFiles.get("fastq_input_R2")))
199
    }
bow's avatar
bow committed
200

Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
      fastSplitter_R1.input = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
204
      for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
205
206
      fastSplitter_R1.isIntermediate = true
      add(fastSplitter_R1)
bow's avatar
bow committed
207

Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
        fastSplitter_R2.input = input_R2.get
Peter van 't Hof's avatar
Peter van 't Hof committed
211
        for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2.get
212
213
        fastSplitter_R2.isIntermediate = true
        add(fastSplitter_R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
      }
    }
bow's avatar
bow committed
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
218
219
      var R1 = fastqfile._1
      var R2 = fastqfile._2
220
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
221
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
222
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
223
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
226
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
227
        fastq_R1_output :+= R1
Peter van 't Hof's avatar
Peter van 't Hof committed
228
        R2.foreach(R2 => fastq_R2_output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
229
      }
230

Peter van 't Hof's avatar
Peter van 't Hof committed
231
      val outputBam = new File(chunkDir, outputName + ".bam")
232
233
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
234
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam, deps)
235
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam, deps)
236
        case "bowtie"     => addBowtie(R1, R2, outputBam, deps)
bow's avatar
bow committed
237
        case "gsnap"      => addGsnap(R1, R2, outputBam, deps)
bow's avatar
bow committed
238
239
        // TODO: make TopHat here accept multiple input files
        case "tophat"     => addTophat(R1, R2, outputBam, deps)
240
241
        case "stampy"     => addStampy(R1, R2, outputBam, deps)
        case "star"       => addStar(R1, R2, outputBam, deps)
242
        case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
243
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
244
      }
245
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
246
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics")).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
247
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
248
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
249
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
250
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
251
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
252
    }
bow's avatar
bow committed
253

254
255
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
256
      bamFile = new File(outputDir, outputName + ".dedup.bam")
257
258
259
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
260
261
262
263
264
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
269
270
    if (!skipMetrics) {
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"))
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
271

Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
274
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
275
276
277

    if (config("generate_wig", default = false).asBoolean)
      addAll(Bam2Wig(this, finalBamFile).functions)
278

Peter van 't Hof's avatar
Peter van 't Hof committed
279
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
280
  }
bow's avatar
bow committed
281

Peter van 't Hof's avatar
Peter van 't Hof committed
282
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
283
  def addBwaAln(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
284
285
286
287
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.deps = deps
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
288
    bwaAlnR1.isIntermediate = true
289
290
291
    add(bwaAlnR1)

    val samFile: File = if (paired) {
292
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
293
      bwaAlnR2.fastq = R2.get
294
295
      bwaAlnR2.deps = deps
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
296
      bwaAlnR2.isIntermediate = true
297
298
299
300
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
301
      bwaSampe.fastqR2 = R2.get
302
303
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
304
      bwaSampe.r = getReadGroupBwa
305
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
306
      bwaSampe.isIntermediate = true
307
308
309
310
311
312
313
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
314
      bwaSamse.r = getReadGroupBwa
315
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
316
      bwaSamse.isIntermediate = true
317
318
319
320
321
322
323
324
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
325
    sortSam.output
326
327
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
328
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
329
  def addBwaMem(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
330
    val bwaCommand = new BwaMem(this)
331
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
332
    if (paired) bwaCommand.R2 = R2.get
333
    bwaCommand.deps = deps
334
    bwaCommand.R = Some(getReadGroupBwa)
335
    bwaCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
336
337
    bwaCommand.isIntermediate = true
    add(bwaCommand)
338
339
340
    val sortSam = SortSam(this, bwaCommand.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
341
    sortSam.output
342
  }
343

Peter van 't Hof's avatar
Peter van 't Hof committed
344
  def addGsnap(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
bow's avatar
bow committed
345
    val gsnapCommand = new Gsnap(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
346
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
bow's avatar
bow committed
347
348
349
350
351
    gsnapCommand.deps = deps
    gsnapCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
    gsnapCommand.isIntermediate = true
    add(gsnapCommand)

352
    val reorderSam = new ReorderSam(this)
353
    reorderSam.input = gsnapCommand.output
354
355
356
357
    reorderSam.output = swapExt(output.getParent, output, ".sorted.bam", ".reordered.bam")
    add(reorderSam)

    addAddOrReplaceReadGroups(reorderSam.output, output)
bow's avatar
bow committed
358
359
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
360
  def addTophat(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
bow's avatar
bow committed
361
362
363
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
364
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
bow's avatar
bow committed
365
366
367
368
    tophat.output_dir = new File(outputDir, "tophat_out")
    tophat.deps = deps
    // always output BAM
    tophat.no_convert_bam = false
369
370
    // and always keep input ordering
    tophat.keep_fasta_order = true
bow's avatar
bow committed
371
372
    add(tophat)

373
374
375
376
    // fix unmapped file coordinates
    val fixedUnmapped = new File(tophat.output_dir, "unmapped_fixup.sam")
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
377
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
378
379
380
381
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
bow's avatar
bow committed
382
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.output_dir, "unmapped_fixup.sorted.bam"))
383
384
385
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
386

387
388
389
390
391
392
393
394
395
396
397
398
399
400
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
      tophat.output_dir, "coordinate")
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

    addAddOrReplaceReadGroups(reorderSam.output, output)
bow's avatar
bow committed
401
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
402
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
403
  def addStampy(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
404

Peter van 't Hof's avatar
Peter van 't Hof committed
405
    var RG: String = "ID:" + readgroupId + ","
406
407
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
408
409
410
411
412
413
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
414
415
416

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
417
    if (paired) stampyCmd.R2 = R2.get
418
419
420
421
    stampyCmd.deps = deps
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
    stampyCmd.output = this.swapExt(output.getParent, output, ".bam", ".sam")
422
423
    stampyCmd.isIntermediate = true
    add(stampyCmd)
424
425
426
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
427
    sortSam.output
428
  }
429

Peter van 't Hof's avatar
Peter van 't Hof committed
430
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
431
  def addBowtie(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
432
    val bowtie = new Bowtie(this)
433
    bowtie.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
434
    if (paired) bowtie.R2 = R2
435
436
    bowtie.deps = deps
    bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
437
438
    bowtie.isIntermediate = true
    add(bowtie)
Peter van 't Hof's avatar
Peter van 't Hof committed
439
    addAddOrReplaceReadGroups(bowtie.output, output)
440
  }
441

Peter van 't Hof's avatar
Peter van 't Hof committed
442
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
443
444
  def addStar(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
    val starCommand = Star(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
445
    add(starCommand)
446
    addAddOrReplaceReadGroups(starCommand.outputSam, output)
447
  }
448

Peter van 't Hof's avatar
Peter van 't Hof committed
449
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
450
451
  def addStar2pass(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
    val starCommand = Star._2pass(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
452
    addAll(starCommand._2)
453
    addAddOrReplaceReadGroups(starCommand._1, output)
454
  }
455

Peter van 't Hof's avatar
Peter van 't Hof committed
456
  /** Adds AddOrReplaceReadGroups */
457
458
  def addAddOrReplaceReadGroups(input: File, output: File): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
459
460
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
461
    addOrReplaceReadGroups.RGID = readgroupId
462
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
463
464
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
465
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
466
467
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
468
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
469
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
470

471
    addOrReplaceReadGroups.output
Peter van 't Hof's avatar
Peter van 't Hof committed
472
  }
bow's avatar
bow committed
473

Peter van 't Hof's avatar
Peter van 't Hof committed
474
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
475
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
476
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
477
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
478
479
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
480
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
481
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
482
483
484
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
485

486
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
487
  }
488
489
490
491

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
492
493
   * @param file input file
   * @param runDir directory to extract when needed
494
495
496
   * @return returns extracted file
   */
  def extractIfNeeded(file: File, runDir: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
497
498
    if (file == null) file
    else if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
499
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
500
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
501
502
503
      val zcatCommand = Zcat(this, file, newFile)
      zcatCommand.isIntermediate = true
      add(zcatCommand)
Peter van 't Hof's avatar
Peter van 't Hof committed
504
505
      newFile
    } else if (file.getName.endsWith(".bz2")) {
506
507
508
509
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
      pbzip2.isIntermediate = true
      add(pbzip2)
Peter van 't Hof's avatar
Peter van 't Hof committed
510
511
      newFile
    } else file
512
513
  }

514
515
}

516
object Mapping extends PipelineCommand