Mapping.scala 20.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.pipelines.mapping

18
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import java.util.Date
20

21
import nl.lumc.sasc.biopet.core._
22
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
25
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
26
import nl.lumc.sasc.biopet.extensions._
27
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
30
import nl.lumc.sasc.biopet.pipelines.gears.Gears
Peter van 't Hof's avatar
Peter van 't Hof committed
31
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
32
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
import org.broadinstitute.gatk.queue.QScript

import scala.math._
36

37
// TODO: documentation
38
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  def this() = this(null)
bow's avatar
bow committed
41
42

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var input_R1: File = _
bow's avatar
bow committed
44
45

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  var input_R2: Option[File] = None
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var outputName: String = _
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
61
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
68

69
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
72

73
74
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93
  protected var paired: Boolean = false
94
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
  /** location of summary file */
98
99
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

100
101
102
103
104
  override def defaults = Map("gsnap" -> Map("batch" -> 4))

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
  )
106

Peter van 't Hof's avatar
Peter van 't Hof committed
107
  /** File to add to the summary */
108
109
  def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1,
    "reference" -> referenceFasta()) ++
110
    (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map())
111

Peter van 't Hof's avatar
Peter van 't Hof committed
112
  /** Settings to add to summary */
113
114
115
116
117
118
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
119
    "numberChunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
120
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
121

122
123
124
125
126
127
128
129
130
131
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
132
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
133
  def init() {
134
135
    require(outputDir != null, "Missing output directory on mapping module")
    require(input_R1 != null, "Missing output directory on mapping module")
136
137
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
138

Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    paired = input_R2.isDefined
143

Peter van 't Hof's avatar
Peter van 't Hof committed
144
145
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
148

Peter van 't Hof's avatar
Peter van 't Hof committed
149
    if (chunking) {
150
      if (numberChunks.isEmpty) {
151
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
152
        else {
153
          val chunkSize: Int = config("chunksize", 1 << 30)
154
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
155
          else input_R1.length
156
157
158
159
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
161
  }
bow's avatar
bow committed
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
164
  def biopetScript() {
165
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
166
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      flexiprep.input_R1 = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      flexiprep.input_R2 = input_R2
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      flexiprep.sampleId = this.sampleId
170
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
      flexiprep.init()
      flexiprep.runInitialJobs()
173
    }
bow's avatar
bow committed
174
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
177

Peter van 't Hof's avatar
Peter van 't Hof committed
178
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
183
184
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
        chunkDir -> (new File(chunkDir, input_R1.getName),
          if (paired) Some(new File(chunkDir, input_R2.get.getName)) else None)
      }).toMap
      else if (skipFlexiprep) Map(outputDir -> (input_R1, if (paired) input_R2 else None))
Peter van 't Hof's avatar
Peter van 't Hof committed
185
      else Map(outputDir -> (flexiprep.input_R1, flexiprep.input_R2))
186
    }
bow's avatar
bow committed
187

Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      fastSplitter_R1.input = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
192
193
      fastSplitter_R1.isIntermediate = true
      add(fastSplitter_R1)
bow's avatar
bow committed
194

Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
        fastSplitter_R2.input = input_R2.get
Peter van 't Hof's avatar
Peter van 't Hof committed
198
        for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2.get
199
200
        fastSplitter_R2.isIntermediate = true
        add(fastSplitter_R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
      }
    }
bow's avatar
bow committed
203

Peter van 't Hof's avatar
Peter van 't Hof committed
204
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
208
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
        fastq_R1_output :+= R1
Peter van 't Hof's avatar
Peter van 't Hof committed
213
        R2.foreach(R2 => fastq_R2_output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
      }
215

Peter van 't Hof's avatar
Peter van 't Hof committed
216
      val outputBam = new File(chunkDir, outputName + ".bam")
217
218
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
222
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
223
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
224
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
227
228
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
229
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
230
      }
231
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
232
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
234
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
235
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
236
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
237
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
238
    }
bow's avatar
bow committed
239

240
241
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
242
      bamFile = new File(outputDir, outputName + ".dedup.bam")
243
244
245
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
246
247
248
249
250
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
251

Peter van 't Hof's avatar
Peter van 't Hof committed
252
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
253
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
256
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
257

Peter van 't Hof's avatar
Peter van 't Hof committed
258
259
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
260
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
261

262
263
264
265
266
267
268
    if (config("unmapped_to_gears", default = false).asBoolean) {
      val gears = new Gears(this)
      gears.bamFile = Some(finalBamFile)
      gears.outputDir = new File(outputDir, "gears")
      gears.init()
      gears.biopetScript()
      addAll(gears.functions)
269
      addSummaryQScript(gears)
270
271
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
    if (config("generate_wig", default = false).asBoolean)
      addAll(Bam2Wig(this, finalBamFile).functions)
274

Peter van 't Hof's avatar
Peter van 't Hof committed
275
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
276
  }
bow's avatar
bow committed
277

Peter van 't Hof's avatar
Peter van 't Hof committed
278
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
279
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
280
281
282
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
283
    bwaAlnR1.isIntermediate = true
284
285
286
    add(bwaAlnR1)

    val samFile: File = if (paired) {
287
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
288
      bwaAlnR2.fastq = R2.get
289
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
290
      bwaAlnR2.isIntermediate = true
291
292
293
294
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      bwaSampe.fastqR2 = R2.get
296
297
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
298
      bwaSampe.r = getReadGroupBwa
299
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
300
      bwaSampe.isIntermediate = true
301
302
303
304
305
306
307
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
308
      bwaSamse.r = getReadGroupBwa
309
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
310
      bwaSamse.isIntermediate = true
311
312
313
314
315
316
317
318
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
319
    sortSam.output
320
321
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
322
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
323
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
324
    val bwaCommand = new BwaMem(this)
325
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
326
    if (paired) bwaCommand.R2 = R2.get
327
    bwaCommand.R = Some(getReadGroupBwa)
328
329
    val sortSam = new SortSam(this)
    sortSam.output = output
330
331
332
333
    val pipe = bwaCommand | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
334
    output
335
  }
336

Peter van 't Hof's avatar
Peter van 't Hof committed
337
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
338
    val gsnapCommand = new Gsnap(this)
339
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
340
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
341

342
    val reorderSam = new ReorderSam(this)
343
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
344
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
345

Peter van 't Hof's avatar
Peter van 't Hof committed
346
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
347
348
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
Peter van 't Hof's avatar
Peter van 't Hof committed
349
350
    add(pipe)
    ar._2
bow's avatar
bow committed
351
352
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
353
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
354
355
356
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
357
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
bow's avatar
bow committed
358
359
360
    tophat.output_dir = new File(outputDir, "tophat_out")
    // always output BAM
    tophat.no_convert_bam = false
361
362
    // and always keep input ordering
    tophat.keep_fasta_order = true
bow's avatar
bow committed
363
364
    add(tophat)

365
366
367
368
    // fix unmapped file coordinates
    val fixedUnmapped = new File(tophat.output_dir, "unmapped_fixup.sam")
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
369
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
370
371
372
373
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
bow's avatar
bow committed
374
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.output_dir, "unmapped_fixup.sorted.bam"))
375
376
377
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
378

379
380
381
382
383
384
385
386
387
388
389
390
391
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
      tophat.output_dir, "coordinate")
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
392
393
394
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
    add(ar._1)
    ar._2
bow's avatar
bow committed
395
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
396
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
397
  def addStampy(R1: File, R2: Option[File], output: File): File = {
398

Peter van 't Hof's avatar
Peter van 't Hof committed
399
    var RG: String = "ID:" + readgroupId + ","
400
401
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
402
403
404
405
406
407
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
408
409
410

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
411
    if (paired) stampyCmd.R2 = R2.get
412
413
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
414
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
415
416
    stampyCmd.isIntermediate = true
    add(stampyCmd)
417
418
419
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
420
    sortSam.output
421
  }
422

Peter van 't Hof's avatar
Peter van 't Hof committed
423
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
424
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
425
426
427
428
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
429
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
430
431
432
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
433
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
434
435
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
436
    pipe.threadsCorrection = -1
Peter van 't Hof's avatar
Peter van 't Hof committed
437
438
    add(pipe)
    ar._2
439
  }
440

441
442
443
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Wai Yi Leung's avatar
Wai Yi Leung committed
444
445
446
447
448
    bowtie2.rg_id = Some(readgroupId)
    bowtie2.rg +:= ("LB:" + libId.get)
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
    bowtie2.rg +:= ("SM:" + sampleId.get)
449
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
450
    bowtie2.R2 = R2
451
452
453
454
455
456
457
458
459
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
460
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
461
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
462
463
464
465
466
467
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, output)
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) ::
      Some(starCommand) :: Some(ar._1) :: Nil).flatten)
468
469
470
    pipe.threadsCorrection = -1
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
471
472
    add(pipe)
    ar._2
473
  }
474

Peter van 't Hof's avatar
Peter van 't Hof committed
475
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
476
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
477
478
479
480
481
482
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
483
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
484
485
486
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
    add(ar._1)
    ar._2
487
  }
488

Peter van 't Hof's avatar
Peter van 't Hof committed
489
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
490
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
491
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
492
493
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
494
    addOrReplaceReadGroups.RGID = readgroupId
495
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
496
497
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
498
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
499
500
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
501
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
bow's avatar
bow committed
502

Peter van 't Hof's avatar
Peter van 't Hof committed
503
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
504
  }
bow's avatar
bow committed
505

Peter van 't Hof's avatar
Peter van 't Hof committed
506
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
507
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
508
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
509
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
510
511
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
512
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
513
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
514
515
516
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
517

518
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
519
  }
520
521
522
523

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
524
525
   * @param file input file
   * @param runDir directory to extract when needed
526
527
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
528
529
530
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
531
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
532
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
533
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
534
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
535
    } else if (file.getName.endsWith(".bz2")) {
536
537
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
538
539
      (Some(pbzip2), newFile)
    } else (None, file)
540
541
  }

542
543
}

544
object Mapping extends PipelineCommand