Mapping.scala 19.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
package nl.lumc.sasc.biopet.pipelines.mapping

import java.util.Date
19
import java.io.File
20
21
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition

22
23
24
25
import scala.math._

import org.broadinstitute.gatk.queue.QScript

26
import nl.lumc.sasc.biopet.core._
27
import nl.lumc.sasc.biopet.core.config.Configurable
28
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
29
import nl.lumc.sasc.biopet.extensions._
30
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem }
bow's avatar
bow committed
31
import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat }
Peter van 't Hof's avatar
Peter van 't Hof committed
32
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
33
import nl.lumc.sasc.biopet.tools.FastqSplitter
34
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups, ReorderSam }
35
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
36
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
37

38
// TODO: documentation
39
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  def this() = this(null)
bow's avatar
bow committed
42
43

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
44
  var input_R1: File = _
bow's avatar
bow committed
45
46

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
47
  var input_R2: Option[File] = None
bow's avatar
bow committed
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
50
  var outputName: String = _
bow's avatar
bow committed
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
54

Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
57

Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
60

Peter van 't Hof's avatar
Peter van 't Hof committed
61
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
63

Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
69

70
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
73

74
75
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
93

Peter van 't Hof's avatar
Peter van 't Hof committed
94
  protected var paired: Boolean = false
95
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  /** location of summary file */
99
100
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
101
  /** File to add to the summary */
102
103
  def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1,
    "reference" -> referenceFasta()) ++
104
    (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map())
105

Peter van 't Hof's avatar
Peter van 't Hof committed
106
  /** Settings to add to summary */
107
108
109
110
111
112
113
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
    "aligner" -> aligner,
    "chunking" -> chunking,
    "numberChunks" -> numberChunks.getOrElse(1)
114
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
115

Peter van 't Hof's avatar
Peter van 't Hof committed
116
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
117
  def init() {
118
119
    require(outputDir != null, "Missing output directory on mapping module")
    require(input_R1 != null, "Missing output directory on mapping module")
120
121
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
    paired = input_R2.isDefined
124

125
    if (readgroupId == null) readgroupId = sampleId.get + "-" + libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
126
    else if (readgroupId == null) readgroupId = config("readgroup_id")
bow's avatar
bow committed
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
129

Peter van 't Hof's avatar
Peter van 't Hof committed
130
    if (chunking) {
131
      if (numberChunks.isEmpty) {
132
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
133
        else {
134
          val chunkSize: Int = config("chunksize", 1 << 30)
135
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
136
          else input_R1.length
137
138
139
140
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
141
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
142
  }
bow's avatar
bow committed
143

Peter van 't Hof's avatar
Peter van 't Hof committed
144
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
145
  def biopetScript() {
146
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
147
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Peter van 't Hof's avatar
Peter van 't Hof committed
148
      flexiprep.input_R1 = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
149
      flexiprep.input_R2 = input_R2
Peter van 't Hof's avatar
Peter van 't Hof committed
150
      flexiprep.sampleId = this.sampleId
151
      flexiprep.libId = this.libId
152
153
      flexiprep.init
      flexiprep.runInitialJobs
154
    }
bow's avatar
bow committed
155
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
158

Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
161
162
    def removeGz(file: File): File = {
      val absPath = file.getAbsolutePath
      if (absPath.endsWith(".gz")) new File(absPath.substring(0, absPath.lastIndexOf(".gz")))
      else if (absPath.endsWith(".gzip")) new File(absPath.substring(0, absPath.lastIndexOf(".gzip")))
163
      else file
Peter van 't Hof's avatar
Peter van 't Hof committed
164
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
167
168
169
170
171
172
173
174
175
176
177

    val chunks: Map[File, (File, Option[File])] = {
      if (chunking) {
        (for (t <- 1 to numberChunks.getOrElse(1)) yield {
          val chunkDir = new File(outputDir, "chunks" + File.separator + t)
          (chunkDir -> (removeGz(new File(chunkDir, input_R1.getName)),
            if (paired) Some(removeGz(new File(chunkDir, input_R2.get.getName))) else None))
        }).toMap
      } else if (skipFlexiprep) {
        Map(outputDir -> (
          extractIfNeeded(input_R1, flexiprep.outputDir),
          if (paired) Some(extractIfNeeded(input_R2.get, outputDir)) else None)
        )
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
178
      } else Map(outputDir -> (flexiprep.outputFiles("fastq_input_R1"), flexiprep.outputFiles.get("fastq_input_R2")))
179
    }
bow's avatar
bow committed
180

Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
183
      fastSplitter_R1.input = input_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
184
      for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
185
186
      fastSplitter_R1.isIntermediate = true
      add(fastSplitter_R1)
bow's avatar
bow committed
187

Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
        fastSplitter_R2.input = input_R2.get
Peter van 't Hof's avatar
Peter van 't Hof committed
191
        for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2.get
192
193
        fastSplitter_R2.isIntermediate = true
        add(fastSplitter_R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
      }
    }
bow's avatar
bow committed
196

Peter van 't Hof's avatar
Peter van 't Hof committed
197
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
198
199
      var R1 = fastqfile._1
      var R2 = fastqfile._2
200
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
202
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
206
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
207
        fastq_R1_output :+= R1
Peter van 't Hof's avatar
Peter van 't Hof committed
208
        R2.foreach(R2 => fastq_R2_output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
      }
210

Peter van 't Hof's avatar
Peter van 't Hof committed
211
      val outputBam = new File(chunkDir, outputName + ".bam")
212
213
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
214
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam, deps)
215
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam, deps)
216
        case "bowtie"     => addBowtie(R1, R2, outputBam, deps)
bow's avatar
bow committed
217
        case "gsnap"      => addGsnap(R1, R2, outputBam, deps)
bow's avatar
bow committed
218
219
        // TODO: make TopHat here accept multiple input files
        case "tophat"     => addTophat(R1, R2, outputBam, deps)
220
221
        case "stampy"     => addStampy(R1, R2, outputBam, deps)
        case "star"       => addStar(R1, R2, outputBam, deps)
222
        case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
223
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
224
      }
225
      if (config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
226
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics")).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
227
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
229
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
230
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
231
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
232
    }
bow's avatar
bow committed
233

234
235
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
236
      bamFile = new File(outputDir, outputName + ".dedup.bam")
237
238
239
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
240
241
242
243
244
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
245

Peter van 't Hof's avatar
Peter van 't Hof committed
246
247
248
249
250
    if (!skipMetrics) {
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"))
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
251

Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
254
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
257

    if (config("generate_wig", default = false).asBoolean)
      addAll(Bam2Wig(this, finalBamFile).functions)
258
259

    addSummaryJobs
Peter van 't Hof's avatar
Peter van 't Hof committed
260
  }
bow's avatar
bow committed
261

Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
264
265
266
267
268
269
  /**
   * Add bwa aln jobs
   * @param R1
   * @param R2
   * @param output
   * @param deps
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
270
  def addBwaAln(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
271
272
273
274
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.deps = deps
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
275
    bwaAlnR1.isIntermediate = true
276
277
278
    add(bwaAlnR1)

    val samFile: File = if (paired) {
279
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
280
      bwaAlnR2.fastq = R2.get
281
282
      bwaAlnR2.deps = deps
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
283
      bwaAlnR2.isIntermediate = true
284
285
286
287
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
288
      bwaSampe.fastqR2 = R2.get
289
290
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
291
      bwaSampe.r = getReadGroupBwa
292
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
293
      bwaSampe.isIntermediate = true
294
295
296
297
298
299
300
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
301
      bwaSamse.r = getReadGroupBwa
302
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
303
      bwaSamse.isIntermediate = true
304
305
306
307
308
309
310
311
312
313
314
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
    return sortSam.output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
315
316
317
318
319
320
321
322
  /**
   * Adds bwa mem jobs
   * @param R1
   * @param R2
   * @param output
   * @param deps
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
323
  def addBwaMem(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
324
    val bwaCommand = new BwaMem(this)
325
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
326
    if (paired) bwaCommand.R2 = R2.get
327
    bwaCommand.deps = deps
328
    bwaCommand.R = Some(getReadGroupBwa)
329
    bwaCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
330
331
    bwaCommand.isIntermediate = true
    add(bwaCommand)
332
333
334
    val sortSam = SortSam(this, bwaCommand.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
335
    sortSam.output
336
  }
337

Peter van 't Hof's avatar
Peter van 't Hof committed
338
  def addGsnap(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
bow's avatar
bow committed
339
    val gsnapCommand = new Gsnap(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
340
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
bow's avatar
bow committed
341
342
343
344
345
    gsnapCommand.deps = deps
    gsnapCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
    gsnapCommand.isIntermediate = true
    add(gsnapCommand)

346
    val reorderSam = new ReorderSam(this)
347
    reorderSam.input = gsnapCommand.output
348
349
350
351
    reorderSam.output = swapExt(output.getParent, output, ".sorted.bam", ".reordered.bam")
    add(reorderSam)

    addAddOrReplaceReadGroups(reorderSam.output, output)
bow's avatar
bow committed
352
353
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
354
  def addTophat(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
bow's avatar
bow committed
355
356
357
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
358
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
bow's avatar
bow committed
359
360
361
362
    tophat.output_dir = new File(outputDir, "tophat_out")
    tophat.deps = deps
    // always output BAM
    tophat.no_convert_bam = false
363
364
    // and always keep input ordering
    tophat.keep_fasta_order = true
bow's avatar
bow committed
365
366
    add(tophat)

367
368
369
370
371
372
373
374
375
376
377
378
379
    // fix unmapped file coordinates
    val fixedUnmapped = new File(tophat.output_dir, "unmapped_fixup.sam")
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
    fixer.outputSam = fixedUnmapped
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
    val sorter = SortSam(this, fixer.outputSam, swapExt(fixer.outputSam, ".sam", ".sorted.bam"))
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
380

381
382
383
384
385
386
387
388
389
390
391
392
393
394
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
      tophat.output_dir, "coordinate")
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

    addAddOrReplaceReadGroups(reorderSam.output, output)
bow's avatar
bow committed
395
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
396
397
398
399
400
401
402
403
  /**
   * Adds stampy jobs
   * @param R1
   * @param R2
   * @param output
   * @param deps
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
404
  def addStampy(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
405

Peter van 't Hof's avatar
Peter van 't Hof committed
406
    var RG: String = "ID:" + readgroupId + ","
407
408
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
409
410
411
412
413
414
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
415
416
417

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
418
    if (paired) stampyCmd.R2 = R2.get
419
420
421
422
    stampyCmd.deps = deps
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
    stampyCmd.output = this.swapExt(output.getParent, output, ".bam", ".sam")
423
424
    stampyCmd.isIntermediate = true
    add(stampyCmd)
425
426
427
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
428
    sortSam.output
429
  }
430

Peter van 't Hof's avatar
Peter van 't Hof committed
431
432
433
434
435
436
437
438
  /**
   * Adds bowtie jobs
   * @param R1
   * @param R2
   * @param output
   * @param deps
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
439
  def addBowtie(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
440
    val bowtie = new Bowtie(this)
441
    bowtie.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
442
    if (paired) bowtie.R2 = R2
443
444
    bowtie.deps = deps
    bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
445
446
    bowtie.isIntermediate = true
    add(bowtie)
447
    return addAddOrReplaceReadGroups(bowtie.output, output)
448
  }
449

Peter van 't Hof's avatar
Peter van 't Hof committed
450
451
452
453
454
455
456
457
  /**
   * Adds Star jobs
   * @param R1
   * @param R2
   * @param output
   * @param deps
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
458
459
  def addStar(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
    val starCommand = Star(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
460
    add(starCommand)
461
    addAddOrReplaceReadGroups(starCommand.outputSam, output)
462
  }
463

Peter van 't Hof's avatar
Peter van 't Hof committed
464
465
466
467
468
469
470
471
  /**
   * Adds Start 2 pass jobs
   * @param R1
   * @param R2
   * @param output
   * @param deps
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
472
473
  def addStar2pass(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
    val starCommand = Star._2pass(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
474
    addAll(starCommand._2)
475
    addAddOrReplaceReadGroups(starCommand._1, output)
476
  }
477

Peter van 't Hof's avatar
Peter van 't Hof committed
478
479
480
481
482
483
  /**
   * Adds AddOrReplaceReadGroups
   * @param input
   * @param output
   * @return
   */
484
485
  def addAddOrReplaceReadGroups(input: File, output: File): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
486
487
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
488
    addOrReplaceReadGroups.RGID = readgroupId
489
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
490
491
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
492
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
493
494
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
495
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
496
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
497

498
    addOrReplaceReadGroups.output
Peter van 't Hof's avatar
Peter van 't Hof committed
499
  }
bow's avatar
bow committed
500

Peter van 't Hof's avatar
Peter van 't Hof committed
501
  /** Returns readgroup for bwa */
502
  def getReadGroupBwa(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
503
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
504
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
505
506
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
507
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
508
509
510
511
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
    if (readgroupDescription.isDefined) RG += "DS" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
512

513
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
514
  }
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
   * @param file
   * @param runDir
   * @return returns extracted file
   */
  def extractIfNeeded(file: File, runDir: File): File = {
    if (file == null) return file
    else if (file.getName().endsWith(".gz") || file.getName().endsWith(".gzip")) {
      var newFile: File = swapExt(runDir, file, ".gz", "")
      if (file.getName().endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
      val zcatCommand = Zcat(this, file, newFile)
      zcatCommand.isIntermediate = true
      add(zcatCommand)
      return newFile
    } else if (file.getName().endsWith(".bz2")) {
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
      pbzip2.isIntermediate = true
      add(pbzip2)
      return newFile
    } else return file
  }

541
542
}

543
object Mapping extends PipelineCommand