Mapping.scala 21.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.pipelines.mapping

17
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.util.Date
19

20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22 23
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie, Bowtie2 }
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
bow's avatar
bow committed
25
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
27
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
28
import nl.lumc.sasc.biopet.extensions._
29
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
31
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
32
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
Peter van 't Hof's avatar
Peter van 't Hof committed
34
import nl.lumc.sasc.biopet.utils.textToSize
35
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37 38
import org.broadinstitute.gatk.queue.QScript

import scala.math._
39

40
// TODO: documentation
41
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
42

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def this() = this(null)
bow's avatar
bow committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  @Input(doc = "R1 fastq file", shortName = "R1", fullName = "inputR1", required = true)
46
  var inputR1: File = _
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  @Input(doc = "R2 fastq file", shortName = "R2", fullName = "inputR2", required = false)
49
  var inputR2: Option[File] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
52
  var outputName: String = _
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54 55
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57 58
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60 61
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
64
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66 67
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
68

Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
71

72
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
73 74
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
75

76 77
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
78 79
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81 82
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84 85
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87 88
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90 91
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93 94
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
95

Peter van 't Hof's avatar
Peter van 't Hof committed
96
  protected var paired: Boolean = false
97
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
98
  def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
99

Peter van 't Hof's avatar
Peter van 't Hof committed
100
  /** location of summary file */
101 102
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

103 104
  override def defaults = Map(
    "gsnap" -> Map("batch" -> 4),
Wai Yi Leung's avatar
Wai Yi Leung committed
105
    "star" -> Map("outsamunmapped" -> "Within")
106
  )
107 108 109 110

  override def fixedValues = Map(
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
111
  )
112

Peter van 't Hof's avatar
Peter van 't Hof committed
113
  /** File to add to the summary */
114
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
115
    "reference" -> referenceFasta()) ++
116 117 118 119 120 121 122 123
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map()) ++
    (bam2wig match {
      case Some(b) => Map(
        "output_wigle" -> b.outputWigleFile,
        "output_tdf" -> b.outputTdfFile,
        "output_bigwig" -> b.outputBwFile)
      case _ => Map()
    })
124

Peter van 't Hof's avatar
Peter van 't Hof committed
125
  /** Settings to add to summary */
126 127 128 129
  def summarySettings = Map(
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
130
    "paired" -> inputR2.isDefined,
131 132
    "aligner" -> aligner,
    "chunking" -> chunking,
Sander Bollen's avatar
Sander Bollen committed
133
    "number_of_chunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
134
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
135

136 137 138 139 140 141 142 143 144 145
  override def reportClass = {
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
146
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
147
  def init() {
148
    require(outputDir != null, "Missing output directory on mapping module")
149
    require(inputR1 != null, "Missing output directory on mapping module")
150 151
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
152

153 154
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
155

156
    paired = inputR2.isDefined
157

Peter van 't Hof's avatar
Peter van 't Hof committed
158 159
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
160

Peter van 't Hof's avatar
Peter van 't Hof committed
161
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
162

163
    if (chunking) {
164
      if (numberChunks.isEmpty) {
165
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
166
        else {
Peter van 't Hof's avatar
Peter van 't Hof committed
167
          val chunkSize: String = config("chunksize", default = "5G")
168 169
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
Peter van 't Hof's avatar
Peter van 't Hof committed
170
          numberChunks = Option(ceil(filesize.toDouble / textToSize(chunkSize)).toInt)
171 172 173
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
174
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
175
  }
bow's avatar
bow committed
176

Peter van 't Hof's avatar
Peter van 't Hof committed
177
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
178
  def biopetScript() {
179
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
180
      flexiprep.outputDir = new File(outputDir, "flexiprep")
181
      flexiprep.inputR1 = inputR1
182
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
183
      flexiprep.sampleId = this.sampleId
184
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
185 186
      flexiprep.init()
      flexiprep.runInitialJobs()
187
    }
bow's avatar
bow committed
188
    var bamFiles: List[File] = Nil
189 190
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
191

192
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
193 194
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
195 196
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
      }).toMap
198
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
199
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
200
    }
bow's avatar
bow committed
201

202
    if (chunking) {
203 204 205 206 207
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
208

209
      if (paired) {
210 211 212 213 214
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
215 216
      }
    }
bow's avatar
bow committed
217

218
    for ((chunkDir, fastqfile) <- chunks) {
219 220 221
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
222
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
223
        logger.debug(chunkDir + " - " + flexiout)
224 225
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
226 227
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
228
      }
229

Peter van 't Hof's avatar
Peter van 't Hof committed
230
      val outputBam = new File(chunkDir, outputName + ".bam")
231 232
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
233 234 235
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
236
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
238
        case "hisat2"     => addHisat2(R1, R2, outputBam)
bow's avatar
bow committed
239
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
240 241 242 243
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
244
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
245
      }
246
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
247
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
248
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
249
    if (!skipFlexiprep) {
250
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
251
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
252
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
253
    }
bow's avatar
bow committed
254

255 256
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
257
      bamFile = new File(outputDir, outputName + ".dedup.bam")
258 259 260
      val md = MarkDuplicates(this, bamFiles, bamFile)
      add(md)
      addSummarizable(md, "mark_duplicates")
261
    } else if (skipMarkduplicates && chunking) {
Peter van 't Hof's avatar
Peter van 't Hof committed
262
      val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam"))
263 264 265
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
266

Peter van 't Hof's avatar
Peter van 't Hof committed
267
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
268
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
269 270 271
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
272

273 274
    add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
    add(Ln(this, bamFile, finalBamFile))
275
    outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
276

277
    if (config("unmapped_to_gears", default = false).asBoolean) {
278
      val gears = new GearsSingle(this)
279
      gears.bamFile = Some(finalBamFile)
280 281
      gears.sampleId = sampleId
      gears.libId = libId
282
      gears.outputDir = new File(outputDir, "gears")
283
      add(gears)
284 285
    }

286
    bam2wig.foreach(add(_))
287

Peter van 't Hof's avatar
Peter van 't Hof committed
288
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
289
  }
bow's avatar
bow committed
290

291 292 293 294
  protected lazy val bam2wig = if (config("generate_wig", default = false)) {
    Some(Bam2Wig(this, finalBamFile))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
295
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
296
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
297 298 299
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
300
    bwaAlnR1.isIntermediate = true
301 302 303
    add(bwaAlnR1)

    val samFile: File = if (paired) {
304
      val bwaAlnR2 = new BwaAln(this)
305
      bwaAlnR2.fastq = R2.get
306
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
307
      bwaAlnR2.isIntermediate = true
308 309 310 311
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
312
      bwaSampe.fastqR2 = R2.get
313 314
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
315
      bwaSampe.r = getReadGroupBwa
316
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
317
      bwaSampe.isIntermediate = true
318 319 320 321 322 323 324
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
325
      bwaSamse.r = getReadGroupBwa
326
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
327
      bwaSamse.isIntermediate = true
328 329 330 331 332 333 334 335
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
336
    sortSam.output
337 338
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
339
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
340
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
341
    val bwaCommand = new BwaMem(this)
342
    bwaCommand.R1 = R1
343
    if (paired) bwaCommand.R2 = R2.get
344
    bwaCommand.R = Some(getReadGroupBwa)
345 346
    val sortSam = new SortSam(this)
    sortSam.output = output
347 348 349 350
    val pipe = bwaCommand | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
351
    output
352
  }
353

Peter van 't Hof's avatar
Peter van 't Hof committed
354
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
355
    val gsnapCommand = new Gsnap(this)
356
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
357
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
358

359
    val reorderSam = new ReorderSam(this)
360
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
361
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
362

Peter van 't Hof's avatar
Peter van 't Hof committed
363
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
364 365
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
Peter van 't Hof's avatar
Peter van 't Hof committed
366 367
    add(pipe)
    ar._2
bow's avatar
bow committed
368 369
  }

bow's avatar
bow committed
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
  def addHisat2(R1: File, R2: Option[File], output: File): File = {
    val hisat2 = new Hisat2(this)
    hisat2.R1 = R1
    hisat2.R2 = R2
    hisat2.rgId = Some(readgroupId)
    hisat2.rg +:= s"PL:$platform"
    hisat2.rg +:= s"PU:$platformUnit"
    libId match {
      case Some(id)  => hisat2.rg +:= s"LB:$id"
      case otherwise => ;
    }
    sampleId match {
      case Some(id)  => hisat2.rg +:= s"SM:$id"
      case otherwise => ;
    }

    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = hisat2 | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = 1
    add(pipe)

    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
396
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
397 398 399
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
400
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
401
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
402
    // always output BAM
403
    tophat.noConvertBam = false
404
    // and always keep input ordering
405
    tophat.keepFastaOrder = true
bow's avatar
bow committed
406 407
    add(tophat)

408
    // fix unmapped file coordinates
409
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
410 411
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
412
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
413 414 415 416
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
417
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
418 419 420
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
421

422 423
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
424
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
425 426 427 428 429 430 431 432 433 434
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
435 436 437
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
    add(ar._1)
    ar._2
bow's avatar
bow committed
438
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
439

Peter van 't Hof's avatar
Peter van 't Hof committed
440
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
441
  def addStampy(R1: File, R2: Option[File], output: File): File = {
442

Peter van 't Hof's avatar
Peter van 't Hof committed
443
    var RG: String = "ID:" + readgroupId + ","
444 445
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
446 447 448 449 450 451
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
452 453 454

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
455
    if (paired) stampyCmd.R2 = R2.get
456 457
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
458
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
459 460
    stampyCmd.isIntermediate = true
    add(stampyCmd)
461 462 463
    val sortSam = SortSam(this, stampyCmd.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
464
    sortSam.output
465
  }
466

Peter van 't Hof's avatar
Peter van 't Hof committed
467
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
468
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
469 470 471 472
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
473
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
474 475 476
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
477
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
478 479
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
480
    pipe.threadsCorrection = -1
Peter van 't Hof's avatar
Peter van 't Hof committed
481 482
    add(pipe)
    ar._2
483
  }
484

485 486 487
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
488
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
489
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
490 491
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
Peter van 't Hof's avatar
Peter van 't Hof committed
492
    bowtie2.rg +:= ("SM:" + sampleId.get)
493
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
494
    bowtie2.R2 = R2
495 496 497 498 499 500 501 502 503
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
    pipe.isIntermediate = chunking || !skipMarkduplicates
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
504
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
505
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
506 507 508
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
509
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, swapExt(outputDir, output, ".bam", ".addAddOrReplaceReadGroups.bam"))
510 511 512

    val reorderSam = new ReorderSam(this)
    reorderSam.input = ar._2
Peter van 't Hof's avatar
Peter van 't Hof committed
513
    reorderSam.output = output
514

Peter van 't Hof's avatar
Peter van 't Hof committed
515
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: zcatR2.flatMap(_._1) ::
516 517
      Some(starCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
    pipe.threadsCorrection = -3
518 519
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
Peter van 't Hof's avatar
Peter van 't Hof committed
520
    add(pipe)
521
    reorderSam.output
522
  }
523

Peter van 't Hof's avatar
Peter van 't Hof committed
524
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
525
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
526 527 528 529 530 531
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
532
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
533 534 535
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
    add(ar._1)
    ar._2
536
  }
537

Peter van 't Hof's avatar
Peter van 't Hof committed
538
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
539
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
540
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
541 542
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
543
    addOrReplaceReadGroups.RGID = readgroupId
544
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
545 546
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
547
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
548 549
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
Peter van 't Hof's avatar
Peter van 't Hof committed
550
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
bow's avatar
bow committed
551

Peter van 't Hof's avatar
Peter van 't Hof committed
552
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
553
  }
bow's avatar
bow committed
554

Peter van 't Hof's avatar
Peter van 't Hof committed
555
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
556
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
557
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
558
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
559 560
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
561
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
562
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
563 564 565
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
566

567
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
568
  }
569 570 571 572

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
573 574
   * @param file input file
   * @param runDir directory to extract when needed
575 576
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
577 578 579
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
580
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
581
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
582
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
583
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
584
    } else if (file.getName.endsWith(".bz2")) {
585 586
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
587 588
      (Some(pbzip2), newFile)
    } else (None, file)
589 590
  }

591 592
}

593
object Mapping extends PipelineCommand