Mapping.scala 22.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.pipelines.mapping

17
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.util.Date
19

20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22 23
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie, Bowtie2 }
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
bow's avatar
bow committed
25
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam }
27
import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter
28
import nl.lumc.sasc.biopet.extensions._
29
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
Peter van 't Hof's avatar
Peter van 't Hof committed
31
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
32
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition
Peter van 't Hof's avatar
Peter van 't Hof committed
34
import nl.lumc.sasc.biopet.utils.textToSize
35
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37 38
import org.broadinstitute.gatk.queue.QScript

import scala.math._
39

40
// TODO: documentation
41
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
42

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def this() = this(null)
bow's avatar
bow committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  @Input(doc = "R1 fastq file", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
46
  var inputR1: File = _
bow's avatar
bow committed
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  @Input(doc = "R2 fastq file", shortName = "R2", fullName = "inputR2", required = false)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
49
  var inputR2: Option[File] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  /** Output name */
Peter van 't Hof's avatar
Peter van 't Hof committed
52
  var outputName: String = _
bow's avatar
bow committed
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54 55
  /** Skip flexiprep */
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)
bow's avatar
bow committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57 58
  /** Skip mark duplicates */
  protected var skipMarkduplicates: Boolean = config("skip_markduplicates", default = false)
bow's avatar
bow committed
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60 61
  /** Skip metrics */
  protected var skipMetrics: Boolean = config("skip_metrics", default = false)
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
  /** Aligner */
Peter van 't Hof's avatar
Peter van 't Hof committed
64
  protected var aligner: String = config("aligner", default = "bwa-mem")
bow's avatar
bow committed
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66 67
  /** Number of chunks, when not defined pipeline will automatic calculate number of chunks */
  protected var numberChunks: Option[Int] = config("number_chunks")
68

Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
  /** Enable chunking */
  protected var chunking: Boolean = config("chunking", numberChunks.getOrElse(1) > 1)
bow's avatar
bow committed
71

72
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
73 74
  /** Readgroup ID */
  protected var readgroupId: String = _
bow's avatar
bow committed
75

76 77
  // TODO: hide sampleId and libId from the command line so they do not interfere with our config values

Peter van 't Hof's avatar
Peter van 't Hof committed
78 79
  /** Readgroup Platform */
  protected var platform: String = config("platform", default = "illumina")
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81 82
  /** Readgroup platform unit */
  protected var platformUnit: String = config("platform_unit", default = "na")
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84 85
  /** Readgroup sequencing center */
  protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
bow's avatar
bow committed
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87 88
  /** Readgroup description */
  protected var readgroupDescription: Option[String] = config("readgroup_description")
bow's avatar
bow committed
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90 91
  /** Readgroup sequencing date */
  protected var readgroupDate: Date = _
bow's avatar
bow committed
92

Peter van 't Hof's avatar
Peter van 't Hof committed
93 94
  /** Readgroup predicted insert size */
  protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
bow's avatar
bow committed
95

96
  val keepFinalBamFile: Boolean = config("keep_final_bam_file", default = true)
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  protected var paired: Boolean = false
99
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
100
  def finalBamFile: File = if (skipMarkduplicates) {
101 102
    new File(outputDir, outputName + ".bam")
  } else new File(outputDir, outputName + ".dedup.bam")
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
  /** location of summary file */
105 106
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
107
  override def defaults: Map[String, Any] = Map(
108
    "gsnap" -> Map("batch" -> 4),
Wai Yi Leung's avatar
Wai Yi Leung committed
109
    "star" -> Map("outsamunmapped" -> "Within")
110
  )
111

Peter van 't Hof's avatar
Peter van 't Hof committed
112
  override def fixedValues: Map[String, Any] = Map(
113 114
    "gsnap" -> Map("format" -> "sam"),
    "bowtie" -> Map("sam" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
115
  )
116

Peter van 't Hof's avatar
Peter van 't Hof committed
117
  /** File to add to the summary */
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
118
  def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> inputR1,
119
    "reference" -> referenceFasta()) ++
120 121 122 123 124 125 126 127
    (if (inputR2.isDefined) Map("input_R2" -> inputR2.get) else Map()) ++
    (bam2wig match {
      case Some(b) => Map(
        "output_wigle" -> b.outputWigleFile,
        "output_tdf" -> b.outputTdfFile,
        "output_bigwig" -> b.outputBwFile)
      case _ => Map()
    })
128

Peter van 't Hof's avatar
Peter van 't Hof committed
129
  /** Settings to add to summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
130
  def summarySettings: Map[String, Any] = Map(
131 132 133
    "skip_metrics" -> skipMetrics,
    "skip_flexiprep" -> skipFlexiprep,
    "skip_markduplicates" -> skipMarkduplicates,
134
    "paired" -> inputR2.isDefined,
135 136
    "aligner" -> aligner,
    "chunking" -> chunking,
Sander Bollen's avatar
Sander Bollen committed
137
    "number_of_chunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
138
  ) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
139

Peter van 't Hof's avatar
Peter van 't Hof committed
140
  override def reportClass: Some[MappingReport] = {
141 142 143 144 145 146 147 148 149
    val mappingReport = new MappingReport(this)
    mappingReport.outputDir = new File(outputDir, "report")
    mappingReport.summaryFile = summaryFile
    mappingReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(mappingReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
150
  /** Will be executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
151
  def init() {
152
    require(outputDir != null, "Missing output directory on mapping module")
153
    require(inputR1 != null, "Missing inputR1 on mapping module")
154 155
    require(sampleId.isDefined, "Missing sample ID on mapping module")
    require(libId.isDefined, "Missing library ID on mapping module")
156 157
    if (inputR1.exists() && inputR1.length() == 0) logger.warn(s"Input R1 is a empty file: $inputR1")
    inputR2.foreach(r => if (r.exists() && r.length() == 0) logger.warn(s"Input R2 is a empty file: $r"))
158

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
159 160
    inputFiles :+= new InputFile(inputR1)
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
161

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
162
    paired = inputR2.isDefined
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164 165
    if (readgroupId == null)
      readgroupId = config("readgroup_id", default = sampleId.get + "-" + libId.get)
bow's avatar
bow committed
166

Peter van 't Hof's avatar
Peter van 't Hof committed
167
    if (outputName == null) outputName = readgroupId
bow's avatar
bow committed
168

Peter van 't Hof's avatar
Peter van 't Hof committed
169
    if (chunking) {
170
      if (numberChunks.isEmpty) {
171
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
172
        else {
Peter van 't Hof's avatar
Peter van 't Hof committed
173
          val chunkSize: String = config("chunksize", default = "5G")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
174 175
          val filesize = if (inputR1.getName.endsWith(".gz") || inputR1.getName.endsWith(".gzip")) inputR1.length * 3
          else inputR1.length
Peter van 't Hof's avatar
Peter van 't Hof committed
176 177
          numberChunks = Some(ceil(filesize.toDouble / textToSize(chunkSize)).toInt)
          if (numberChunks == Some(0)) numberChunks = Some(1)
178 179 180
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
181
      if (numberChunks.getOrElse(1) <= 1) chunking = false
Peter van 't Hof's avatar
Peter van 't Hof committed
182
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
183
  }
bow's avatar
bow committed
184

Peter van 't Hof's avatar
Peter van 't Hof committed
185
  /** Adds all jobs of the pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
186
  def biopetScript() {
187
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
188
      flexiprep.outputDir = new File(outputDir, "flexiprep")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
189
      flexiprep.inputR1 = inputR1
190
      flexiprep.inputR2 = inputR2
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      flexiprep.sampleId = this.sampleId
192
      flexiprep.libId = this.libId
Peter van 't Hof's avatar
Peter van 't Hof committed
193 194
      flexiprep.init()
      flexiprep.runInitialJobs()
195
    }
bow's avatar
bow committed
196
    var bamFiles: List[File] = Nil
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
197 198
    var fastqR1Output: List[File] = Nil
    var fastqR2Output: List[File] = Nil
bow's avatar
bow committed
199

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    val chunks: Map[File, (File, Option[File])] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
201 202
      if (chunking) (for (t <- 1 to numberChunks.getOrElse(1)) yield {
        val chunkDir = new File(outputDir, "chunks" + File.separator + t)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
203 204
        chunkDir -> (new File(chunkDir, inputR1.getName),
          if (paired) Some(new File(chunkDir, inputR2.get.getName)) else None)
Peter van 't Hof's avatar
Peter van 't Hof committed
205
      }).toMap
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
206
      else if (skipFlexiprep) Map(outputDir -> (inputR1, if (paired) inputR2 else None))
207
      else Map(outputDir -> (flexiprep.inputR1, flexiprep.inputR2))
208
    }
bow's avatar
bow committed
209

Peter van 't Hof's avatar
Peter van 't Hof committed
210
    if (chunking) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
211 212 213 214 215
      val fastSplitterR1 = new FastqSplitter(this)
      fastSplitterR1.input = inputR1
      for ((chunkDir, fastqfile) <- chunks) fastSplitterR1.output :+= fastqfile._1
      fastSplitterR1.isIntermediate = true
      add(fastSplitterR1)
bow's avatar
bow committed
216

Peter van 't Hof's avatar
Peter van 't Hof committed
217
      if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
218 219 220 221 222
        val fastSplitterR2 = new FastqSplitter(this)
        fastSplitterR2.input = inputR2.get
        for ((chunkDir, fastqfile) <- chunks) fastSplitterR2.output :+= fastqfile._2.get
        fastSplitterR2.isIntermediate = true
        add(fastSplitterR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
223 224
      }
    }
bow's avatar
bow committed
225

Peter van 't Hof's avatar
Peter van 't Hof committed
226
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
227 228 229
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
230
        val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
231
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
232 233
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
234 235
        fastqR1Output :+= R1
        R2.foreach(R2 => fastqR2Output :+= R2)
Peter van 't Hof's avatar
Peter van 't Hof committed
236
      }
237

Peter van 't Hof's avatar
Peter van 't Hof committed
238
      val outputBam = new File(chunkDir, outputName + ".bam")
239 240
      bamFiles :+= outputBam
      aligner match {
Peter van 't Hof's avatar
Peter van 't Hof committed
241 242 243
        case "bwa-mem"    => addBwaMem(R1, R2, outputBam)
        case "bwa-aln"    => addBwaAln(R1, R2, outputBam)
        case "bowtie"     => addBowtie(R1, R2, outputBam)
Wai Yi Leung's avatar
Wai Yi Leung committed
244
        case "bowtie2"    => addBowtie2(R1, R2, outputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
245
        case "gsnap"      => addGsnap(R1, R2, outputBam)
bow's avatar
bow committed
246
        case "hisat2"     => addHisat2(R1, R2, outputBam)
bow's avatar
bow committed
247
        // TODO: make TopHat here accept multiple input files
Peter van 't Hof's avatar
Peter van 't Hof committed
248 249 250 251
        case "tophat"     => addTophat(R1, R2, outputBam)
        case "stampy"     => addStampy(R1, R2, outputBam)
        case "star"       => addStar(R1, R2, outputBam)
        case "star-2pass" => addStar2pass(R1, R2, outputBam)
252
        case _            => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
253
      }
254
      if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
255
        addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics"), sampleId, libId).functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
256
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
257
    if (!skipFlexiprep) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
258
      flexiprep.runFinalize(fastqR1Output, fastqR2Output)
Peter van 't Hof's avatar
Peter van 't Hof committed
259
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
260
      addSummaryQScript(flexiprep)
Peter van 't Hof's avatar
Peter van 't Hof committed
261
    }
bow's avatar
bow committed
262

263 264
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
265
      bamFile = new File(outputDir, outputName + ".dedup.bam")
266 267
      val md = MarkDuplicates(this, bamFiles, finalBamFile)
      md.isIntermediate = !keepFinalBamFile
268 269
      add(md)
      addSummarizable(md, "mark_duplicates")
270
    } else if (skipMarkduplicates && chunking) {
271 272
      val mergeSamFile = MergeSamFiles(this, bamFiles, finalBamFile)
      mergeSamFile.isIntermediate = !keepFinalBamFile
273 274 275
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
276

Peter van 't Hof's avatar
Peter van 't Hof committed
277
    if (!skipMetrics) {
Peter van 't Hof's avatar
Peter van 't Hof committed
278
      val bamMetrics = BamMetrics(this, bamFile, new File(outputDir, "metrics"), sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
279 280 281
      addAll(bamMetrics.functions)
      addSummaryQScript(bamMetrics)
    }
282

283
    outputFiles += ("finalBamFile" -> finalBamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
284

285
    if (config("unmapped_to_gears", default = false).asBoolean) {
286
      val gears = new GearsSingle(this)
287
      gears.bamFile = Some(finalBamFile)
288 289
      gears.sampleId = sampleId
      gears.libId = libId
290
      gears.outputDir = new File(outputDir, "gears")
291
      add(gears)
292 293
    }

294
    bam2wig.foreach(add(_))
295

Peter van 't Hof's avatar
Peter van 't Hof committed
296
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
297
  }
bow's avatar
bow committed
298

Peter van 't Hof's avatar
Peter van 't Hof committed
299
  protected lazy val bam2wig: Option[Bam2Wig] = if (config("generate_wig", default = false)) {
300 301 302
    Some(Bam2Wig(this, finalBamFile))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
303
  /** Add bwa aln jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
304
  def addBwaAln(R1: File, R2: Option[File], output: File): File = {
305 306 307
    val bwaAlnR1 = new BwaAln(this)
    bwaAlnR1.fastq = R1
    bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
308
    bwaAlnR1.isIntermediate = true
309 310 311
    add(bwaAlnR1)

    val samFile: File = if (paired) {
312
      val bwaAlnR2 = new BwaAln(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
313
      bwaAlnR2.fastq = R2.get
314
      bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
315
      bwaAlnR2.isIntermediate = true
316 317 318 319
      add(bwaAlnR2)

      val bwaSampe = new BwaSampe(this)
      bwaSampe.fastqR1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
320
      bwaSampe.fastqR2 = R2.get
321 322
      bwaSampe.saiR1 = bwaAlnR1.output
      bwaSampe.saiR2 = bwaAlnR2.output
323
      bwaSampe.r = getReadGroupBwa
324
      bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
325
      bwaSampe.isIntermediate = true
326 327 328 329 330 331 332
      add(bwaSampe)

      bwaSampe.output
    } else {
      val bwaSamse = new BwaSamse(this)
      bwaSamse.fastq = R1
      bwaSamse.sai = bwaAlnR1.output
333
      bwaSamse.r = getReadGroupBwa
334
      bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
335
      bwaSamse.isIntermediate = true
336 337 338 339 340 341
      add(bwaSamse)

      bwaSamse.output
    }

    val sortSam = SortSam(this, samFile, output)
342
    sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
343
    add(sortSam)
Peter van 't Hof's avatar
Peter van 't Hof committed
344
    sortSam.output
345 346
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
347
  /** Adds bwa mem jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
348
  def addBwaMem(R1: File, R2: Option[File], output: File): File = {
349
    val bwaCommand = new BwaMem(this)
350
    bwaCommand.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
351
    if (paired) bwaCommand.R2 = R2.get
352
    bwaCommand.R = Some(getReadGroupBwa)
353 354
    val sortSam = new SortSam(this)
    sortSam.output = output
355
    val pipe = bwaCommand | sortSam
356
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
357 358
    pipe.threadsCorrection = -1
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
359
    output
360
  }
361

Peter van 't Hof's avatar
Peter van 't Hof committed
362
  def addGsnap(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
363
    val gsnapCommand = new Gsnap(this)
364
    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
365
    gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
bow's avatar
bow committed
366

367
    val reorderSam = new ReorderSam(this)
368
    reorderSam.input = gsnapCommand.output
Peter van 't Hof's avatar
Peter van 't Hof committed
369
    reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
370

Peter van 't Hof's avatar
Peter van 't Hof committed
371
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
372 373
    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
    pipe.threadsCorrection = -2
374
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
375 376
    add(pipe)
    ar._2
bow's avatar
bow committed
377 378
  }

bow's avatar
bow committed
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
  def addHisat2(R1: File, R2: Option[File], output: File): File = {
    val hisat2 = new Hisat2(this)
    hisat2.R1 = R1
    hisat2.R2 = R2
    hisat2.rgId = Some(readgroupId)
    hisat2.rg +:= s"PL:$platform"
    hisat2.rg +:= s"PU:$platformUnit"
    libId match {
      case Some(id)  => hisat2.rg +:= s"LB:$id"
      case otherwise => ;
    }
    sampleId match {
      case Some(id)  => hisat2.rg +:= s"SM:$id"
      case otherwise => ;
    }

    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = hisat2 | sortSam
398
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
bow's avatar
bow committed
399 400 401 402 403 404
    pipe.threadsCorrection = 1
    add(pipe)

    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
405
  def addTophat(R1: File, R2: Option[File], output: File): File = {
bow's avatar
bow committed
406 407 408
    // TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
    val tophat = new Tophat(this)
    tophat.R1 = tophat.R1 :+ R1
Peter van 't Hof's avatar
Peter van 't Hof committed
409
    if (paired) tophat.R2 = tophat.R2 :+ R2.get
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
410
    tophat.outputDir = new File(outputDir, "tophat_out")
bow's avatar
bow committed
411
    // always output BAM
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
412
    tophat.noConvertBam = false
413
    // and always keep input ordering
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
414
    tophat.keepFastaOrder = true
bow's avatar
bow committed
415 416
    add(tophat)

417
    // fix unmapped file coordinates
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
418
    val fixedUnmapped = new File(tophat.outputDir, "unmapped_fixup.sam")
419 420
    val fixer = new TophatRecondition(this)
    fixer.inputBam = tophat.outputAcceptedHits
bow's avatar
bow committed
421
    fixer.outputSam = fixedUnmapped.getAbsoluteFile
422 423 424 425
    fixer.isIntermediate = true
    add(fixer)

    // sort fixed SAM file
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
426
    val sorter = SortSam(this, fixer.outputSam, new File(tophat.outputDir, "unmapped_fixup.sorted.bam"))
427 428 429
    sorter.sortOrder = "coordinate"
    sorter.isIntermediate = true
    add(sorter)
bow's avatar
bow committed
430

431 432
    // merge with mapped file
    val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output),
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
433
      new File(tophat.outputDir, "fixed_merged.bam"), sortOrder = "coordinate")
434 435 436 437 438 439 440 441
    mergeSamFile.createIndex = true
    mergeSamFile.isIntermediate = true
    add(mergeSamFile)

    // make sure header coordinates are correct
    val reorderSam = new ReorderSam(this)
    reorderSam.input = mergeSamFile.output
    reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
442
    reorderSam.isIntermediate = true
443 444
    add(reorderSam)

Peter van 't Hof's avatar
Peter van 't Hof committed
445
    val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
446
    ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
447 448
    add(ar._1)
    ar._2
bow's avatar
bow committed
449
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
450

Peter van 't Hof's avatar
Peter van 't Hof committed
451
  /** Adds stampy jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
452
  def addStampy(R1: File, R2: Option[File], output: File): File = {
453

Peter van 't Hof's avatar
Peter van 't Hof committed
454
    var RG: String = "ID:" + readgroupId + ","
455 456
    RG += "SM:" + sampleId.get + ","
    RG += "LB:" + libId.get + ","
Peter van 't Hof's avatar
Peter van 't Hof committed
457 458 459 460 461 462
    if (readgroupDescription != null) RG += "DS" + readgroupDescription + ","
    RG += "PU:" + platformUnit + ","
    if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + ","
    if (readgroupDate != null) RG += "DT:" + readgroupDate + ","
    RG += "PL:" + platform
463 464 465

    val stampyCmd = new Stampy(this)
    stampyCmd.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
466
    if (paired) stampyCmd.R2 = R2.get
467 468
    stampyCmd.readgroup = RG
    stampyCmd.sanger = true
469
    stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
470 471
    stampyCmd.isIntermediate = true
    add(stampyCmd)
472
    val sortSam = SortSam(this, stampyCmd.output, output)
473
    sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
474
    add(sortSam)
475
    sortSam.output
476
  }
477

Peter van 't Hof's avatar
Peter van 't Hof committed
478
  /** Adds bowtie jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
479
  def addBowtie(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
480 481 482 483
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))
484
    val bowtie = new Bowtie(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
485 486 487
    bowtie.R1 = zcatR1._2
    if (paired) bowtie.R2 = Some(zcatR2.get._2)
    bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
488
    bowtie.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
489 490
    val ar = addAddOrReplaceReadGroups(bowtie.output, output)
    val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
491
    pipe.threadsCorrection = -1
492
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
493 494
    add(pipe)
    ar._2
495
  }
496

497 498 499
  /** Add bowtie2 jobs **/
  def addBowtie2(R1: File, R2: Option[File], output: File): File = {
    val bowtie2 = new Bowtie2(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
500
    bowtie2.rgId = Some(readgroupId)
Peter van 't Hof's avatar
Peter van 't Hof committed
501
    bowtie2.rg +:= ("LB:" + libId.get)
Wai Yi Leung's avatar
Wai Yi Leung committed
502 503
    bowtie2.rg +:= ("PL:" + platform)
    bowtie2.rg +:= ("PU:" + platformUnit)
Peter van 't Hof's avatar
Peter van 't Hof committed
504
    bowtie2.rg +:= ("SM:" + sampleId.get)
505
    bowtie2.R1 = R1
Peter van 't Hof's avatar
Peter van 't Hof committed
506
    bowtie2.R2 = R2
507 508 509
    val sortSam = new SortSam(this)
    sortSam.output = output
    val pipe = bowtie2 | sortSam
510
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
511 512 513 514 515
    pipe.threadsCorrection = -1
    add(pipe)
    output
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
516
  /** Adds Star jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
517
  def addStar(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
518 519 520
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
521
    val ar = addAddOrReplaceReadGroups(starCommand.outputSam, swapExt(outputDir, output, ".bam", ".addAddOrReplaceReadGroups.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
522 523 524

    val reorderSam = new ReorderSam(this)
    reorderSam.input = ar._2
Peter van 't Hof's avatar
Peter van 't Hof committed
525
    reorderSam.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
526

Peter van 't Hof's avatar
Peter van 't Hof committed
527
    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: zcatR2.flatMap(_._1) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
528 529
      Some(starCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
    pipe.threadsCorrection = -3
530 531
    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
532
    pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
533
    add(pipe)
Peter van 't Hof's avatar
Peter van 't Hof committed
534
    reorderSam.output
535
  }
536

Peter van 't Hof's avatar
Peter van 't Hof committed
537
  /** Adds Start 2 pass jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
538
  def addStar2pass(R1: File, R2: Option[File], output: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
539 540 541 542 543 544
    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
    zcatR1._1.foreach(add(_))
    zcatR2.foreach(_._1.foreach(add(_)))

    val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
545
    addAll(starCommand._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
546
    val ar = addAddOrReplaceReadGroups(starCommand._1, output)
547
    ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
548 549
    add(ar._1)
    ar._2
550
  }
551

Peter van 't Hof's avatar
Peter van 't Hof committed
552
  /** Adds AddOrReplaceReadGroups */
Peter van 't Hof's avatar
Peter van 't Hof committed
553
  def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
554
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
555 556
    addOrReplaceReadGroups.createIndex = true

Peter van 't Hof's avatar
Peter van 't Hof committed
557
    addOrReplaceReadGroups.RGID = readgroupId
558
    addOrReplaceReadGroups.RGLB = libId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
559 560
    addOrReplaceReadGroups.RGPL = platform
    addOrReplaceReadGroups.RGPU = platformUnit
561
    addOrReplaceReadGroups.RGSM = sampleId.get
Peter van 't Hof's avatar
Peter van 't Hof committed
562 563
    if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
    if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
564
    addOrReplaceReadGroups.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
bow's avatar
bow committed
565

Peter van 't Hof's avatar
Peter van 't Hof committed
566
    (addOrReplaceReadGroups, addOrReplaceReadGroups.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
567
  }
bow's avatar
bow committed
568

Peter van 't Hof's avatar
Peter van 't Hof committed
569
  /** Returns readgroup for bwa */
Peter van 't Hof's avatar
Peter van 't Hof committed
570
  def getReadGroupBwa: String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
571
    var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
572
    RG += "LB:" + libId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
573 574
    RG += "PL:" + platform + "\\t"
    RG += "PU:" + platformUnit + "\\t"
575
    RG += "SM:" + sampleId.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
576
    if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
577 578 579
    if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
    if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
    if (predictedInsertsize.isDefined) RG += "PI:" + predictedInsertsize.get + "\\t"
bow's avatar
bow committed
580

581
    RG.substring(0, RG.lastIndexOf("\\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
582
  }
583 584 585 586

  //FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
  /**
   * Extracts file if file is compressed
Peter van 't Hof's avatar
Peter van 't Hof committed
587 588
   * @param file input file
   * @param runDir directory to extract when needed
589 590
   * @return returns extracted file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
591 592 593
  def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
    require(file != null)
    if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
594
      var newFile: File = swapExt(runDir, file, ".gz", "")
Peter van 't Hof's avatar
Peter van 't Hof committed
595
      if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
596
      val zcatCommand = Zcat(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
597
      (Some(zcatCommand), newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
598
    } else if (file.getName.endsWith(".bz2")) {
599 600
      val newFile = swapExt(runDir, file, ".bz2", "")
      val pbzip2 = Pbzip2(this, file, newFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
601 602
      (Some(pbzip2), newFile)
    } else (None, file)
603 604
  }

605 606
}

607
object Mapping extends PipelineCommand