SambambaMarkdup.scala 2.83 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.extensions.sambamba

Peter van 't Hof's avatar
Peter van 't Hof committed
17 18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20 21
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

Peter van 't Hof's avatar
Peter van 't Hof committed
22
/**
 * Extension wrapping the `sambamba markdup` sub-command.
 *
 * All tunables are read from the config while the instance is constructed. A setting
 * that has no configured value is simply left out of the generated command line.
 *
 * @param parent configurable handed in by the caller
 */
class SambambaMarkdup(val parent: Configurable) extends Sambamba {

  /** BAM file passed to `markdup` as the first positional argument. */
  @Input(doc = "Bam File")
  var input: File = _

  /** BAM file passed to `markdup` as the second positional argument. */
  @Output(doc = "Markdup output bam")
  var output: File = _

  /** Adds `--remove-duplicates` to the command when true (config key `remove_duplicates`). */
  var removeDuplicates: Boolean = config("remove_duplicates", default = false)

  // @doc: compression_level 6 is average, 0 = no compression, 9 = best
  /** Value for the `-l` flag (config key `compression_level`). */
  val compressionLevel: Option[Int] = config("compression_level")

  /** Value for `--hash-table-size=` (config key `hash-table-size`). */
  val hashTableSize: Option[Int] = config("hash-table-size")

  /** Value for `--overflow-list-size=` (config key `overflow-list-size`). */
  val overflowListSize: Option[Int] = config("overflow-list-size")

  /** Value for `--io-buffer-size=` (config key `io-buffer-size`). */
  val ioBufferSize: Option[Int] = config("io-buffer-size")

  /** Adds `--show-progress` to the command when true; enabled unless configured otherwise. */
  val showProgress: Boolean = config("show-progress", default = true)

  /** Default thread count for this job. */
  override def defaultThreads: Int = 4

  /** Default core memory for this job (unit is defined by the parent class). */
  override def defaultCoreMemory: Double = 4.0

  /**
   * Index file declared as an extra output of this job.
   * NOTE(review): presumably the `.bai` that sambamba writes next to the output - confirm.
   */
  @Output
  private var indexOutput: File = _

  /** Derives the index path from `output` by appending `.bai`. */
  override def beforeGraph(): Unit = {
    indexOutput = new File(s"${output}.bai")
  }

  /** Returns command to execute */
  def cmdLine: String = {
    // Fragments are listed in the order they must appear on the command line:
    // executable, sub-command, flags, and finally the two positional files.
    val fragments = Seq(
      required(executable),
      required("markdup"),
      conditional(removeDuplicates, "--remove-duplicates"),
      optional("-t", nCoresRequest),
      optional("-l", compressionLevel),
      conditional(showProgress, "--show-progress"),
      optional("--hash-table-size=", hashTableSize, spaceSeparated = false),
      optional("--overflow-list-size=", overflowListSize, spaceSeparated = false),
      optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false),
      required(input),
      required(output)
    )
    fragments.mkString
  }
}

/** Factory methods for [[SambambaMarkdup]] jobs. */
object SambambaMarkdup {

  /**
   * Creates a markdup job with explicit input and output files.
   *
   * @param root           configurable passed to the job's constructor
   * @param input          BAM file assigned to the job's `input`
   * @param output         BAM file assigned to the job's `output`
   * @param isIntermediate value assigned to the job's `isIntermediate` flag; defaults to false
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File, isIntermediate: Boolean = false): SambambaMarkdup = {
    val job = new SambambaMarkdup(root)
    job.input = input
    job.output = output
    job.isIntermediate = isIntermediate
    job
  }

  /**
   * Creates a markdup job whose output path is derived from the input path:
   * a trailing `.bam` on the absolute input path is replaced by `.dedup.bam`.
   *
   * @param root  configurable passed to the job's constructor
   * @param input BAM file assigned to the job's `input`
   * @return the configured job
   */
  def apply(root: Configurable, input: File): SambambaMarkdup = {
    val dedupPath = swapExtension(input.getAbsolutePath)
    apply(root, input, new File(dedupPath))
  }

  /** Drops a trailing `.bam` (if present) and appends `.dedup.bam`. */
  private def swapExtension(path: String): String = {
    val stem = path.stripSuffix(".bam")
    s"${stem}.dedup.bam"
  }
}