SambambaMarkdup.scala 2.55 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16 17
package nl.lumc.sasc.biopet.extensions.sambamba

Peter van 't Hof's avatar
Peter van 't Hof committed
18 19
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21 22
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

Peter van 't Hof's avatar
Peter van 't Hof committed
23
/**
 * Extension for sambamba markdup.
 *
 * Assembles the `markdup` sub-command from the configured options followed by
 * the input bam and the output bam.
 *
 * @param root configurable handed to this extension on construction
 */
class SambambaMarkdup(val root: Configurable) extends Sambamba {
  /** Overrides the inherited default thread count with 4. */
  override def defaultThreads = 4

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "Markdup output bam")
  var output: File = _

  /** Adds "--remove-duplicates" to the command when true (config key "remove_duplicates", default false). */
  var removeDuplicates: Boolean = config("remove_duplicates", default = false)

  // @doc: compression_level 6 is average, 0 = no compression, 9 = best
  val compressionLevel: Option[Int] = config("compression_level", default = 6)

  /** Value passed as "--hash-table-size=" (config key "hash-table-size", default 262144). */
  val hashTableSize: Option[Int] = config("hash-table-size", default = 262144)

  /** Value passed as "--overflow-list-size=" (config key "overflow-list-size", default 200000). */
  val overflowListSize: Option[Int] = config("overflow-list-size", default = 200000)

  /** Value passed as "--io-buffer-size=" (config key "io-buffer-size", default 128). */
  val ioBufferSize: Option[Int] = config("io-buffer-size", default = 128)

  /**
   * Returns command to execute.
   *
   * The fragments are evaluated and joined in the order listed: executable,
   * sub-command, flags and options, then input and output.
   */
  def cmdLine = Seq(
    required(executable),
    required("markdup"),
    conditional(removeDuplicates, "--remove-duplicates"),
    optional("-t", nCoresRequest),
    optional("-l", compressionLevel),
    // The long options carry their own "=", so key and value are emitted without a space.
    optional("--hash-table-size=", hashTableSize, spaceSeparated = false),
    optional("--overflow-list-size=", overflowListSize, spaceSeparated = false),
    optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false),
    required(input),
    required(output)
  ).mkString
}

/** Factory methods for [[SambambaMarkdup]]. */
object SambambaMarkdup {
  /**
   * Creates a markdup job with both input and output set.
   *
   * @param root   configurable passed to the [[SambambaMarkdup]] constructor
   * @param input  bam file assigned to the job's `input`
   * @param output bam file assigned to the job's `output`
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File): SambambaMarkdup = {
    val job = new SambambaMarkdup(root)
    job.input = input
    job.output = output
    job
  }

  /**
   * Creates a markdup job whose output path is derived from the absolute
   * path of `input`: a trailing ".bam" is replaced by ".dedup.bam".
   *
   * @param root  configurable passed to the [[SambambaMarkdup]] constructor
   * @param input bam file assigned to the job's `input`
   * @return the configured job
   */
  def apply(root: Configurable, input: File): SambambaMarkdup =
    apply(root, input, new File(swapExtension(input.getAbsolutePath)))

  /** Strips a trailing ".bam" (if present) and appends ".dedup.bam". */
  private def swapExtension(bamPath: String): String =
    s"${bamPath.stripSuffix(".bam")}.dedup.bam"
}