Stampy.scala 3.95 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
package nl.lumc.sasc.biopet.extensions

import java.io.File
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
21
22
23
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

Peter van 't Hof's avatar
Peter van 't Hof committed
24
/**
 * Extension for stampy
 *
 * Builds the stampy command line: reads are taken from `R1` (and optionally `R2`)
 * and the alignment is written to `output`.
 *
 * @param root the [[Configurable]] handed to this extension as its root
 */
class Stampy(val root: Configurable) extends BiopetCommandLineFunction with Reference {
  @Input(doc = "FastQ file R1", shortName = "R1")
  var R1: File = _

  @Input(doc = "FastQ file R2", shortName = "R2", required = false)
  var R2: File = _

  /** Reference file; when left unset it is filled in by [[beforeGraph]]. */
  @Input(doc = "The reference file for the bam files.", shortName = "ref")
  var reference: File = _

  /** Passed to stampy with `-g`; read from config key "genome". */
  @Input(doc = "The genome prefix.")
  var genome: File = config("genome")

  /** Passed to stampy with `-h`; read from config key "hash". */
  @Input(doc = "The hash prefix")
  var hash: File = config("hash")

  @Output(doc = "Output file SAM", shortName = "output")
  var output: File = _

  // options set via API or config
  //  var numrecords: String = config("numrecords", default = "all")

  // Each flag below adds the equally named `--` switch to the command line when true
  var solexa: Boolean = config("solexa", default = false)
  var solexaold: Boolean = config("solexaold", default = false)
  var sanger: Boolean = config("sanger", default = false)

  var insertsize: Option[Int] = config("insertsize", default = 250)
  var insertsd: Option[Int] = config("insertsd", default = 60)
  var insertsize2: Option[Int] = config("insertsize2", default = -2000)
  // A value of -1 (the default) keeps the second insert size library switched off, see cmdLine
  var insertsd2: Option[Int] = config("insertsd2", default = -1)

  var sensitive: Boolean = config("sensitive", default = false)
  var fast: Boolean = config("fast", default = false)

  /** Value for `--readgroup`; must be assigned before [[beforeGraph]] runs. */
  var readgroup: String = _
  var verbosity: Option[Int] = config("verbosity", default = 2)
  var logfile: Option[String] = config("logfile")

  executable = config("exe", default = "stampy.py", freeVar = false)

  /** Pattern used to pick the version out of the output of [[versionCommand]] */
  override def versionRegex = """stampy v(.*) \(.*\), .*""".r

  /** Exit codes of [[versionCommand]] that are accepted: 0 and 1 */
  override def versionExitcode = List(0, 1)

  // Stampy uses approx factor 1.1 times the size of the genome in memory.
  override def defaultCoreMemory = 4.0
  override def defaultThreads = 8

  /** The version is read from the help output of the executable */
  override def versionCommand = executable + " --help"

  /**
   * Checks that a readgroup was given and, when no reference was set explicitly,
   * fills `reference` with the value of `referenceFasta()`.
   *
   * @throws IllegalArgumentException when `readgroup` is still null
   */
  override def beforeGraph: Unit = {
    super.beforeGraph
    require(readgroup != null, "Stampy: 'readgroup' must be set before the graph is built")
    if (reference == null) reference = referenceFasta()
  }

  /** Returns command to execute */
  def cmdLine: String = {
    // Optionally start Mate Pair alignment, if set, the aligner will
    // assign MP reads as MP, otherwise in PE mode, these reads will
    // be aligned with the bits RR or FF showing a False Inversion event
    val matePairOptions =
      if (insertsd2.exists(_ != -1))
        optional("--insertsize2", insertsize2) + optional("--insertsd2", insertsd2)
      else ""

    required(executable) +
      optional("-t", nCoresRequest) +
      conditional(solexa, "--solexa") +
      conditional(solexaold, "--solexaold") +
      conditional(sanger, "--sanger") +
      optional("--insertsize", insertsize) +
      optional("--insertsd", insertsd) +
      matePairOptions +
      conditional(sensitive, "--sensitive") +
      conditional(fast, "--fast") +
      optional("--readgroup", readgroup) +
      optional("-v", verbosity) +
      optional("--logfile", logfile) +
      " -g " + required(genome) +
      " -h " + required(hash) +
      " -o " + required(output) +
      " -M " + required(R1) + optional(R2)
  }
}