Pysvtools.scala 1.83 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
package nl.lumc.sasc.biopet.extensions

import java.io.File

import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline._

/**
 * Command line wrapper around the PySVtools SV-merge executable
 * (`vcf_merge_sv_events` unless the `exe` config value overrides it).
 *
 * Created by wyleung on 8-1-16.
 *
 * @param root configuration root this function is attached to
 */
class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {

  /** Input files; passed to the tool after `-i`. */
  @Input(doc = "Input file", required = true)
  var input: List[File] = Nil

  /** Optional flanking amount, read from config key `flanking`; passed as `-f`. */
  @Argument(doc = "Set flanking amount")
  var flanking: Option[Int] = config("flanking")

  /** Exclusion region files, read from config key `exclusion_regions`; passed as `-c`. */
  var exclusionRegions: List[File] = config("exclusion_regions")

  /**
   * Read from config key `translocations_only` (default `false`).
   *
   * NOTE(review): this value is not added to the command line anywhere in this
   * class -- confirm the matching tool flag and wire it into `cmdLine`.
   */
  var translocationsOnly: Boolean = config("translocations_only", default = false)

  /** Main output file; passed as `-v`. The side outputs below are named after it. */
  @Output(doc = "Unzipped file", required = true)
  var output: File = _

  // Side outputs; (re)assigned in beforeGraph() from the path of `output`.
  var tsvoutput: File = _
  var bedoutput: File = _
  var regionsoutput: File = _

  executable = config("exe", default = "vcf_merge_sv_events")

  /** Pattern for the version string (presumably matched against the output of `versionCommand`). */
  def versionRegex = """PySVtools (.*)""".r

  /** Command used to query the tool version. */
  def versionCommand: String = executable + " --version"

  override def defaultThreads: Int = 2

  /**
   * Checks the inputs/outputs and derives the `.tsv`, `.bed` and `.regions.bed`
   * side-output paths from `output` (with a trailing `.vcf` removed).
   *
   * Problems are reported through `Logging.addError` instead of throwing.
   */
  override def beforeGraph(): Unit = {
    // TODO: we might want to validate the VCF before we start the tool? or is this a responsibility of the tool itself?
    if (input.isEmpty) {
      Logging.addError("No input VCF is given")
    }

    // `output` is declared with `= _`, so it is null until a caller assigns it;
    // guard against that instead of failing with a NullPointerException.
    Option(output) match {
      case Some(vcf) =>
        // redefine the tsv, bed and regions output
        val outputNamePrefix = vcf.getAbsolutePath.stripSuffix(".vcf")
        tsvoutput = new File(outputNamePrefix + ".tsv")
        bedoutput = new File(outputNamePrefix + ".bed")
        regionsoutput = new File(outputNamePrefix + ".regions.bed")
      case None =>
        Logging.addError("No output VCF is given")
    }
  }

  /** return commandline to execute */
  def cmdLine: String = required(executable) +
    // -c takes the exclusion regions; the input files are supplied via -i below
    repeat("-c", exclusionRegions) +
    optional("-f", flanking) +
    "-i " + repeat(input) +
    "-o " + required(tsvoutput) +
    "-b " + required(bedoutput) +
    "-v " + required(output) +
    "-r " + required(regionsoutput)
}