Tabix.scala 3.88 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; for commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
17
18
package nl.lumc.sasc.biopet.extensions

import java.io.File

19
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
bow's avatar
bow committed
21
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
23

/**
 * Wrapper for the tabix command
 *
 * Note that tabix can either index a file (no stdout stream) or retrieve regions from an indexed file (stdout stream)
 *
 */
class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Version {

  /** Bgzipped file that is passed to tabix as the last required argument. */
  @Input(doc = "Input bgzipped file", required = true)
  var input: File = null

  /** File that receives the redirected stdout in query mode (i.e. when `regions` is non-empty). */
  @Output(doc = "Output (for region query)", required = false)
  var outputQuery: File = null

  /**
   * Location of the index file: the absolute path of `input` with ".tbi" appended.
   *
   * @return the index file path (the file itself is not touched here)
   * @throws IllegalArgumentException when `input` has not been set
   */
  def outputIndex: File = {
    require(input != null, "Input should be defined")
    new File(input.getAbsolutePath + ".tbi")
  }

  /** Regions to retrieve; a non-empty list switches `cmdLine` to query mode. */
  @Argument(doc = "Regions to query", required = false)
  var regions: List[String] = config("regions", default = List.empty[String])

  // Each field below is read from the config key of the same name and passed to tabix as the
  // flag of the same name in cmdLine; see the tabix manual for what each flag does.
  var p: Option[String] = config("p")
  var s: Option[Int] = config("s")
  var b: Option[Int] = config("b")
  var e: Option[Int] = config("e")
  var S: Option[Int] = config("S")
  var c: Option[String] = config("c")
  var r: Option[File] = config("r")
  var B: Boolean = config("B", default = false)
  // Config key "0"; emitted as the "-0" flag
  var zero: Boolean = config("0", default = false)
  var h: Boolean = config("h", default = false)
  var l: Boolean = config("l", default = false)
  var f: Boolean = config("f", default = false)

  executable = config("exe", default = "tabix", freeVar = false)

  /** The version is read by running the bare executable, without any arguments. */
  def versionCommand = executable

  /** Pattern whose first capture group is taken as the version string. */
  def versionRegex = """Version: (.*)""".r

  // Exit code 1 is accepted next to 0 for the version command
  // NOTE(review): presumably because tabix without arguments prints its usage and exits non-zero - confirm
  override def versionExitcode = List(0, 1)

  /** Formats that tabix can handle */
  private val validFormats: Set[String] = Set("gff", "bed", "sam", "vcf", "psltbl")

  /** Additional output files; `beforeGraph` adds the index file here when `p` is set. */
  @Output
  var outputFiles: List[File] = Nil

  /**
   * Validates the `-p` preset and registers the index file as an output of this job.
   *
   * Nothing is validated or registered when `p` is not set.
   *
   * @throws IllegalArgumentException when `p` is not one of the valid formats, or when `p` is set
   *                                  while `input` is not
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    p.foreach { fmt =>
      require(validFormats.contains(fmt), "-p flag must be one of " + validFormats.mkString(", "))
      // Guard against registering the same index twice in case this method runs more than once
      val index = outputIndex
      if (!outputFiles.contains(index)) outputFiles :+= index
    }
  }

  /**
   * Builds the tabix command line.
   *
   * With an empty `regions` list this is the indexing command. Otherwise the regions are appended
   * and stdout is redirected to `outputQuery`.
   *
   * @return the command line string
   * @throws IllegalArgumentException when `regions` is non-empty but `outputQuery` is not set
   */
  def cmdLine: String = {
    val baseCommand = required(executable) +
      optional("-p", p) +
      optional("-s", s) +
      optional("-b", b) +
      optional("-e", e) +
      optional("-S", S) +
      optional("-c", c) +
      optional("-r", r) +
      conditional(B, "-B") +
      conditional(zero, "-0") +
      conditional(h, "-h") +
      conditional(l, "-l") +
      conditional(f, "-f") +
      required(input)

    // indexing mode
    if (regions.isEmpty) baseCommand
    // query mode ~ we want to output to a file
    else {
      // Without this check the redirect target would be built from a null value
      require(outputQuery != null, "Output should be defined when regions are queried")
      baseCommand + required("", repeat(regions), escape = false) + " > " + required(outputQuery)
    }
  }
}
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

object Tabix {

  /** File-name suffixes recognised by `apply`, each paired with the `-p` preset it selects. */
  private val presetBySuffix: List[(String, String)] = List(
    ".vcf.gz" -> "vcf",
    ".bed.gz" -> "bed",
    ".sam.gz" -> "sam",
    ".gff.gz" -> "gff",
    ".psltbl.gz" -> "psltbl"
  )

  /**
   * Creates a Tabix job for `input` and derives the `-p` preset from the file name.
   *
   * @param root  configurable parent that is handed to the Tabix constructor
   * @param input file to run tabix on; its name must end in one of the recognised suffixes
   * @return a Tabix instance with `input` and `p` set
   * @throws IllegalArgumentException when `input` is null or its name has no recognised suffix
   */
  def apply(root: Configurable, input: File): Tabix = {
    // Same message as the check in outputIndex; replaces a bare NullPointerException
    require(input != null, "Input should be defined")

    val tabix = new Tabix(root)
    tabix.input = input

    val fileName = input.getName
    val preset = presetBySuffix.collectFirst {
      case (suffix, format) if fileName.endsWith(suffix) => format
    }
    tabix.p = Some(preset.getOrElse(
      throw new IllegalArgumentException(
        "Unknown file type: " + fileName +
          " (expected a name ending in " + presetBySuffix.map(_._1).mkString(", ") + ")")
    ))
    tabix
  }
}