package nl.lumc.sasc.biopet.extensions.centrifuge

import java.io.File

import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.util.matching.Regex

/**
 * Queue command-line wrapper around the `centrifuge` executable.
 *
 * Every command-line option is exposed as a mutable field. Most of them take
 * their initial value from the config; the input and output files have to be
 * set by the caller. [[cmdLine]] assembles all fields into the final command.
 *
 * Created by pjvanthof on 19/09/16.
 */
class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with Version {
  @Input(doc = "Input: FastQ or FastA", required = true)
  var inputR1: File = _

  /** Second input file; when set the inputs are passed as `-1`/`-2`, otherwise `inputR1` is passed as `-U`. */
  @Input(doc = "Input: FastQ or FastA", required = false)
  var inputR2: Option[File] = None

  /** Index prefix passed as `-x`; the files `<index>.1.cf`, `.2.cf` and `.3.cf` are added as dependencies. */
  var index: File = config("centrifuge_index")

  /** File passed as `-S`, unless the output is flagged to go to stdout. */
  @Output(doc = "Output with hits per sequence")
  var output: File = _

  /** File passed as `--report-file`; the option is omitted when this is `None`. */
  // Marked as not required, like the other Option-typed outputs: it defaults to
  // None and is emitted with `optional(...)` in cmdLine.
  @Output(doc = "Output report file", required = false)
  var report: Option[File] = None

  // For the four outputs below the "-gz" variant of the option is used
  // when the file name ends with ".gz" (see cmdLine).

  /** File passed as `--un` / `--un-gz`. */
  @Output(required = false)
  var un: Option[File] = None

  /** File passed as `--al` / `--al-gz`. */
  @Output(required = false)
  var al: Option[File] = None

  /** File passed as `--un-conc` / `--un-conc-gz`. */
  @Output(required = false)
  var unConc: Option[File] = None

  /** File passed as `--al-conc` / `--al-conc-gz`. */
  @Output(required = false)
  var alConc: Option[File] = None

  /** File passed as `--met-file`. */
  @Output(required = false)
  var metFile: Option[File] = None

  // Input args
  var q: Boolean = config("q", default = false)
  var qseq: Boolean = config("qseq", default = false)
  var f: Boolean = config("f", default = false)
  var r: Boolean = config("r", default = false)
  var c: Boolean = config("c", default = false)
  var skip: Option[Int] = config("skip")
  var upto: Option[Int] = config("upto")
  var trim5: Option[Int] = config("trim5")
  var trim3: Option[Int] = config("trim3")
  var phred33: Boolean = config("phred33", default = false)
  var phred64: Boolean = config("phred64", default = false)
  var intQuals: Boolean = config("int_quals", default = false)
  var ignoreQuals: Boolean = config("ignore_quals", default = false)
  var nofw: Boolean = config("nofw", default = false)
  var norc: Boolean = config("norc", default = false)

  // Classification args
  var minHitlen: Option[Int] = config("min_hitlen")
  var minTotallen: Option[Int] = config("min_totallen")
  var hostTaxids: List[Int] = config("host_taxids", default = Nil)
  var excludeTaxids: List[Int] = config("exclude_taxids", default = Nil)

  // Output args
  var t: Boolean = config("t", default = false)
  var quiet: Boolean = config("quiet", default = false)
  var metStderr: Boolean = config("met_stderr", default = false)
  var met: Option[Int] = config("met")

  override def defaultThreads = 8

  executable = config("exe", default = "centrifuge", freeVar = false)

  /** Command to get version of executable */
  def versionCommand: String = s"$executable --version"

  /** Regex to get version from version command output */
  def versionRegex: Regex = ".* version (.*)".r

  /** Registers the three index files (`<index>.1.cf` .. `<index>.3.cf`) as dependencies of this job. */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    (1 to 3).foreach(part => deps :+= new File(s"${index}.${part}.cf"))
  }

  /** Returns `flag` with "-gz" appended when `file` is defined and its name ends with ".gz". */
  private def gzAwareFlag(flag: String, file: Option[File]): String =
    if (file.exists(_.getName.endsWith(".gz"))) flag + "-gz" else flag

  /** Joins the taxonomy ids with commas; `None` for an empty list so the option is left out. */
  private def taxidList(ids: List[Int]): Option[String] =
    if (ids.nonEmpty) Some(ids.mkString(",")) else None

  /**
   * This function needs to be implemented to define the command that is executed
   *
   * @return Command to run
   */
  def cmdLine: String = executable +
    conditional(q, "-q") +
    conditional(qseq, "--qseq") +
    conditional(f, "-f") +
    conditional(r, "-r") +
    conditional(c, "-c") +
    optional("--skip", skip) +
    optional("--upto", upto) +
    optional("--trim5", trim5) +
    optional("--trim3", trim3) +
    conditional(phred33, "--phred33") +
    conditional(phred64, "--phred64") +
    conditional(intQuals, "--int-quals") +
    conditional(ignoreQuals, "--ignore-quals") +
    conditional(nofw, "--nofw") +
    conditional(norc, "--norc") +
    optional("--min-hitlen", minHitlen) +
    optional("--min-totallen", minTotallen) +
    optional("--host-taxids", taxidList(hostTaxids)) +
    optional("--exclude-taxids", taxidList(excludeTaxids)) +
    optional("--met-file", metFile) +
    conditional(t, "-t") +
    conditional(quiet, "--quiet") +
    conditional(metStderr, "--met-stderr") +
    optional("--met", met) +
    optional(gzAwareFlag("--un", un), un) +
    optional(gzAwareFlag("--al", al), al) +
    optional(gzAwareFlag("--un-conc", unConc), unConc) +
    optional(gzAwareFlag("--al-conc", alConc), alConc) +
    optional("--threads", threads) +
    required("-x", index) +
    // Paired input uses -1/-2, a single input file uses -U.
    (inputR2 match {
      case Some(r2) => required("-1", inputR1) + required("-2", r2)
      case _        => required("-U", inputR1)
    }) +
    // `outputAsStsout` is declared outside this file; its spelling is kept as-is.
    (if (outputAsStsout) "" else required("-S", output)) +
    optional("--report-file", report)
}