Centrifuge.scala 6.15 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15
16
17
18
package nl.lumc.sasc.biopet.extensions.centrifuge

import java.io.File

19
import nl.lumc.sasc.biopet.core.summary.Summarizable
20
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
21
import nl.lumc.sasc.biopet.utils.config.Configurable
22
import nl.lumc.sasc.biopet.utils.tryToParseNumber
23
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
24

25
import scala.io.Source
26
27
28
import scala.util.matching.Regex

/**
  * Biopet command line wrapper for the `centrifuge` executable.
  *
  * Builds the command line from the configured options, registers the index
  * files as job dependencies and exposes the optional metrics file to the summary.
  *
  * Created by pjvanthof on 19/09/16.
  */
class Centrifuge(val parent: Configurable)
    extends BiopetCommandLineFunction
    with Version
    with Summarizable {

  /** First (or only) read file; passed as `-1` when [[inputR2]] is set, as `-U` otherwise */
  @Input(doc = "Input: FastQ or FastA", required = true)
  var inputR1: File = _

  /** Optional second read file; when set it is passed as `-2` */
  @Input(doc = "Input: FastQ or FastA", required = false)
  var inputR2: Option[File] = None

  /** Index prefix passed as `-x`; `<prefix>.1.cf` .. `<prefix>.3.cf` are added as dependencies */
  var index: File = config("centrifuge_index")

  /** Passed as `-S`, unless the job writes its output to stdout */
  @Output(doc = "Output with hits per sequence")
  var output: File = _

  /** Passed as `--report-file` */
  @Output(doc = "Output report file")
  var report: Option[File] = None

  /** Passed as `--un`, or `--un-gz` when the file name ends with `.gz` */
  @Output(required = false)
  var un: Option[File] = None

  /** Passed as `--al`, or `--al-gz` when the file name ends with `.gz` */
  @Output(required = false)
  var al: Option[File] = None

  /** Passed as `--un-conc`, or `--un-conc-gz` when the file name ends with `.gz` */
  @Output(required = false)
  var unConc: Option[File] = None

  /** Passed as `--al-conc`, or `--al-conc-gz` when the file name ends with `.gz` */
  @Output(required = false)
  var alConc: Option[File] = None

  /** Passed as `--met-file`; also the source of [[summaryFiles]] and [[summaryStats]] */
  @Output(required = false)
  var metFile: Option[File] = None

  // Input args
  var q: Boolean = config("q", default = false)
  var qseq: Boolean = config("qseq", default = false)
  var f: Boolean = config("f", default = false)
  var r: Boolean = config("r", default = false)
  var c: Boolean = config("c", default = false)
  var skip: Option[Int] = config("skip")
  var upto: Option[Int] = config("upto")
  var trim5: Option[Int] = config("trim5")
  var trim3: Option[Int] = config("trim3")
  var phred33: Boolean = config("phred33", default = false)
  var phred64: Boolean = config("phred64", default = false)
  var intQuals: Boolean = config("int_quals", default = false)
  var ignoreQuals: Boolean = config("ignore_quals", default = false)
  var nofw: Boolean = config("nofw", default = false)
  var norc: Boolean = config("norc", default = false)

  // Classification args
  var minHitlen: Option[Int] = config("min_hitlen")
  var minTotallen: Option[Int] = config("min_totallen")
  var hostTaxids: List[Int] = config("host_taxids", default = Nil)
  var excludeTaxids: List[Int] = config("exclude_taxids", default = Nil)

  // Output args
  var t: Boolean = config("t", default = false)
  var quiet: Boolean = config("quiet", default = false)
  var metStderr: Boolean = config("met_stderr", default = false)
  var met: Option[Int] = config("met")

  override def defaultThreads = 8

  executable = config("exe", default = "centrifuge", freeVar = false)

  /** Command to get version of executable */
  def versionCommand: String = s"$executable --version"

  /** Regex to get version from version command output */
  def versionRegex: Regex = ".* version (.*)".r

  /** Registers the three index files (`<index>.1.cf` .. `<index>.3.cf`) as dependencies */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    deps ++= (1 to 3).map(part => new File(index + s".$part.cf"))
  }

  /**
    * Selects the gzip variant of a flag when the target file name ends with `.gz`.
    *
    * @param flag Plain flag, e.g. `--un`
    * @param file Optional target file
    * @return `flag + "-gz"` when the file name ends with `.gz`, otherwise `flag`
    */
  private def gzAwareFlag(flag: String, file: Option[File]): String =
    if (file.exists(_.getName.endsWith(".gz"))) flag + "-gz" else flag

  /**
    * This function needs to be implemented to define the command that is executed
    *
    * @return Command to run
    */
  def cmdLine: String =
    executable +
      conditional(q, "-q") +
      conditional(qseq, "--qseq") +
      conditional(f, "-f") +
      conditional(r, "-r") +
      conditional(c, "-c") +
      optional("--skip", skip) +
      optional("--upto", upto) +
      optional("--trim5", trim5) +
      optional("--trim3", trim3) +
      conditional(phred33, "--phred33") +
      conditional(phred64, "--phred64") +
      conditional(intQuals, "--int-quals") +
      conditional(ignoreQuals, "--ignore-quals") +
      conditional(nofw, "--nofw") +
      conditional(norc, "--norc") +
      optional("--min-hitlen", minHitlen) +
      optional("--min-totallen", minTotallen) +
      // Taxid lists are only emitted when non-empty, as a comma separated value
      optional("--host-taxids", if (hostTaxids.nonEmpty) Some(hostTaxids.mkString(",")) else None) +
      optional("--exclude-taxids",
               if (excludeTaxids.nonEmpty) Some(excludeTaxids.mkString(",")) else None) +
      optional("--met-file", metFile) +
      conditional(t, "-t") +
      conditional(quiet, "--quiet") +
      conditional(metStderr, "--met-stderr") +
      optional("--met", met) +
      optional(gzAwareFlag("--un", un), un) +
      optional(gzAwareFlag("--al", al), al) +
      optional(gzAwareFlag("--un-conc", unConc), unConc) +
      optional(gzAwareFlag("--al-conc", alConc), alConc) +
      optional("--threads", threads) +
      required("-x", index) +
      (inputR2 match {
        case Some(r2) => required("-1", inputR1) + required("-2", r2)
        case _ => required("-U", inputR1)
      }) +
      (if (outputAsStdout) "" else required("-S", output)) +
      optional("--report-file", report)

  /** Must return files to store into summary */
  override def summaryFiles: Map[String, File] = metFile.map("metrics" -> _).toMap

  /**
    * Must returns stats to store into summary
    *
    * Reads the first two lines of [[metFile]] as tab separated header and values and
    * returns them as `Map("metrics" -> Map(column -> value))`. Returns an empty map when
    * no metrics file is configured.
    *
    * @throws NoSuchElementException when the metrics file has fewer than two lines
    */
  override def summaryStats: Any = {
    metFile
      .map { file =>
        val reader = Source.fromFile(file)
        // try/finally guarantees the file handle is released, also when parsing fails
        try {
          reader.getLines().take(2).map(_.split("\t")).toList match {
            case header :: rawValues :: Nil =>
              val values = rawValues.map(tryToParseNumber(_, fallBack = true).get)
              Map("metrics" -> header.zip(values).toMap)
            case _ =>
              throw new NoSuchElementException(
                s"Metrics file '$file' should contain a header line and a values line")
          }
        } finally reader.close()
      }
      .getOrElse(Map())
  }
}