/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */

package nl.lumc.sasc.biopet.extensions.kraken

import java.io.File

import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

/**
 * Extension for Kraken.
 *
 * Builds the command line for the `kraken` executable from the fields below. Several
 * fields take their initial value from the Biopet configuration via `config(...)`.
 *
 * @param root configurable this extension is attached to
 */
class Kraken(val root: Configurable) extends BiopetCommandLineFunction {

  /** Sequence files to classify; appended last on the command line (R1 [R2]). */
  @Input(doc = "Input: FastQ or FastA")
  var input: List[File] = _

  /** Kraken database, read from config key `db` and passed as `--db`. */
  var db: File = config("db")

  /** When true `--fastq-input` is emitted, otherwise `--fasta-input`. */
  var inputFastQ: Boolean = true

  // NOTE(review): the three compression switches below are not read by cmdLine,
  // so setting them currently has no effect on the generated command.
  var compression: Boolean = false
  var compressionGzip: Boolean = false
  var compressionBzip: Boolean = false

  /** Adds `--quick` to the command line when true. */
  var quick: Boolean = false

  /** Optional value for Kraken's `--min-hits`, read from config key `min_hits`. */
  var min_hits: Option[Int] = config("min_hits")

  /** Optional target for `--unclassified-out`; the flag is omitted when `None`. */
  @Output(doc = "Unidentified reads", required = false)
  var unclassified_out: Option[File] = None

  /** Optional target for `--classified-out`; the flag is omitted when `None`. */
  @Output(doc = "Identified reads", required = false)
  var classified_out: Option[File] = None

  /** File passed to `--output`. */
  @Output(doc = "Output with hits per sequence")
  var output: File = _

  /** Adds `--preload` when true; config key `preload`, default true. */
  var preload: Boolean = config("preload", default = true)

  /** Adds `--paired` when true; config key `paired`, default false. */
  var paired: Boolean = config("paired", default = false)

  executable = config("exe", default = "kraken")
  override val versionRegex = """Kraken version (.*)""".r
  override val versionExitcode = List(0, 1)

  override val defaultCoreMemory = 8.0
  override val defaultThreads = 4

  override def versionCommand = executable + " --version"

  /** Delegates to the parent implementation; no Kraken-specific preparation is done. */
  override def beforeGraph: Unit = {
    super.beforeGraph
  }

  /**
   * Returns command to execute.
   *
   * Optional arguments (`--min-hits`, `--unclassified-out`, `--classified-out`) are
   * only emitted when the corresponding field holds a value.
   */
  def cmdLine: String = {
    // NOTE(review): required/optional/conditional are assumed to pad their result with
    // spaces; the bare "--db" and "--output" literals already depended on that.

    // Kraken spells this option with a hyphen. The fragment is padded on both sides
    // so it cannot fuse with its neighbours when the optional outputs are absent.
    val minHitsArg = min_hits.map(hits => " --min-hits " + hits + " ").getOrElse("")

    // Unwrap the Options safely: calling .get on the default None used to throw.
    val unclassifiedArg =
      unclassified_out.map(file => optional("--unclassified-out ", file)).getOrElse("")
    val classifiedArg =
      classified_out.map(file => optional("--classified-out ", file)).getOrElse("")

    // Finally the input files (R1 [R2]); each goes through required(...) like the
    // other file arguments instead of being concatenated raw.
    val inputArgs = input.map(file => required(file)).mkString

    required(executable) +
      "--db" + required(db) +
      optional("--threads", nCoresRequest) +
      conditional(inputFastQ, "--fastq-input") +
      conditional(!inputFastQ, "--fasta-input") +
      conditional(quick, "--quick") +
      minHitsArg +
      unclassifiedArg +
      classifiedArg +
      "--output" + required(output) +
      conditional(preload, "--preload") +
      conditional(paired, "--paired") +
      inputArgs
  }
}