Cutadapt.scala 8.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
package nl.lumc.sasc.biopet.extensions

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

20
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
22
23
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
24

25
26
27
import scala.collection.mutable
import scala.io.Source

Peter van 't Hof's avatar
Peter van 't Hof committed
28
/**
 * Extension for cutadapt
 * Started with version 1.5
 * Updated to version 1.9 (18-01-2016 by wyleung)
 */
33
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable with Version {
34
  /** FASTQ file handed to cutadapt as its positional input argument. */
  @Input(doc = "Input fastq file")
  var fastqInput: File = _

  /** File passed to `--output` when the command line is built. */
  @Output
  var fastqOutput: File = _

  /** File that receives the redirected stdout of cutadapt; parsed again by `summaryStats`. */
  @Output(doc = "Output statistics file")
  var statsOutput: File = _

  // Executable name/path; overridable through the "exe" config key.
  executable = config("exe", default = "cutadapt")

  /** Command used to query the tool version. */
  def versionCommand = executable + " --version"

  /** Regex applied to the version command output; captures the whole line. */
  def versionRegex = """(.*)""".r

  /** Name of the key containing clipped adapters information in the summary stats. */
  def adaptersStatsName = "adapters"

50
51
52
53
  // Every field below is initialised from the config value stored under the quoted key.

  /** Options that influence how the adapters are found */
  // NOTE(review): defaultClipMode is not referenced by cmdLine in this file;
  // presumably it is consumed by callers — confirm.
  var defaultClipMode: String = config("default_clip_mode", default = "3")
  var adapter: Set[String] = config("adapter", default = Nil)
  var anywhere: Set[String] = config("anywhere", default = Nil)
  var front: Set[String] = config("front", default = Nil)

  var errorRate: Option[Double] = config("error_rate")
  var noIndels: Boolean = config("no_indels", default = false)
  var times: Option[Int] = config("times")
  var overlap: Option[Int] = config("overlap")
  var matchReadWildcards: Boolean = config("match_read_wildcards", default = false)
  var noMatchAdapterWildcards: Boolean = config("no_match_adapter_wildcards", default = false) // specific for 1.9

  /** Options for filtering of processed reads */
  var discard: Boolean = config("discard", default = false)
  var trimmedOnly: Boolean = config("trimmed_only", default = false)
  var minimumLength: Int = config("minimum_length", 1)
  var maximumLength: Option[Int] = config("maximum_length")
  var noTrim: Boolean = config("no_trim", default = false)
  var maxN: Option[Int] = config("max_n") // specific for 1.9
  var maskAdapter: Boolean = config("mask_adapter", default = false)

  /** Options that influence what gets output to where */
  var quiet: Boolean = config("quiet", default = false)
  //  var output: File // see up @Output
  var infoFile: Option[File] = config("info_file")
  var restFile: Option[File] = config("rest_file")
  var wildcardFile: Option[File] = config("wildcard_file")
  var tooShortOutput: Option[File] = config("too_short_output")
  var tooLongOutput: Option[File] = config("too_long_output")
  var untrimmedOutput: Option[File] = config("untrimmed_output")

  /** Additional read modifications */
  var cut: Option[Int] = config("cut")
  var qualityCutoff: Option[String] = config("quality_cutoff")
  var qualityBase: Option[Int] = config("quality_base")
  var trimN: Boolean = config("trim_n", default = false)
  var prefix: Option[String] = config("prefix")
  var suffix: Option[String] = config("suffix")
  var stripSuffix: Set[String] = config("strip_suffix", default = Nil)
  var lengthTag: Option[String] = config("length_tag")

  /** Colorspace options */
  var colorspace: Boolean = config("colorspace", default = false)
  var doubleEncode: Boolean = config("double_encode", default = false)
  var trimPrimer: Boolean = config("trim_primer", default = false)
  var stripF3: Boolean = config("strip_f3", default = false)
  var maq: Boolean = config("maq", default = false)
  var bwa: Boolean = config("bwa", default = false, freeVar = false)
  var noZeroCap: Boolean = config("no_zero_cap", default = false)
  var zeroCap: Boolean = config("zero_cap", default = false)

  /** Paired end options */
  var peAdapter: Set[String] = config("pe_adapter", default = Nil)
  var peAdapterFront: Set[String] = config("pe_adapter_front", default = Nil)
  var peAdapterBoth: Set[String] = config("pe_adapter_both", default = Nil)
  // NOTE(review): peCut is a Boolean and is not referenced by cmdLine in this file — confirm intent.
  var peCut: Boolean = config("pe_cut", default = false)
  var pairedOutput: Option[File] = config("paired_output")
  var interleaved: Boolean = config("interleaved", default = false)
  var untrimmedPairedOutput: Option[File] = config("untrimmed_paired_output")

Peter van 't Hof's avatar
Peter van 't Hof committed
110
  /**
   * Returns the command line to execute.
   *
   * The line is assembled from the executable followed by the option groups
   * (adapter search, read filtering, output routing, read modification,
   * colorspace, paired-end) and finally the input FASTQ. Unless the job writes
   * to stdout (`outputAsStsout`), `--output fastqOutput` is appended and the
   * tool's stdout is redirected into `statsOutput`.
   */
  def cmdLine = required(executable) +
    // Options that influence how the adapters are found
    repeat("-a", adapter) +
    repeat("-b", anywhere) +
    repeat("-g", front) +
    optional("--error-rate", errorRate) +
    conditional(noIndels, "--no-indels") +
    optional("--times", times) +
    optional("--overlap", overlap) +
    conditional(matchReadWildcards, "--match-read-wildcards") +
    conditional(noMatchAdapterWildcards, "--no-match-adapter-wildcards") +
    // Options for filtering of processed reads
    conditional(discard, "--discard") +
    conditional(trimmedOnly, "--trimmed-only") +
    optional("-m", minimumLength) +
    optional("-M", maximumLength) +
    conditional(noTrim, "--no-trim") +
    optional("--max-n", maxN) +
    conditional(maskAdapter, "--mask-adapter") +
    // Options that influence what gets output to where
    conditional(quiet, "--quiet") +
    optional("--info-file", infoFile) +
    optional("--rest-file", restFile) +
    optional("--wildcard-file", wildcardFile) +
    optional("--too-short-output", tooShortOutput) +
    optional("--too-long-output", tooLongOutput) +
    optional("--untrimmed-output", untrimmedOutput) +
    // Additional read modifications
    optional("--cut", cut) +
    optional("--quality-cutoff", qualityCutoff) +
    // qualityBase is read from the config, so it has to reach the tool as well
    optional("--quality-base", qualityBase) +
    conditional(trimN, "--trim-n") +
    optional("--prefix", prefix) +
    optional("--suffix", suffix) +
    // stripSuffix is a Set: emit the flag once per element, like the other Set-valued options
    repeat("--strip-suffix", stripSuffix) +
    optional("--length-tag", lengthTag) +
    // Colorspace options
    conditional(colorspace, "--colorspace") +
    conditional(doubleEncode, "--double-encode") +
    conditional(trimPrimer, "--trim-primer") +
    conditional(stripF3, "--strip-f3") +
    conditional(maq, "--maq") +
    conditional(bwa, "--bwa") +
    conditional(noZeroCap, "--no-zero-cap") +
    conditional(zeroCap, "--zero-cap") +
    // Paired-end options
    repeat("-A", peAdapter) +
    repeat("-G", peAdapterFront) +
    repeat("-B", peAdapterBoth) +
    conditional(interleaved, "--interleaved") +
    optional("--paired-output", pairedOutput) +
    optional("--untrimmed-paired-output", untrimmedPairedOutput) +
    // input / output
    required(fastqInput) +
    // When writing to stdout neither --output nor the stats redirect is added:
    // the whole else-branch (output flag AND redirect) is skipped.
    (if (outputAsStsout) ""
    else required("--output", fastqOutput) + " > " + required(statsOutput))
165

Peter van 't Hof's avatar
Peter van 't Hof committed
166
  /**
   * Output summary stats.
   *
   * Scans `statsOutput` line by line (when the file exists) for the trimmed /
   * too-short / too-long read counts and the per-adapter trim counts.
   * Counters that never appear in the file stay at 0.
   *
   * @return map with the keys "num_reads_affected", "num_reads_discarded_too_short",
   *         "num_reads_discarded_too_long" and `adaptersStatsName` (adapter sequence -> count)
   */
  def summaryStats: Map[String, Any] = {
    val trimR = """.*Trimmed reads: *(\d*) .*""".r
    val tooShortR = """.*Too short reads: *(\d*) .*""".r
    val tooLongR = """.*Too long reads: *(\d*) .*""".r
    val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r

    val stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0)
    val adapterStats: mutable.Map[String, Int] = mutable.Map()

    if (statsOutput.exists) {
      val reader = Source.fromFile(statsOutput)
      // Always release the file handle, even when parsing a count throws.
      try {
        reader.getLines().foreach {
          case trimR(m)                 => stats += ("trimmed" -> m.toInt)
          case tooShortR(m)             => stats += ("tooshort" -> m.toInt)
          case tooLongR(m)              => stats += ("toolong" -> m.toInt)
          case adapterR(adapter, count) => adapterStats += (adapter -> count.toInt)
          case _                        => // line carries no statistic of interest
        }
      } finally {
        reader.close()
      }
    }

    Map("num_reads_affected" -> stats("trimmed"),
      "num_reads_discarded_too_short" -> stats("tooshort"),
      "num_reads_discarded_too_long" -> stats("toolong"),
      adaptersStatsName -> adapterStats.toMap
    )
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
193
  /**
   * Merges values that can be merged for the summary.
   *
   * Two `Int` values are added together; for any other combination the first
   * value wins. The `key` argument is not consulted.
   */
  override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = v1 match {
    case left: Int =>
      v2 match {
        case right: Int => left + right
        case _          => v1
      }
    case _ => v1
  }

  /** This extension contributes no files to the summary. */
  def summaryFiles: Map[String, File] = Map.empty
Zeeuw's avatar
Zeeuw committed
202
}