/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.extensions

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

bow's avatar
bow committed
20
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
bow's avatar
bow committed
23
24
25

/**
 * Wrapper for the cufflinks command line tool.
 * Written based on cufflinks version v2.2.1 (md5: 07c831c4f8b4e161882731ea5694ff80)
 *
 * Each option below is read from the Biopet config under a key equal to the
 * field name; the bracketed value in an option's comment is the default quoted
 * from the cufflinks help text. The three output files are derived from
 * `output_dir`, so `input` and `output_dir` must be set before they are used.
 *
 * @param root parent configurable this job belongs to
 */
class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {

  /** default executable */
  executable = config("exe", default = "cufflinks")

  /** default threads */
  override def defaultThreads = 8

  /** default vmem for cluster jobs */
  override def defaultCoreMemory = 6.0

  /** input file */
  @Input(doc = "Input file (SAM or BAM)", required = true)
  var input: File = null

  /** output files, computed automatically from output directory */

  // The outputs are lazy so that they are only resolved once `input` and
  // `output_dir` (declared further down) have been given their values.
  @Output(doc = "Output GTF file")
  lazy val outputGtf: File = {
    require(input != null && output_dir != null,
      "Can not set Cufflinks GTF output while input file and/or output directory is not defined")
    // cufflinks always outputs a transcripts.gtf file in the output directory
    new File(output_dir, "transcripts.gtf")
  }

  @Output(doc = "Output isoform FPKM file")
  lazy val outputIsoformsFpkm: File = {
    require(input != null && output_dir != null,
      "Can not set Cufflinks isoforms.fpkm_tracking output while input file and/or output directory is not defined")
    // cufflinks always outputs an isoforms.fpkm_tracking file in the output directory
    new File(output_dir, "isoforms.fpkm_tracking")
  }

  // The doc string used to read "Output GTF file" (copied from outputGtf);
  // this field points at the genes FPKM tracking file.
  @Output(doc = "Output genes FPKM file")
  lazy val outputGenesFpkm: File = {
    require(input != null && output_dir != null,
      "Can not set Cufflinks genes.fpkm_tracking output while input file and/or output directory is not defined")
    // cufflinks always outputs a genes.fpkm_tracking file in the output directory
    new File(output_dir, "genes.fpkm_tracking")
  }

  /** write all output files to this directory [./] */
  var output_dir: File = config("output_dir", default = new File("."))

  /** value of random number generator seed [0] */
  var seed: Option[Int] = config("seed")

  /** quantitate against reference transcript annotations */
  var GTF: Option[File] = config("GTF")

  /** use reference transcript annotation to guide assembly */
  var GTF_guide: Option[File] = config("GTF_guide")

  /** ignore all alignment within transcripts in this file */
  var mask_file: Option[File] = config("mask_file")

  /** use bias correction - reference fasta required [NULL] */
  var frag_bias_correct: Option[String] = config("frag_bias_correct")

  /** use 'rescue method' for multi-reads (more accurate) [FALSE] */
  var multi_read_correct: Boolean = config("multi_read_correct", default = false)

  /** library prep used for input reads [below] */
  var library_type: Option[String] = config("library_type")

  /** Method used to normalize library sizes [below] */
  var library_norm_method: Option[String] = config("library_norm_method")

  /** average fragment length (unpaired reads only) [200] */
  var frag_len_mean: Option[Int] = config("frag_len_mean")

  /** fragment length std deviation (unpaired reads only) [80] */
  var frag_len_std_dev: Option[Int] = config("frag_len_std_dev")

  /** maximum iterations allowed for MLE calculation [5000] */
  var max_mle_iterations: Option[Int] = config("max_mle_iterations")

  /** count hits compatible with reference RNAs only [FALSE] */
  var compatible_hits_norm: Boolean = config("compatible_hits_norm", default = false)

  /** count all hits for normalization [TRUE] */
  var total_hits_norm: Boolean = config("total_hits_norm", default = true)

  /** Number of fragment generation samples [100] */
  var num_frag_count_draws: Option[Int] = config("num_frag_count_draws")

  /** Number of fragment assignment samples per generation [50] */
  var num_frag_assign_draws: Option[Int] = config("num_frag_assign_draws")

  /** Maximum number of alignments allowed per fragment [unlim] */
  var max_frag_multihits: Option[Int] = config("max_frag_multihits")

  /** No effective length correction [FALSE] */
  var no_effective_length_correction: Boolean = config("no_effective_length_correction", default = false)

  /** No length correction [FALSE] */
  var no_length_correction: Boolean = config("no_length_correction", default = false)

  /** assembled transcripts have this ID prefix [CUFF] */
  var label: Option[String] = config("label")

  /** suppress transcripts below this abundance level [0.10] */
  var min_isoform_fraction: Option[Float] = config("min_isoform_fraction")

  /** suppress intra-intronic transcripts below this level [0.15] */
  var pre_mrna_fraction: Option[Float] = config("pre_mrna_fraction")

  /** ignore alignments with gaps longer than this [300000] */
  var max_intron_length: Option[Int] = config("max_intron_length")

  /** alpha for junction binomial test filter [0.001] */
  var junc_alpha: Option[Float] = config("junc_alpha")

  /** percent read overhang taken as 'suspiciously small' [0.09] */
  var small_anchor_fraction: Option[Float] = config("small_anchor_fraction")

  /** minimum number of fragments needed for new transfrags [10] */
  var min_frags_per_transfrag: Option[Int] = config("min_frags_per_transfrag")

  /** number of terminal exon bp to tolerate in introns [8] */
  var overhang_tolerance: Option[Int] = config("overhang_tolerance")

  /** maximum genomic length allowed for a given bundle [3500000] */
  var max_bundle_length: Option[Int] = config("max_bundle_length")

  /** maximum fragments allowed in a bundle before skipping [500000] */
  var max_bundle_frags: Option[Int] = config("max_bundle_frags")

  /** minimum intron size allowed in genome [50] */
  var min_intron_length: Option[Int] = config("min_intron_length")

  /** minimum avg coverage required to attempt 3' trimming [10] */
  var trim_3_avgcov_thresh: Option[Int] = config("trim_3_avgcov_thresh")

  /** fraction of avg coverage below which to trim 3' end [0.1] */
  var trim_3_dropoff_frac: Option[Float] = config("trim_3_dropoff_frac")

  /** maximum fraction of allowed multireads per transcript [0.75] */
  var max_multiread_fraction: Option[Float] = config("max_multiread_fraction")

  /** maximum gap size to fill between transfrags (in bp) [50] */
  var overlap_radius: Option[Int] = config("overlap_radius")

  /** disable tiling by faux reads [FALSE] */
  var no_faux_reads: Boolean = config("no_faux_reads", default = false)

  /** overhang allowed on 3' end when merging with reference [600] */
  var flag_3_overhang_tolerance: Option[Int] = config("flag_3_overhang_tolerance")

  /** overhang allowed inside reference intron when merging [30] */
  var intron_overhang_tolerance: Option[Int] = config("intron_overhang_tolerance")

  /** log-friendly verbose processing (no progress bar) [FALSE] */
  var verbose: Boolean = config("verbose", default = false)

  /** log-friendly quiet processing (no progress bar) [FALSE] */
  var quiet: Boolean = config("quiet", default = false)

  /** do not contact server to check for update availability [FALSE] */
  var no_update_check: Boolean = config("no_update_check", default = false)

  /** pattern used to pick the version out of the output of the version command */
  override def versionRegex = """cufflinks v(.*)""".r

  /** the bare executable is run to obtain the version */
  override def versionCommand = executable

  /** exit codes of the version command that are accepted */
  override def versionExitcode = List(0, 1)

  /**
   * Assembles the cufflinks command line: the executable, the output directory,
   * the thread count, every option field of this class in declaration order,
   * and finally the input file as the last argument.
   */
  def cmdLine =
    required(executable) +
      required("--output-dir", output_dir) +
      optional("--num-threads", threads) +
      optional("--seed", seed) +
      optional("--GTF", GTF) +
      optional("--GTF-guide", GTF_guide) +
      optional("--mask-file", mask_file) +
      optional("--frag-bias-correct", frag_bias_correct) +
      conditional(multi_read_correct, "--multi-read-correct") +
      optional("--library-type", library_type) +
      optional("--library-norm-method", library_norm_method) +
      optional("--frag-len-mean", frag_len_mean) +
      optional("--frag-len-std-dev", frag_len_std_dev) +
      optional("--max-mle-iterations", max_mle_iterations) +
      conditional(compatible_hits_norm, "--compatible-hits-norm") +
      conditional(total_hits_norm, "--total-hits-norm") +
      optional("--num-frag-count-draws", num_frag_count_draws) +
      optional("--num-frag-assign-draws", num_frag_assign_draws) +
      optional("--max-frag-multihits", max_frag_multihits) +
      conditional(no_effective_length_correction, "--no-effective-length-correction") +
      conditional(no_length_correction, "--no-length-correction") +
      optional("--label", label) +
      optional("--min-isoform-fraction", min_isoform_fraction) +
      optional("--pre-mrna-fraction", pre_mrna_fraction) +
      optional("--max-intron-length", max_intron_length) +
      optional("--junc-alpha", junc_alpha) +
      optional("--small-anchor-fraction", small_anchor_fraction) +
      optional("--min-frags-per-transfrag", min_frags_per_transfrag) +
      optional("--overhang-tolerance", overhang_tolerance) +
      optional("--max-bundle-length", max_bundle_length) +
      optional("--max-bundle-frags", max_bundle_frags) +
      optional("--min-intron-length", min_intron_length) +
      optional("--trim-3-avgcov-thresh", trim_3_avgcov_thresh) +
      optional("--trim-3-dropoff-frac", trim_3_dropoff_frac) +
      optional("--max-multiread-fraction", max_multiread_fraction) +
      optional("--overlap-radius", overlap_radius) +
      conditional(no_faux_reads, "--no-faux-reads") +
      optional("--flag-3-overhang-tolerance", flag_3_overhang_tolerance) +
      optional("--intron-overhang-tolerance", intron_overhang_tolerance) +
      conditional(verbose, "--verbose") +
      conditional(quiet, "--quiet") +
      conditional(no_update_check, "--no-update-check") +
      required(input)
}