/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.extensions.gatk
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18

import java.io.File

19
import nl.lumc.sasc.biopet.core.{BiopetJavaCommandLineFunction, Reference, Version}
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
21
22
23
24
import org.broadinstitute.gatk.utils.commandline.{Argument, Gather, Input, Output}
import org.broadinstitute.gatk.utils.interval.{IntervalMergingRule, IntervalSetRule}

import scala.util.matching.Regex
Peter van 't Hof's avatar
Peter van 't Hof committed
25

26
trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with Version {
27
  analysisName = analysis_type
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  javaMainClass = "org.broadinstitute.gatk.engine.CommandLineGATK"
Peter van 't Hof's avatar
Peter van 't Hof committed
29
  jarFile = config("gatk_jar")
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31

  /** Name of the tool to run */
32
  def analysis_type: String
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
36
37
38
39
40
41
42
43

  /**
   * Input file containing sequence data (BAM or CRAM).
   *
   * Starts empty; every entry is passed to `repeat("-I", ...)` in `cmdLine`.
   */
  @Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (BAM or CRAM)", required = false, exclusiveOf = "", validation = "")
  var input_file: Seq[File] = Nil

  /**
   * Dependencies on any indexes of input_file.
   *
   * Private bookkeeping: `beforeGraph` adds a `.bai` path for each `input_file` entry whose name ends
   * with `.bam`. The field is not referenced by `cmdLine`.
   */
  @Input(fullName = "input_fileIndexes", shortName = "", doc = "Dependencies on any indexes of input_file", required = false, exclusiveOf = "", validation = "")
  private var input_fileIndexes: Seq[File] = Nil

  /** Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list or .cram.list files). */
  @Argument(fullName = "showFullBamList", shortName = "", doc = "Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list or .cram.list files).", required = false, exclusiveOf = "", validation = "")
44
  var showFullBamList: Boolean = config("showFullBamList", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
47

  /** Number of reads per SAM file to buffer in memory */
  @Argument(fullName = "read_buffer_size", shortName = "rbs", doc = "Number of reads per SAM file to buffer in memory", required = false, exclusiveOf = "", validation = "")
48
  var read_buffer_size: Option[Int] = config("read_buffer_size")
Peter van 't Hof's avatar
Peter van 't Hof committed
49
50
51

  /** Run reporting mode */
  @Argument(fullName = "phone_home", shortName = "et", doc = "Run reporting mode", required = false, exclusiveOf = "", validation = "")
52
  var phone_home: Option[String] = config("phone_home")
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54

  /** GATK key file required to run with -et NO_ET */
55
56
  @Input(fullName = "gatk_key", shortName = "K", doc = "GATK key file required to run with -et NO_ET", required = false, exclusiveOf = "", validation = "")
  var gatk_key: Option[File] = config("gatk_key")
Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
59

  /** Tag to identify this GATK run as part of a group of runs */
  @Argument(fullName = "tag", shortName = "tag", doc = "Tag to identify this GATK run as part of a group of runs", required = false, exclusiveOf = "", validation = "")
60
  var tag: Option[String] = config("tag")
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
63

  /** Filters to apply to reads before analysis */
  @Argument(fullName = "read_filter", shortName = "rf", doc = "Filters to apply to reads before analysis", required = false, exclusiveOf = "", validation = "")
64
  var read_filter: List[String] = config("read_filter", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
65
66
67

  /** Read filters to disable */
  @Argument(fullName = "disable_read_filter", shortName = "drf", doc = "Read filters to disable", required = false, exclusiveOf = "", validation = "")
68
  var disable_read_filter: List[String] = config("disable_read_filter", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
71

  /** One or more genomic intervals over which to operate */
  @Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate", required = false, exclusiveOf = "intervalsString", validation = "")
72
  var intervals: List[File] = config("intervals", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
75

  /** One or more genomic intervals over which to operate */
  @Argument(fullName = "intervalsString", shortName = "L", doc = "One or more genomic intervals over which to operate", required = false, exclusiveOf = "intervals", validation = "")
76
  var intervalsString: List[String] = config("intervalsString", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79

  /** One or more genomic intervals to exclude from processing */
  @Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing", required = false, exclusiveOf = "excludeIntervalsString", validation = "")
80
  var excludeIntervals: List[File] = config("excludeIntervals", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83

  /** One or more genomic intervals to exclude from processing */
  @Argument(fullName = "excludeIntervalsString", shortName = "XL", doc = "One or more genomic intervals to exclude from processing", required = false, exclusiveOf = "excludeIntervals", validation = "")
84
  var excludeIntervalsString: List[String] = config("excludeIntervalsString", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
87

  /** Set merging approach to use for combining interval inputs */
  @Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Set merging approach to use for combining interval inputs", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
88
  var interval_set_rule: Option[IntervalSetRule] = None
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91

  /** Interval merging rule for abutting intervals */
  @Argument(fullName = "interval_merging", shortName = "im", doc = "Interval merging rule for abutting intervals", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
92
  var interval_merging: Option[IntervalMergingRule] = None
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
95

  /** Amount of padding (in bp) to add to each interval */
  @Argument(fullName = "interval_padding", shortName = "ip", doc = "Amount of padding (in bp) to add to each interval", required = false, exclusiveOf = "", validation = "")
96
  var interval_padding: Option[Int] = config("interval_padding")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
99
100
101
102
103

  /**
   * Reference sequence file.
   *
   * Deliberately left uninitialised (`null`): `beforeGraph` assigns `referenceFasta()` when no value
   * has been set by then. Passed to `optional("-R", ...)` in `cmdLine`.
   */
  @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false, exclusiveOf = "", validation = "")
  var reference_sequence: File = _

  /** Use a non-deterministic random seed */
  @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Use a non-deterministic random seed", required = false, exclusiveOf = "", validation = "")
104
  var nonDeterministicRandomSeed: Boolean = config("nonDeterministicRandomSeed", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107

  /** Completely eliminates randomized dithering from rank sum tests. */
  @Argument(fullName = "disableDithering", shortName = "", doc = "Completely eliminates randomized dithering from rank sum tests.", required = false, exclusiveOf = "", validation = "")
108
  var disableDithering: Boolean = config("disableDithering", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
109
110
111

  /** Stop execution cleanly as soon as maxRuntime has been reached */
  @Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc = "Stop execution cleanly as soon as maxRuntime has been reached", required = false, exclusiveOf = "", validation = "")
112
  var maxRuntime: Option[Long] = config("maxRuntime")
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
115

  /** Unit of time used by maxRuntime */
  @Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc = "Unit of time used by maxRuntime", required = false, exclusiveOf = "", validation = "")
116
  var maxRuntimeUnits: Option[String] = config("maxRuntimeUnits")
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
119

  /** Type of read downsampling to employ at a given locus */
  @Argument(fullName = "downsampling_type", shortName = "dt", doc = "Type of read downsampling to employ at a given locus", required = false, exclusiveOf = "", validation = "")
120
  var downsampling_type: Option[String] = config("downsampling_type")
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123

  /** Fraction of reads to downsample to */
  @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction of reads to downsample to", required = false, exclusiveOf = "", validation = "")
124
  var downsample_to_fraction: Option[Double] = config("downsample_to_fraction")
Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
127
128
129
130
131

  /**
   * Format string for downsample_to_fraction.
   *
   * Handed to `optional("-dfrac", ...)` in `cmdLine` as its `format` argument; defaults to `"%s"`.
   */
  @Argument(fullName = "downsample_to_fractionFormat", shortName = "", doc = "Format string for downsample_to_fraction", required = false, exclusiveOf = "", validation = "")
  var downsample_to_fractionFormat: String = "%s"

  /** Target coverage threshold for downsampling to coverage */
  @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Target coverage threshold for downsampling to coverage", required = false, exclusiveOf = "", validation = "")
132
  var downsample_to_coverage: Option[Int] = config("downsample_to_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135

  /** Type of BAQ calculation to apply in the engine */
  @Argument(fullName = "baq", shortName = "baq", doc = "Type of BAQ calculation to apply in the engine", required = false, exclusiveOf = "", validation = "")
136
  var baq: Option[String] = config("baq")
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
139

  /** BAQ gap open penalty */
  @Argument(fullName = "baqGapOpenPenalty", shortName = "baqGOP", doc = "BAQ gap open penalty", required = false, exclusiveOf = "", validation = "")
140
  var baqGapOpenPenalty: Option[Double] = config("baqGapOpenPenalty")
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
143
144
145
146
147

  /**
   * Format string for baqGapOpenPenalty.
   *
   * Handed to `optional("-baqGOP", ...)` in `cmdLine` as its `format` argument; defaults to `"%s"`.
   */
  @Argument(fullName = "baqGapOpenPenaltyFormat", shortName = "", doc = "Format string for baqGapOpenPenalty", required = false, exclusiveOf = "", validation = "")
  var baqGapOpenPenaltyFormat: String = "%s"

  /** Reduce NDN elements in CIGAR string */
  @Argument(fullName = "refactor_NDN_cigar_string", shortName = "fixNDN", doc = "Reduce NDN elements in CIGAR string", required = false, exclusiveOf = "", validation = "")
148
  var refactor_NDN_cigar_string: Boolean = config("refactor_NDN_cigar_string", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
149
150
151

  /** Fix mis-encoded base quality scores */
  @Argument(fullName = "fix_misencoded_quality_scores", shortName = "fixMisencodedQuals", doc = "Fix mis-encoded base quality scores", required = false, exclusiveOf = "", validation = "")
152
  var fix_misencoded_quality_scores: Boolean = config("fix_misencoded_quality_scores", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
155

  /** Ignore warnings about base quality score encoding */
  @Argument(fullName = "allow_potentially_misencoded_quality_scores", shortName = "allowPotentiallyMisencodedQuals", doc = "Ignore warnings about base quality score encoding", required = false, exclusiveOf = "", validation = "")
156
  var allow_potentially_misencoded_quality_scores: Boolean = config("allow_potentially_misencoded_quality_scores", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
159

  /** Use the base quality scores from the OQ tag */
  @Argument(fullName = "useOriginalQualities", shortName = "OQ", doc = "Use the base quality scores from the OQ tag", required = false, exclusiveOf = "", validation = "")
160
  var useOriginalQualities: Boolean = config("useOriginalQualities", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163

  /** Assign a default base quality */
  @Argument(fullName = "defaultBaseQualities", shortName = "DBQ", doc = "Assign a default base quality", required = false, exclusiveOf = "", validation = "")
164
  var defaultBaseQualities: Option[Int] = config("defaultBaseQualities")
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166

  /** Write GATK runtime performance log to this file */
167
168
  @Output(fullName = "performanceLog", shortName = "PF", doc = "Write GATK runtime performance log to this file", required = false, exclusiveOf = "", validation = "")
  var performanceLog: Option[File] = None
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171

  /**
   * Input covariates table file for on-the-fly base quality score recalibration.
   *
   * Not read from the config: it stays `None` until a caller assigns a table. It is initialised to
   * `None` rather than left uninitialised, because an uninitialised `Option` field is `null` and any
   * `isDefined` / `map` / `foreach` on it would throw a NullPointerException.
   * Passed to `optional("-BQSR", ...)` in `cmdLine`.
   */
  @Input(fullName = "BQSR", shortName = "BQSR", doc = "Input covariates table file for on-the-fly base quality score recalibration", required = false, exclusiveOf = "", validation = "")
  var BQSR: Option[File] = None
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
175

  /** Quantize quality scores to a given number of levels (with -BQSR) */
  @Argument(fullName = "quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels (with -BQSR)", required = false, exclusiveOf = "", validation = "")
176
  var quantize_quals: Option[Int] = config("quantize_quals")
Peter van 't Hof's avatar
Peter van 't Hof committed
177
178
179

  /** Use static quantized quality scores to a given number of levels (with -BQSR) */
  @Argument(fullName = "static_quantized_quals", shortName = "SQQ", doc = "Use static quantized quality scores to a given number of levels (with -BQSR)", required = false, exclusiveOf = "quantize_quals", validation = "")
180
  var static_quantized_quals: List[Int] = config("static_quantized_quals", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183

  /** Round quals down to nearest quantized qual */
  @Argument(fullName = "round_down_quantized", shortName = "RDQ", doc = "Round quals down to nearest quantized qual", required = false, exclusiveOf = "quantize_quals", validation = "")
184
  var round_down_quantized: Boolean = config("round_down_quantized", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
187

  /** Disable printing of base insertion and deletion tags (with -BQSR) */
  @Argument(fullName = "disable_indel_quals", shortName = "DIQ", doc = "Disable printing of base insertion and deletion tags (with -BQSR)", required = false, exclusiveOf = "", validation = "")
188
  var disable_indel_quals: Boolean = config("disable_indel_quals", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191

  /** Emit the OQ tag with the original base qualities (with -BQSR) */
  @Argument(fullName = "emit_original_quals", shortName = "EOQ", doc = "Emit the OQ tag with the original base qualities (with -BQSR)", required = false, exclusiveOf = "", validation = "")
192
  var emit_original_quals: Boolean = config("emit_original_quals", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
195

  /** Don't recalibrate bases with quality scores less than this threshold (with -BQSR) */
  @Argument(fullName = "preserve_qscores_less_than", shortName = "preserveQ", doc = "Don't recalibrate bases with quality scores less than this threshold (with -BQSR)", required = false, exclusiveOf = "", validation = "")
196
  var preserve_qscores_less_than: Option[Int] = config("preserve_qscores_less_than")
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
199

  /** Global Qscore Bayesian prior to use for BQSR */
  @Argument(fullName = "globalQScorePrior", shortName = "globalQScorePrior", doc = "Global Qscore Bayesian prior to use for BQSR", required = false, exclusiveOf = "", validation = "")
200
  var globalQScorePrior: Option[Double] = config("globalQScorePrior")
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
204
205
206
207

  /**
   * Format string for globalQScorePrior.
   *
   * Handed to `optional("-globalQScorePrior", ...)` in `cmdLine` as its `format` argument; defaults to `"%s"`.
   */
  @Argument(fullName = "globalQScorePriorFormat", shortName = "", doc = "Format string for globalQScorePrior", required = false, exclusiveOf = "", validation = "")
  var globalQScorePriorFormat: String = "%s"

  /** How strict should we be with validation */
  @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false, exclusiveOf = "", validation = "")
208
  var validation_strictness: Option[String] = config("validation_strictness")
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
211

  /** Remove program records from the SAM header */
  @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Remove program records from the SAM header", required = false, exclusiveOf = "", validation = "")
212
  var remove_program_records: Boolean = config("remove_program_records", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
215

  /** Keep program records in the SAM header */
  @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Keep program records in the SAM header", required = false, exclusiveOf = "", validation = "")
216
  var keep_program_records: Boolean = config("keep_program_records", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
217
218

  /** Rename sample IDs on-the-fly at runtime using the provided mapping file */
219
220
  @Input(fullName = "sample_rename_mapping_file", shortName = "sample_rename_mapping_file", doc = "Rename sample IDs on-the-fly at runtime using the provided mapping file", required = false, exclusiveOf = "", validation = "")
  var sample_rename_mapping_file: Option[File] = config("sample_rename_mapping_file")
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
223

  /** Enable unsafe operations: nothing will be checked at runtime */
  @Argument(fullName = "unsafe", shortName = "U", doc = "Enable unsafe operations: nothing will be checked at runtime", required = false, exclusiveOf = "", validation = "")
224
  var unsafe: Option[String] = config("unsafe")
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
227

  /** Disable both auto-generation of index files and index file locking */
  @Argument(fullName = "disable_auto_index_creation_and_locking_when_reading_rods", shortName = "disable_auto_index_creation_and_locking_when_reading_rods", doc = "Disable both auto-generation of index files and index file locking", required = false, exclusiveOf = "", validation = "")
228
  var disable_auto_index_creation_and_locking_when_reading_rods: Boolean = config("disable_auto_index_creation_and_locking_when_reading_rods", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
231

  /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */
  @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false, exclusiveOf = "", validation = "")
232
  var no_cmdline_in_header: Boolean = config("no_cmdline_in_header", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
235

  /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */
  @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required = false, exclusiveOf = "", validation = "")
236
  var sites_only: Boolean = config("sites_only", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
239

  /** Always output all the records in VCF FORMAT fields, even if some are missing */
  @Argument(fullName = "never_trim_vcf_format_field", shortName = "writeFullFormat", doc = "Always output all the records in VCF FORMAT fields, even if some are missing", required = false, exclusiveOf = "", validation = "")
240
  var never_trim_vcf_format_field: Boolean = config("never_trim_vcf_format_field", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
243

  /** Force BCF output, regardless of the file's extension */
  @Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension", required = false, exclusiveOf = "", validation = "")
244
  var bcf: Boolean = config("bcf", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
245
246
247

  /** Compression level to use for writing BAM files (0 - 9, higher is more compressed) */
  @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files (0 - 9, higher is more compressed)", required = false, exclusiveOf = "", validation = "")
248
  var bam_compression: Option[Int] = config("bam_compression")
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251

  /** If provided, output BAM/CRAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier */
  @Argument(fullName = "simplifyBAM", shortName = "simplifyBAM", doc = "If provided, output BAM/CRAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", required = false, exclusiveOf = "", validation = "")
252
  var simplifyBAM: Boolean = config("simplifyBAM", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255

  /** Turn off on-the-fly creation of indices for output BAM/CRAM files. */
  @Argument(fullName = "disable_bam_indexing", shortName = "", doc = "Turn off on-the-fly creation of indices for output BAM/CRAM files.", required = false, exclusiveOf = "", validation = "")
256
  var disable_bam_indexing: Boolean = config("disable_bam_indexing", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259

  /** Enable on-the-fly creation of md5s for output BAM files. */
  @Argument(fullName = "generate_md5", shortName = "", doc = "Enable on-the-fly creation of md5s for output BAM files.", required = false, exclusiveOf = "", validation = "")
260
  var generate_md5: Boolean = config("generate_md5", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275

  /**
   * Number of data threads to allocate to this analysis.
   *
   * Defaults to `None` (no config lookup here). When set, `beforeGraph` copies it into
   * `nCoresRequest`, and `cmdLine` emits it as `-nt`.
   */
  @Argument(fullName = "num_threads", shortName = "nt", doc = "Number of data threads to allocate to this analysis", required = false, exclusiveOf = "", validation = "")
  var num_threads: Option[Int] = None

  /**
   * Number of CPU threads to allocate per data thread.
   *
   * Defaults to `None` (no config lookup here). When set, `beforeGraph` multiplies `nCoresRequest`
   * (taken as 1 if unset) by this value, and `cmdLine` emits it as `-nct`.
   */
  @Argument(fullName = "num_cpu_threads_per_data_thread", shortName = "nct", doc = "Number of CPU threads to allocate per data thread", required = false, exclusiveOf = "", validation = "")
  var num_cpu_threads_per_data_thread: Option[Int] = None

  /**
   * Number of given threads to allocate to BAM IO.
   *
   * Defaults to `None` (no config lookup here); only used by `cmdLine`, which emits it as `-nit`.
   */
  @Argument(fullName = "num_io_threads", shortName = "nit", doc = "Number of given threads to allocate to BAM IO", required = false, exclusiveOf = "", validation = "")
  var num_io_threads: Option[Int] = None

  /** Enable threading efficiency monitoring */
  @Argument(fullName = "monitorThreadEfficiency", shortName = "mte", doc = "Enable threading efficiency monitoring", required = false, exclusiveOf = "", validation = "")
276
  var monitorThreadEfficiency: Boolean = config("monitorThreadEfficiency", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
279
280
281
282
283

  /** When using IO threads, total number of BAM file handles to keep open simultaneously */
  @Argument(fullName = "num_bam_file_handles", shortName = "bfh", doc = "When using IO threads, total number of BAM file handles to keep open simultaneously", required = false, exclusiveOf = "", validation = "")
  var num_bam_file_handles: Option[Int] = None

  /** Exclude read groups based on tags */
  @Input(fullName = "read_group_black_list", shortName = "rgbl", doc = "Exclude read groups based on tags", required = false, exclusiveOf = "", validation = "")
284
  var read_group_black_list: List[File] = config("read_group_black_list", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
285
286
287

  /** Pedigree files for samples */
  @Argument(fullName = "pedigree", shortName = "ped", doc = "Pedigree files for samples", required = false, exclusiveOf = "", validation = "")
288
  var pedigree: List[File] = config("pedigree", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
289
290
291

  /** Pedigree string for samples */
  @Argument(fullName = "pedigreeString", shortName = "pedString", doc = "Pedigree string for samples", required = false, exclusiveOf = "", validation = "")
292
  var pedigreeString: List[String] = config("pedigreeString", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
293
294
295

  /** Validation strictness for pedigree information */
  @Argument(fullName = "pedigreeValidationType", shortName = "pedValidationType", doc = "Validation strictness for pedigree information", required = false, exclusiveOf = "", validation = "")
296
  var pedigreeValidationType: Option[String] = config("pedigreeValidationType")
Peter van 't Hof's avatar
Peter van 't Hof committed
297
298
299

  /** Allow interval processing with an unsupported BAM/CRAM */
  @Argument(fullName = "allow_intervals_with_unindexed_bam", shortName = "", doc = "Allow interval processing with an unsupported BAM/CRAM", required = false, exclusiveOf = "", validation = "")
300
  var allow_intervals_with_unindexed_bam: Boolean = config("allow_intervals_with_unindexed_bam", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
301
302
303

  /** Write a BCF copy of the output VCF */
  @Argument(fullName = "generateShadowBCF", shortName = "generateShadowBCF", doc = "Write a BCF copy of the output VCF", required = false, exclusiveOf = "", validation = "")
304
  var generateShadowBCF: Boolean = config("generateShadowBCF", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
305
306
307

  /** Type of IndexCreator to use for VCF/BCF indices */
  @Argument(fullName = "variant_index_type", shortName = "variant_index_type", doc = "Type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "")
308
  var variant_index_type: Option[String] = config("variant_index_type")
Peter van 't Hof's avatar
Peter van 't Hof committed
309
310
311

  /** Parameter to pass to the VCF/BCF IndexCreator */
  @Argument(fullName = "variant_index_parameter", shortName = "variant_index_parameter", doc = "Parameter to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "")
312
  var variant_index_parameter: Option[Int] = config("variant_index_parameter")
Peter van 't Hof's avatar
Peter van 't Hof committed
313
314
315

  /** Reference window stop */
  @Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", required = false, exclusiveOf = "", validation = "")
316
  var reference_window_stop: Option[Int] = config("reference_window_stop")
Peter van 't Hof's avatar
Peter van 't Hof committed
317
318
319

  /** Set the minimum level of logging */
  @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "")
320
  var logging_level: Option[String] = config("logging_level")
Peter van 't Hof's avatar
Peter van 't Hof committed
321
322
323
324
325
326

  /**
   * Set the logging location.
   *
   * Left uninitialised (`null`) here, so a caller has to assign a file to use it. Passed to
   * `optional("-log", ...)` in `cmdLine`. The `@Gather` annotation names `SimpleTextGatherFunction`
   * as the gather function for this output.
   */
  @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "")
  @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
  var log_to_file: File = _

327
  /** Pattern used to pick the version out of the version command's output; its single group captures any text. */
  def versionRegex: Regex = new Regex("(.*)")

  /** Exit codes listed for the version command: 0 and 1 (presumably the ones the Version mixin accepts - confirm). */
  override def versionExitcode = 0 :: 1 :: Nil

  /** Command that prints the GATK version: the configured executable run with `-jar <jarFile> -version`. */
  def versionCommand: String = s"$executable -jar $jarFile -version"
330

Peter van 't Hof's avatar
Peter van 't Hof committed
331
332
  /** Default core memory for GATK jobs: 4.0 (presumably gigabytes - confirm against the base command line function). */
  override def defaultCoreMemory = 4.0

  /** Overrides the inherited flag to `true` (presumably makes the Reference mixin require the FASTA `.fai` index - confirm). */
  override def faiRequired = true
333
334
  /** Overrides the inherited flag to `true` (presumably makes the Reference mixin require the sequence dictionary - confirm). */
  override def dictRequired = true

335
  /**
   * Fills in the fields that depend on the config or on other fields, after running the inherited hook.
   *
   *  - `interval_set_rule` / `interval_merging`: when still empty, taken from the config keys of the
   *    same name and converted with the enum's `valueOf`. A `null` field is left untouched.
   *  - `reference_sequence`: falls back to `referenceFasta()` when still `null`.
   *  - `input_fileIndexes`: gains a `.bai` path for every non-null `input_file` entry whose name ends
   *    with `.bam`. The list is de-duplicated, so calling this method again does not pile up
   *    repeated index dependencies.
   *  - `nCoresRequest`: set from `num_threads`, then multiplied by `num_cpu_threads_per_data_thread`.
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()

    // A value set programmatically wins; the config is consulted only for an empty Option.
    if (interval_set_rule != null && interval_set_rule.isEmpty) {
      val configured: Option[String] = config("interval_set_rule")
      interval_set_rule = configured.map(IntervalSetRule.valueOf)
    }
    if (interval_merging != null && interval_merging.isEmpty) {
      val configured: Option[String] = config("interval_merging")
      interval_merging = configured.map(IntervalMergingRule.valueOf)
    }

    if (reference_sequence == null) reference_sequence = referenceFasta()

    // The index is expected next to the BAM as "<name>.bai" (the ".bam" suffix is replaced, not extended).
    val bamIndexes = input_file.collect {
      case bam if bam != null && bam.getName.endsWith(".bam") =>
        new File(bam.getPath.stripSuffix(".bam") + ".bai")
    }
    input_fileIndexes = (input_fileIndexes ++ bamIndexes).distinct

    if (num_threads.isDefined) nCoresRequest = num_threads
    // NOTE(review): when only num_cpu_threads_per_data_thread is set, this multiplies whatever
    // nCoresRequest currently holds, so a repeated call would compound - confirm this runs once per job.
    if (num_cpu_threads_per_data_thread.isDefined)
      nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))
  }

351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
  /**
   * Builds the GATK command line: the inherited command line followed by one fragment per engine
   * argument, in a fixed order. Each fragment comes from the inherited `required` / `optional` /
   * `repeat` / `conditional` helpers.
   */
  override def cmdLine: String = {
    // Evaluate the inherited part first so evaluation order matches a plain left-to-right concatenation.
    val inherited = super.cmdLine

    val engineArgs = Seq[String](
      // Walker and inputs
      required("-T", analysis_type),
      repeat("-I", input_file, formatPrefix = TaggedFile.formatCommandLineParameter),
      conditional(showFullBamList, "--showFullBamList"),
      optional("-rbs", read_buffer_size),
      optional("-et", phone_home),
      optional("-K", gatk_key),
      optional("-tag", tag),
      // Read filters
      repeat("-rf", read_filter),
      repeat("-drf", disable_read_filter),
      // Intervals
      repeat("-L", intervals),
      repeat("-L", intervalsString),
      repeat("-XL", excludeIntervals),
      repeat("-XL", excludeIntervalsString),
      optional("-isr", interval_set_rule),
      optional("-im", interval_merging),
      optional("-ip", interval_padding),
      // Reference, randomness and runtime limits
      optional("-R", reference_sequence),
      conditional(nonDeterministicRandomSeed, "-ndrs"),
      conditional(disableDithering, "--disableDithering"),
      optional("-maxRuntime", maxRuntime),
      optional("-maxRuntimeUnits", maxRuntimeUnits),
      // Downsampling
      optional("-dt", downsampling_type),
      optional("-dfrac", downsample_to_fraction, format = downsample_to_fractionFormat),
      optional("-dcov", downsample_to_coverage),
      // BAQ and base qualities
      optional("-baq", baq),
      optional("-baqGOP", baqGapOpenPenalty, format = baqGapOpenPenaltyFormat),
      conditional(refactor_NDN_cigar_string, "-fixNDN"),
      conditional(fix_misencoded_quality_scores, "-fixMisencodedQuals"),
      conditional(allow_potentially_misencoded_quality_scores, "-allowPotentiallyMisencodedQuals"),
      conditional(useOriginalQualities, "-OQ"),
      optional("-DBQ", defaultBaseQualities),
      optional("-PF", performanceLog),
      // On-the-fly recalibration
      optional("-BQSR", BQSR),
      optional("-qq", quantize_quals),
      repeat("-SQQ", static_quantized_quals),
      conditional(round_down_quantized, "-RDQ"),
      conditional(disable_indel_quals, "-DIQ"),
      conditional(emit_original_quals, "-EOQ"),
      optional("-preserveQ", preserve_qscores_less_than),
      optional("-globalQScorePrior", globalQScorePrior, format = globalQScorePriorFormat),
      // Validation and headers
      optional("-S", validation_strictness),
      conditional(remove_program_records, "-rpr"),
      conditional(keep_program_records, "-kpr"),
      optional("-sample_rename_mapping_file", sample_rename_mapping_file),
      optional("-U", unsafe),
      conditional(disable_auto_index_creation_and_locking_when_reading_rods, "-disable_auto_index_creation_and_locking_when_reading_rods"),
      // VCF / BAM output
      conditional(no_cmdline_in_header, "-no_cmdline_in_header"),
      conditional(sites_only, "-sites_only"),
      conditional(never_trim_vcf_format_field, "-writeFullFormat"),
      conditional(bcf, "-bcf"),
      optional("-compress", bam_compression),
      conditional(simplifyBAM, "-simplifyBAM"),
      conditional(disable_bam_indexing, "--disable_bam_indexing"),
      conditional(generate_md5, "--generate_md5"),
      // Threading
      optional("-nt", num_threads),
      optional("-nct", num_cpu_threads_per_data_thread),
      optional("-nit", num_io_threads),
      conditional(monitorThreadEfficiency, "-mte"),
      optional("-bfh", num_bam_file_handles),
      // Read groups and pedigree
      repeat("-rgbl", read_group_black_list),
      repeat("-ped", pedigree),
      repeat("-pedString", pedigreeString),
      optional("-pedValidationType", pedigreeValidationType),
      // Indexing, reference window and logging
      conditional(allow_intervals_with_unindexed_bam, "--allow_intervals_with_unindexed_bam"),
      conditional(generateShadowBCF, "-generateShadowBCF"),
      optional("-variant_index_type", variant_index_type),
      optional("-variant_index_parameter", variant_index_parameter),
      optional("-ref_win_stop", reference_window_stop),
      optional("-l", logging_level),
      optional("-log", log_to_file)
    )

    inherited + engineArgs.mkString
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
422
}