UnifiedGenotyper.scala 26.8 KB
Newer Older
1
2
3
4
5
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
6
package nl.lumc.sasc.biopet.extensions.gatk.broad
7

Peter van 't Hof's avatar
Peter van 't Hof committed
8
9
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
10
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import nl.lumc.sasc.biopet.utils.VcfUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ }
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16

class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
17
  def analysis_type = "UnifiedGenotyper"
Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
20
21
22
  // Scatter implementation used when this job is split up for scatter-gather.
  scatterClass = classOf[LocusScatterFunction]
  // Per-scatter-function setup: for GATK scatter functions, switch off includeUnmapped.
  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }

  /** Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together */
  @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
23
  var genotype_likelihoods_model: Option[String] = config("genotype_likelihoods_model")
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26

  /** The PCR error rate to be used for computing fragment-based likelihoods */
  @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var pcr_error_rate: Option[Double] = config("pcr_error_rate")
Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
31
32
33
34

  /** Format string for pcr_error_rate */
  @Argument(fullName = "pcr_error_rateFormat", shortName = "", doc = "Format string for pcr_error_rate", required = false, exclusiveOf = "", validation = "")
  var pcr_error_rateFormat: String = "%s"

  /** If provided, we will calculate the SLOD (SB annotation) */
  @Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
35
  var computeSLOD: Boolean = config("computeSLOD", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38

  /** The PairHMM implementation to use for -glm INDEL genotype likelihood calculations */
  @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var pair_hmm_implementation: Option[String] = config("pair_hmm_implementation")
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42

  /** Minimum base quality required to consider a base for calling */
  @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var min_base_quality_score: Option[Int] = config("min_base_quality_score")
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
46

  /** Maximum fraction of reads with deletions spanning this locus for it to be callable */
  @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
47
  var max_deletion_fraction: Option[Double] = config("max_deletion_fraction")
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
51
52
53
54

  /** Format string for max_deletion_fraction */
  @Argument(fullName = "max_deletion_fractionFormat", shortName = "", doc = "Format string for max_deletion_fraction", required = false, exclusiveOf = "", validation = "")
  var max_deletion_fractionFormat: String = "%s"

  /** Minimum number of consensus indels required to trigger genotyping run */
  @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  var min_indel_count_for_genotyping: Option[Int] = config("min_indel_count_for_genotyping")
Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
58

  /** Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles */
  @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
59
  var min_indel_fraction_per_sample: Option[Double] = config("min_indel_fraction_per_sample")
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
65
66

  /** Format string for min_indel_fraction_per_sample */
  @Argument(fullName = "min_indel_fraction_per_sampleFormat", shortName = "", doc = "Format string for min_indel_fraction_per_sample", required = false, exclusiveOf = "", validation = "")
  var min_indel_fraction_per_sampleFormat: String = "%s"

  /** Indel gap continuation penalty, as Phred-scaled probability.  I.e., 30 => 10^-30/10 */
  @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability.  I.e., 30 => 10^-30/10", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
67
  var indelGapContinuationPenalty: Option[String] = config("indelGapContinuationPenalty")
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
70

  /** Indel gap open penalty, as Phred-scaled probability.  I.e., 30 => 10^-30/10 */
  @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability.  I.e., 30 => 10^-30/10", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  var indelGapOpenPenalty: Option[String] = config("indelGapOpenPenalty")
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
74

  /** Indel haplotype size */
  @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
75
  var indelHaplotypeSize: Option[Int] = config("indelHaplotypeSize")
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
78

  /** Output indel debug info */
  @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
79
  var indelDebug: Boolean = config("indelDebug", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
82

  /** expt */
  @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
83
  var ignoreSNPAlleles: Boolean = config("ignoreSNPAlleles", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
86

  /** expt */
  @Argument(fullName = "allReadsSP", shortName = "dl", doc = "expt", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
  var allReadsSP: Boolean = config("allReadsSP", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90

  /** Ignore lane when building error model, error model is then per-site */
  @Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
91
  var ignoreLaneInfo: Boolean = config("ignoreLaneInfo", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
92
93
94

  /** VCF file with the truth callset for the reference sample */
  @Input(fullName = "reference_sample_calls", shortName = "referenceCalls", doc = "VCF file with the truth callset for the reference sample", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
  var reference_sample_calls: Option[File] = config("reference_sample_calls")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98

  /** Reference sample name. */
  @Argument(fullName = "reference_sample_name", shortName = "refsample", doc = "Reference sample name.", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
99
  var reference_sample_name: Option[String] = config("reference_sample_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
102

  /** Min quality score to consider. Smaller numbers process faster. Default: Q1. */
  @Argument(fullName = "min_quality_score", shortName = "minqs", doc = "Min quality score to consider. Smaller numbers process faster. Default: Q1.", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
103
  var min_quality_score: Option[String] = config("min_quality_score")
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106

  /** Max quality score to consider. Smaller numbers process faster. Default: Q40. */
  @Argument(fullName = "max_quality_score", shortName = "maxqs", doc = "Max quality score to consider. Smaller numbers process faster. Default: Q40.", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
107
  var max_quality_score: Option[String] = config("max_quality_score")
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
110

  /** Phred-Scaled prior quality of the site. Default: Q20. */
  @Argument(fullName = "site_quality_prior", shortName = "site_prior", doc = "Phred-Scaled prior quality of the site. Default: Q20.", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
111
  var site_quality_prior: Option[String] = config("site_quality_prior")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
114

  /** The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required). */
  @Argument(fullName = "min_power_threshold_for_calling", shortName = "min_call_power", doc = "The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
115
  var min_power_threshold_for_calling: Option[Double] = config("min_power_threshold_for_calling")
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
118
119
120
121
122

  /** Format string for min_power_threshold_for_calling */
  @Argument(fullName = "min_power_threshold_for_callingFormat", shortName = "", doc = "Format string for min_power_threshold_for_calling", required = false, exclusiveOf = "", validation = "")
  var min_power_threshold_for_callingFormat: String = "%s"

  /** If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site */
  @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
123
  var annotateNDA: Boolean = config("annotateNDA", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
126

  /** Heterozygosity value used to compute prior likelihoods for any locus */
  @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
127
  var heterozygosity: Option[Double] = config("heterozygosity")
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
131
132
133
134

  /** Format string for heterozygosity */
  @Argument(fullName = "heterozygosityFormat", shortName = "", doc = "Format string for heterozygosity", required = false, exclusiveOf = "", validation = "")
  var heterozygosityFormat: String = "%s"

  /** Heterozygosity for indel calling */
  @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
135
  var indel_heterozygosity: Option[Double] = config("indel_heterozygosity")
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
138
139

  /** Format string for indel_heterozygosity */
  @Argument(fullName = "indel_heterozygosityFormat", shortName = "", doc = "Format string for indel_heterozygosity", required = false, exclusiveOf = "", validation = "")
  var indel_heterozygosityFormat: String = "%s"
Peter van 't Hof's avatar
Peter van 't Hof committed
140

Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
  /** The minimum phred-scaled confidence threshold at which variants should be called */
  @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
143
  var standard_min_confidence_threshold_for_calling: Option[Double] = config("stand_call_conf")
Peter van 't Hof's avatar
Peter van 't Hof committed
144
145
146
147
148
149
150

  /** Format string for standard_min_confidence_threshold_for_calling */
  @Argument(fullName = "standard_min_confidence_threshold_for_callingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_calling", required = false, exclusiveOf = "", validation = "")
  var standard_min_confidence_threshold_for_callingFormat: String = "%s"

  /** The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) */
  @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
151
  var standard_min_confidence_threshold_for_emitting: Option[Double] = config("stand_emit_conf")
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
155
156
157
158

  /** Format string for standard_min_confidence_threshold_for_emitting */
  @Argument(fullName = "standard_min_confidence_threshold_for_emittingFormat", shortName = "", doc = "Format string for standard_min_confidence_threshold_for_emitting", required = false, exclusiveOf = "", validation = "")
  var standard_min_confidence_threshold_for_emittingFormat: String = "%s"

  /** Maximum number of alternate alleles to genotype */
  @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
159
  var max_alternate_alleles: Option[Int] = config("max_alternate_alleles")
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162

  /** Input prior for calls */
  @Argument(fullName = "input_prior", shortName = "inputPrior", doc = "Input prior for calls", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
163
  var input_prior: List[Double] = config("input_prior", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166

  /** Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy). */
  @Argument(fullName = "sample_ploidy", shortName = "ploidy", doc = "Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
167
  var sample_ploidy: Option[Int] = config("sample_ploidy")
Peter van 't Hof's avatar
Peter van 't Hof committed
168
169
170

  /** Specifies how to determine the alternate alleles to use for genotyping */
  @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
171
  var genotyping_mode: Option[String] = config("genotyping_mode")
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
174

  /** The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES */
  @Input(fullName = "alleles", shortName = "alleles", doc = "The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
175
  var alleles: Option[File] = config("alleles")
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
178

  /** Fraction of contamination in sequencing data (for all samples) to aggressively remove */
  @Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
179
  var contamination_fraction_to_filter: Option[Double] = config("contamination_fraction_to_filter")
Peter van 't Hof's avatar
Peter van 't Hof committed
180
181
182
183
184
185
186

  /** Format string for contamination_fraction_to_filter */
  @Argument(fullName = "contamination_fraction_to_filterFormat", shortName = "", doc = "Format string for contamination_fraction_to_filter", required = false, exclusiveOf = "", validation = "")
  var contamination_fraction_to_filterFormat: String = "%s"

  /** Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header. */
  @Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
187
  var contamination_fraction_per_sample_file: Option[File] = config("contamination_fraction_per_sample_file")
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190

  /** Non-reference probability calculation model to employ */
  @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
191
  var p_nonref_model: Option[String] = config("p_nonref_model")
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194

  /** x */
  @Argument(fullName = "exactcallslog", shortName = "logExactCalls", doc = "x", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
195
  var exactcallslog: Option[File] = config("exactcallslog")
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198

  /** Specifies which type of calls we should output */
  @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
199
  var output_mode: Option[String] = config("output_mode")
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202

  /** Annotate all sites with PLs */
  @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
203
  var allSitePLs: Boolean = config("allSitePLs", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
206

  /** dbSNP file */
  @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
207
  var dbsnp: Option[File] = config("dbsnp")
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
210

  /** Comparison VCF file */
  @Input(fullName = "comp", shortName = "comp", doc = "Comparison VCF file", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
211
  var comp: List[File] = config("comp", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
214
215
216
217
218
219

  /** File to which variants should be written */
  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
  @Gather(classOf[CatVariantsGatherer])
  var out: File = _

  /** If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file */
  @Argument(fullName = "onlyEmitSamples", shortName = "onlyEmitSamples", doc = "If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
220
  var onlyEmitSamples: List[String] = config("onlyEmitSamples", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
221

Peter van 't Hof's avatar
Peter van 't Hof committed
222
223
224
  /** File to print all of the annotated and detailed debugging output */
  @Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false, exclusiveOf = "", validation = "")
  var debug_file: File = _
225

Peter van 't Hof's avatar
Peter van 't Hof committed
226
227
228
229
230
231
  /** File to print any relevant callability metrics output */
  @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false, exclusiveOf = "", validation = "")
  var metrics_file: File = _

  /** One or more specific annotations to apply to variant calls */
  @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
232
  var annotation: List[String] = config("annotation", default = Nil, freeVar = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
235

  /** One or more specific annotations to exclude */
  @Argument(fullName = "excludeAnnotation", shortName = "XA", doc = "One or more specific annotations to exclude", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
236
  var excludeAnnotation: List[String] = config("excludeAnnotation", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
239

  /** One or more classes/groups of annotations to apply to variant calls.  The single value 'none' removes the default group */
  @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls.  The single value 'none' removes the default group", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
240
  var group: List[String] = config("group", default = Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
243

  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
244
  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
245
246
247

  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
248
  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251

  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
Peter van 't Hof's avatar
Peter van 't Hof committed
252
  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
253

Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
  /**
   * Registers the VCF index files belonging to this job's inputs and output.
   *
   * After delegating to the parent implementation this:
   *  - appends the index file of each defined optional VCF input
   *    (reference_sample_calls, alleles, dbsnp - in that order) to `deps`;
   *  - appends the index file of every non-null `comp` entry whose name does not
   *    end in ".list" to `deps`;
   *  - appends the index file of `out` to `outputFiles` when `out` is set, is not
   *    reported as a special file by IOUtils.isSpecialFile and its path is not
   *    reported as compressed by ArgumentTypeDescriptor.isCompressed.
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()

    // Optional single-file inputs: only the ones that are defined contribute an index.
    for (vcf <- List(reference_sample_calls, alleles, dbsnp).flatten)
      deps :+= VcfUtils.getVcfIndexFile(vcf)

    // Comparison tracks: null entries and ".list" files are skipped.
    val compIndexes = comp.collect {
      case vcf if vcf != null && !vcf.getName.endsWith(".list") => VcfUtils.getVcfIndexFile(vcf)
    }
    deps ++= compIndexes

    // The && chain keeps the original evaluation order, so `out` is never
    // dereferenced when it is null.
    val outputGetsIndex = out != null &&
      !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out) &&
      !org.broadinstitute.gatk.utils.commandline.ArgumentTypeDescriptor.isCompressed(out.getPath)
    if (outputGetsIndex) outputFiles :+= VcfUtils.getVcfIndexFile(out)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
  /**
   * Builds the UnifiedGenotyper part of the command line by appending every
   * argument of this class to the command line produced by the parent class.
   *
   * Option-typed and possibly-unset values go through `optional`, Boolean flags
   * through `conditional` and list-valued arguments through `repeat`.
   * Double-valued arguments use their companion `...Format` field as format string.
   */
  override def cmdLine = super.cmdLine +
    optional("-glm", genotype_likelihoods_model, spaceSeparated = true, escape = true, format = "%s") +
    optional("-pcr_error", pcr_error_rate, spaceSeparated = true, escape = true, format = pcr_error_rateFormat) +
    conditional(computeSLOD, "-slod", escape = true, format = "%s") +
    optional("-pairHMM", pair_hmm_implementation, spaceSeparated = true, escape = true, format = "%s") +
    optional("-mbq", min_base_quality_score, spaceSeparated = true, escape = true, format = "%s") +
    optional("-deletions", max_deletion_fraction, spaceSeparated = true, escape = true, format = max_deletion_fractionFormat) +
    optional("-minIndelCnt", min_indel_count_for_genotyping, spaceSeparated = true, escape = true, format = "%s") +
    optional("-minIndelFrac", min_indel_fraction_per_sample, spaceSeparated = true, escape = true, format = min_indel_fraction_per_sampleFormat) +
    optional("-indelGCP", indelGapContinuationPenalty, spaceSeparated = true, escape = true, format = "%s") +
    optional("-indelGOP", indelGapOpenPenalty, spaceSeparated = true, escape = true, format = "%s") +
    optional("-indelHSize", indelHaplotypeSize, spaceSeparated = true, escape = true, format = "%s") +
    conditional(indelDebug, "-indelDebug", escape = true, format = "%s") +
    conditional(ignoreSNPAlleles, "-ignoreSNPAlleles", escape = true, format = "%s") +
    conditional(allReadsSP, "-dl", escape = true, format = "%s") +
    conditional(ignoreLaneInfo, "-ignoreLane", escape = true, format = "%s") +
    // The prefix formatter inspects the file itself (to pick up a TaggedFile tag),
    // so it must receive the unwrapped File rather than the Option around it.
    optional(TaggedFile.formatCommandLineParameter("-referenceCalls", reference_sample_calls.orNull), reference_sample_calls, spaceSeparated = true, escape = true, format = "%s") +
    optional("-refsample", reference_sample_name, spaceSeparated = true, escape = true, format = "%s") +
    optional("-minqs", min_quality_score, spaceSeparated = true, escape = true, format = "%s") +
    optional("-maxqs", max_quality_score, spaceSeparated = true, escape = true, format = "%s") +
    optional("-site_prior", site_quality_prior, spaceSeparated = true, escape = true, format = "%s") +
    optional("-min_call_power", min_power_threshold_for_calling, spaceSeparated = true, escape = true, format = min_power_threshold_for_callingFormat) +
    conditional(annotateNDA, "-nda", escape = true, format = "%s") +
    optional("-hets", heterozygosity, spaceSeparated = true, escape = true, format = heterozygosityFormat) +
    optional("-indelHeterozygosity", indel_heterozygosity, spaceSeparated = true, escape = true, format = indel_heterozygosityFormat) +
    optional("-stand_call_conf", standard_min_confidence_threshold_for_calling, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_callingFormat) +
    optional("-stand_emit_conf", standard_min_confidence_threshold_for_emitting, spaceSeparated = true, escape = true, format = standard_min_confidence_threshold_for_emittingFormat) +
    optional("-maxAltAlleles", max_alternate_alleles, spaceSeparated = true, escape = true, format = "%s") +
    repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") +
    optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") +
    optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") +
    // Unwrapped for the same reason as -referenceCalls above.
    optional(TaggedFile.formatCommandLineParameter("-alleles", alleles.orNull), alleles, spaceSeparated = true, escape = true, format = "%s") +
    optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) +
    optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") +
    optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") +
    optional("-logExactCalls", exactcallslog, spaceSeparated = true, escape = true, format = "%s") +
    optional("-out_mode", output_mode, spaceSeparated = true, escape = true, format = "%s") +
    conditional(allSitePLs, "-allSitePLs", escape = true, format = "%s") +
    // Unwrapped for the same reason as -referenceCalls above.
    optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.orNull), dbsnp, spaceSeparated = true, escape = true, format = "%s") +
    // `repeat` applies the prefix formatter to each File element, so no unwrapping is needed here.
    repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
    optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
    repeat("-onlyEmitSamples", onlyEmitSamples, spaceSeparated = true, escape = true, format = "%s") +
    optional("-debug_file", debug_file, spaceSeparated = true, escape = true, format = "%s") +
    optional("-metrics", metrics_file, spaceSeparated = true, escape = true, format = "%s") +
    repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") +
    repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") +
    repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") +
    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
}
Peter van 't Hof's avatar
Peter van 't Hof committed
316
317
318
319
320
321
322
323
324

/** Companion factory for [[UnifiedGenotyper]] jobs. */
object UnifiedGenotyper {
  /**
   * Creates a UnifiedGenotyper with its inputs and output already assigned.
   *
   * @param root       configuration parent handed to the new instance
   * @param inputFiles files assigned to the job's `input_file`
   * @param outputFile file assigned to the job's `out`
   * @return the newly created job
   */
  def apply(root: Configurable, inputFiles: List[File], outputFile: File): UnifiedGenotyper = {
    val genotyper = new UnifiedGenotyper(root)
    genotyper.input_file = inputFiles
    genotyper.out = outputFile
    genotyper
  }
}