From 31b7ebfc67677a16d8b5a042f2f3f95abca3bbac Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 27 Apr 2016 11:05:33 +0200
Subject: [PATCH] Changed BaseRecalibrator to read its arguments from biopet config values

---
 .../gatk/broad/BaseRecalibrator.scala         | 68 +++++++------------
 1 file changed, 26 insertions(+), 42 deletions(-)

diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala
index 808085092..0408b8b79 100644
--- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala
+++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala
@@ -5,17 +5,6 @@
  */
 package nl.lumc.sasc.biopet.extensions.gatk.broad
 
-//import java.io.File
-//
-//import nl.lumc.sasc.biopet.utils.config.Configurable
-//
-//class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator with GatkGeneral {
-//  if (config.contains("scattercount")) scatterCount = config("scattercount", default = 1)
-//  if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString)
-//  if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString)
-//}
-//
-
 import java.io.File
 
 import nl.lumc.sasc.biopet.utils.config.Configurable
@@ -32,7 +21,7 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S
 
   /** A database of known polymorphic sites */
   @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites", required = false, exclusiveOf = "", validation = "")
-  var knownSites: Seq[File] = Nil
+  var knownSites: List[File] = config("known_sites", default = Nil) // optional key: falls back to an empty list, same as "covariate"
 
   /** Dependencies on any indexes of knownSites */
   @Input(fullName = "knownSitesIndexes", shortName = "", doc = "Dependencies on any indexes of knownSites", required = false, exclusiveOf = "", validation = "")
@@ -43,81 +32,77 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S
   //@Gather(classOf[org.broadinstitute.gatk.engine.recalibration.BQSRGatherer])
   var out: File = _
 
-  /** List the available covariates and exit */
-  @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false, exclusiveOf = "", validation = "")
-  var list: Boolean = _
-
   /** One or more covariates to be used in the recalibration. Can be specified multiple times */
   @Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false, exclusiveOf = "", validation = "")
-  var covariate: Seq[String] = Nil
+  var covariate: List[String] = config("covariate", default = Nil)
 
   /** Do not use the standard set of covariates, but rather just the ones listed using the -cov argument */
   @Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false, exclusiveOf = "", validation = "")
-  var no_standard_covs: Boolean = _
+  var no_standard_covs: Boolean = config("no_standard_covs", default = false)
 
   /** If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only. */
   @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.", required = false, exclusiveOf = "", validation = "")
-  var run_without_dbsnp_potentially_ruining_quality: Boolean = _
+  var run_without_dbsnp_potentially_ruining_quality: Boolean = config("run_without_dbsnp_potentially_ruining_quality", default = false)
 
   /** How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS */
   @Argument(fullName = "solid_recal_mode", shortName = "sMode", doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS", required = false, exclusiveOf = "", validation = "")
-  var solid_recal_mode: String = _
+  var solid_recal_mode: Option[String] = config("solid_recal_mode")
 
   /** Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ */
   @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false, exclusiveOf = "", validation = "")
-  var solid_nocall_strategy: String = _
+  var solid_nocall_strategy: Option[String] = config("solid_nocall_strategy")
 
   /** Size of the k-mer context to be used for base mismatches */
   @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false, exclusiveOf = "", validation = "")
-  var mismatches_context_size: Option[Int] = None
+  var mismatches_context_size: Option[Int] = config("mismatches_context_size")
 
   /** Size of the k-mer context to be used for base insertions and deletions */
   @Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false, exclusiveOf = "", validation = "")
-  var indels_context_size: Option[Int] = None
+  var indels_context_size: Option[Int] = config("indels_context_size")
 
   /** The maximum cycle value permitted for the Cycle covariate */
   @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false, exclusiveOf = "", validation = "")
-  var maximum_cycle_value: Option[Int] = None
+  var maximum_cycle_value: Option[Int] = config("maximum_cycle_value")
 
   /** default quality for the base mismatches covariate */
   @Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false, exclusiveOf = "", validation = "")
-  var mismatches_default_quality: Option[Byte] = None
+  var mismatches_default_quality: Option[String] = config("mismatches_default_quality")
 
   /** default quality for the base insertions covariate */
   @Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false, exclusiveOf = "", validation = "")
-  var insertions_default_quality: Option[Byte] = None
+  var insertions_default_quality: Option[String] = config("insertions_default_quality")
 
   /** default quality for the base deletions covariate */
   @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false, exclusiveOf = "", validation = "")
-  var deletions_default_quality: Option[Byte] = None
+  var deletions_default_quality: Option[String] = config("deletions_default_quality")
 
   /** minimum quality for the bases in the tail of the reads to be considered */
   @Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false, exclusiveOf = "", validation = "")
-  var low_quality_tail: Option[Byte] = None
+  var low_quality_tail: Option[String] = config("low_quality_tail")
 
   /** number of distinct quality scores in the quantized output */
   @Argument(fullName = "quantizing_levels", shortName = "ql", doc = "number of distinct quality scores in the quantized output", required = false, exclusiveOf = "", validation = "")
-  var quantizing_levels: Option[Int] = None
+  var quantizing_levels: Option[Int] = config("quantizing_levels")
 
   /** the binary tag covariate name if using it */
   @Argument(fullName = "binary_tag_name", shortName = "bintag", doc = "the binary tag covariate name if using it", required = false, exclusiveOf = "", validation = "")
-  var binary_tag_name: String = _
+  var binary_tag_name: Option[String] = config("binary_tag_name")
 
   /** Sort the rows in the tables of reports */
   @Argument(fullName = "sort_by_all_columns", shortName = "sortAllCols", doc = "Sort the rows in the tables of reports", required = false, exclusiveOf = "", validation = "")
-  var sort_by_all_columns: Boolean = _
+  var sort_by_all_columns: Boolean = config("sort_by_all_columns", default = false)
 
   /** If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid. */
   @Argument(fullName = "default_platform", shortName = "dP", doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.", required = false, exclusiveOf = "", validation = "")
-  var default_platform: String = _
+  var default_platform: Option[String] = config("default_platform")
 
   /** If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid. */
   @Argument(fullName = "force_platform", shortName = "fP", doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.", required = false, exclusiveOf = "", validation = "")
-  var force_platform: String = _
+  var force_platform: Option[String] = config("force_platform")
 
   /** If provided, the read group of EVERY read will be forced to be the provided String. */
   @Argument(fullName = "force_readgroup", shortName = "fRG", doc = "If provided, the read group of EVERY read will be forced to be the provided String.", required = false, exclusiveOf = "", validation = "")
-  var force_readgroup: String = _
+  var force_readgroup: Option[String] = config("force_readgroup")
 
   /** If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only */
   @Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", doc = "If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only", required = false, exclusiveOf = "", validation = "")
@@ -126,19 +111,19 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S
 
   /** Max size of the k-mer context to be used for repeat covariates */
   @Argument(fullName = "max_str_unit_length", shortName = "maxstr", doc = "Max size of the k-mer context to be used for repeat covariates", required = false, exclusiveOf = "", validation = "")
-  var max_str_unit_length: Option[Int] = None
+  var max_str_unit_length: Option[Int] = config("max_str_unit_length")
 
   /** Max number of repetitions to be used for repeat covariates */
   @Argument(fullName = "max_repeat_length", shortName = "maxrep", doc = "Max number of repetitions to be used for repeat covariates", required = false, exclusiveOf = "", validation = "")
-  var max_repeat_length: Option[Int] = None
+  var max_repeat_length: Option[Int] = config("max_repeat_length")
 
   /** Reduce memory usage in multi-threaded code at the expense of threading efficiency */
   @Argument(fullName = "lowMemoryMode", shortName = "lowMemoryMode", doc = "Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false, exclusiveOf = "", validation = "")
-  var lowMemoryMode: Boolean = _
+  var lowMemoryMode: Boolean = config("lowMemoryMode", default = false)
 
   /** BQSR BAQ gap open penalty (Phred Scaled).  Default value is 40.  30 is perhaps better for whole genome call sets */
   @Argument(fullName = "bqsrBAQGapOpenPenalty", shortName = "bqsrBAQGOP", doc = "BQSR BAQ gap open penalty (Phred Scaled).  Default value is 40.  30 is perhaps better for whole genome call sets", required = false, exclusiveOf = "", validation = "")
-  var bqsrBAQGapOpenPenalty: Option[Double] = None
+  var bqsrBAQGapOpenPenalty: Option[Double] = config("bqsrBAQGapOpenPenalty")
 
   /** Format string for bqsrBAQGapOpenPenalty */
   @Argument(fullName = "bqsrBAQGapOpenPenaltyFormat", shortName = "", doc = "Format string for bqsrBAQGapOpenPenalty", required = false, exclusiveOf = "", validation = "")
@@ -146,15 +131,15 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S
 
   /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
   @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_reads_with_N_cigar: Boolean = _
+  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
 
   /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
   @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_mismatching_base_and_quals: Boolean = _
+  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
 
   /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
   @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_bases_not_stored: Boolean = _
+  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
 
   if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString)
 
@@ -166,7 +151,6 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S
   override def cmdLine = super.cmdLine +
     repeat("-knownSites", knownSites, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
     required("-o", out, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(list, "-ls", escape = true, format = "%s") +
     repeat("-cov", covariate, spaceSeparated = true, escape = true, format = "%s") +
     conditional(no_standard_covs, "-noStandard", escape = true, format = "%s") +
     conditional(run_without_dbsnp_potentially_ruining_quality, "-run_without_dbsnp_potentially_ruining_quality", escape = true, format = "%s") +
-- 
GitLab