From 31b7ebfc67677a16d8b5a042f2f3f95abca3bbac Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 27 Apr 2016 11:05:33 +0200 Subject: [PATCH] Changed baserecalibrator to biopet config values --- .../gatk/broad/BaseRecalibrator.scala | 68 +++++++------------ 1 file changed, 26 insertions(+), 42 deletions(-) diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala index 808085092..0408b8b79 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala @@ -5,17 +5,6 @@ */ package nl.lumc.sasc.biopet.extensions.gatk.broad -//import java.io.File -// -//import nl.lumc.sasc.biopet.utils.config.Configurable -// -//class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator with GatkGeneral { -// if (config.contains("scattercount")) scatterCount = config("scattercount", default = 1) -// if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString) -// if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString) -//} -// - import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable @@ -32,7 +21,7 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S /** A database of known polymorphic sites */ @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites", required = false, exclusiveOf = "", validation = "") - var knownSites: Seq[File] = Nil + var knownSites: List[File] = config("known_sites") /** Dependencies on any indexes of knownSites */ @Input(fullName = "knownSitesIndexes", shortName = "", doc = "Dependencies on any indexes of knownSites", required = false, exclusiveOf = "", validation = "") @@ -43,81 +32,77 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S //@Gather(classOf[org.broadinstitute.gatk.engine.recalibration.BQSRGatherer]) var out: File = _ - /** List the available covariates and exit */ - @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false, exclusiveOf = "", validation = "") - var list: Boolean = _ - /** One or more covariates to be used in the recalibration. Can be specified multiple times */ @Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false, exclusiveOf = "", validation = "") - var covariate: Seq[String] = Nil + var covariate: List[String] = config("covariate", default = Nil) /** Do not use the standard set of covariates, but rather just the ones listed using the -cov argument */ @Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false, exclusiveOf = "", validation = "") - var no_standard_covs: Boolean = _ + var no_standard_covs: Boolean = config("no_standard_covs", default = false) /** If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only. */ @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.", required = false, exclusiveOf = "", validation = "") - var run_without_dbsnp_potentially_ruining_quality: Boolean = _ + var run_without_dbsnp_potentially_ruining_quality: Boolean = config("run_without_dbsnp_potentially_ruining_quality", default = false) /** How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS */ @Argument(fullName = "solid_recal_mode", shortName = "sMode", doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS", required = false, exclusiveOf = "", validation = "") - var solid_recal_mode: String = _ + var solid_recal_mode: Option[String] = config("solid_recal_mode") /** Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ */ @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false, exclusiveOf = "", validation = "") - var solid_nocall_strategy: String = _ + var solid_nocall_strategy: Option[String] = config("solid_nocall_strategy") /** Size of the k-mer context to be used for base mismatches */ @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false, exclusiveOf = "", validation = "") - var mismatches_context_size: Option[Int] = None + var mismatches_context_size: Option[Int] = config("mismatches_context_size") /** Size of the k-mer context to be used for base insertions and deletions */ @Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false, exclusiveOf = "", validation = "") - var indels_context_size: Option[Int] = None + var indels_context_size: Option[Int] = config("indels_context_size") /** The maximum cycle value permitted for the Cycle covariate */ @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false, exclusiveOf = "", validation = "") - var maximum_cycle_value: Option[Int] = None + var maximum_cycle_value: Option[Int] = config("maximum_cycle_value") /** default quality for the base mismatches covariate */ @Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false, exclusiveOf = "", validation = "") - var mismatches_default_quality: Option[Byte] = None + var mismatches_default_quality: Option[String] = config("mismatches_default_quality") /** default quality for the base insertions covariate */ @Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false, exclusiveOf = "", validation = "") - var insertions_default_quality: Option[Byte] = None + var insertions_default_quality: Option[String] = config("insertions_default_quality") /** default quality for the base deletions covariate */ @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false, exclusiveOf = "", validation = "") - var deletions_default_quality: Option[Byte] = None + var deletions_default_quality: Option[String] = config("deletions_default_quality") /** minimum quality for the bases in the tail of the reads to be considered */ @Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false, exclusiveOf = "", validation = "") - var low_quality_tail: Option[Byte] = None + var low_quality_tail: Option[String] = config("low_quality_tail") /** number of distinct quality scores in the quantized output */ @Argument(fullName = "quantizing_levels", shortName = "ql", doc = "number of distinct quality scores in the quantized output", required = false, exclusiveOf = "", validation = "") - var quantizing_levels: Option[Int] = None + var quantizing_levels: Option[Int] = config("quantizing_levels") /** the binary tag covariate name if using it */ @Argument(fullName = "binary_tag_name", shortName = "bintag", doc = "the binary tag covariate name if using it", required = false, exclusiveOf = "", validation = "") - var binary_tag_name: String = _ + var binary_tag_name: Option[String] = config("binary_tag_name") /** Sort the rows in the tables of reports */ @Argument(fullName = "sort_by_all_columns", shortName = "sortAllCols", doc = "Sort the rows in the tables of reports", required = false, exclusiveOf = "", validation = "") - var sort_by_all_columns: Boolean = _ + var sort_by_all_columns: Boolean = config("sort_by_all_columns", default = false) /** If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid. */ @Argument(fullName = "default_platform", shortName = "dP", doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.", required = false, exclusiveOf = "", validation = "") - var default_platform: String = _ + var default_platform: Option[String] = config("default_platform") /** If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid. */ @Argument(fullName = "force_platform", shortName = "fP", doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.", required = false, exclusiveOf = "", validation = "") - var force_platform: String = _ + var force_platform: Option[String] = config("force_platform") /** If provided, the read group of EVERY read will be forced to be the provided String. */ @Argument(fullName = "force_readgroup", shortName = "fRG", doc = "If provided, the read group of EVERY read will be forced to be the provided String.", required = false, exclusiveOf = "", validation = "") - var force_readgroup: String = _ + var force_readgroup: Option[String] = config("force_readgroup") /** If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only */ @Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", doc = "If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only", required = false, exclusiveOf = "", validation = "") @@ -126,19 +111,19 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S /** Max size of the k-mer context to be used for repeat covariates */ @Argument(fullName = "max_str_unit_length", shortName = "maxstr", doc = "Max size of the k-mer context to be used for repeat covariates", required = false, exclusiveOf = "", validation = "") - var max_str_unit_length: Option[Int] = None + var max_str_unit_length: Option[Int] = config("max_str_unit_length") /** Max number of repetitions to be used for repeat covariates */ @Argument(fullName = "max_repeat_length", shortName = "maxrep", doc = "Max number of repetitions to be used for repeat covariates", required = false, exclusiveOf = "", validation = "") - var max_repeat_length: Option[Int] = None + var max_repeat_length: Option[Int] = config("max_repeat_length") /** Reduce memory usage in multi-threaded code at the expense of threading efficiency */ @Argument(fullName = "lowMemoryMode", shortName = "lowMemoryMode", doc = "Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false, exclusiveOf = "", validation = "") - var lowMemoryMode: Boolean = _ + var lowMemoryMode: Boolean = config("lowMemoryMode", default = false) /** BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets */ @Argument(fullName = "bqsrBAQGapOpenPenalty", shortName = "bqsrBAQGOP", doc = "BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false, exclusiveOf = "", validation = "") - var bqsrBAQGapOpenPenalty: Option[Double] = None + var bqsrBAQGapOpenPenalty: Option[Double] = config("bqsrBAQGapOpenPenalty") /** Format string for bqsrBAQGapOpenPenalty */ @Argument(fullName = "bqsrBAQGapOpenPenaltyFormat", shortName = "", doc = "Format string for bqsrBAQGapOpenPenalty", required = false, exclusiveOf = "", validation = "") @@ -146,15 +131,15 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_reads_with_N_cigar: Boolean = _ + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_mismatching_base_and_quals: Boolean = _ + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_bases_not_stored: Boolean = _ + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString) @@ -166,7 +151,6 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S override def cmdLine = super.cmdLine + repeat("-knownSites", knownSites, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + required("-o", out, spaceSeparated = true, escape = true, format = "%s") + - conditional(list, "-ls", escape = true, format = "%s") + repeat("-cov", covariate, spaceSeparated = true, escape = true, format = "%s") + conditional(no_standard_covs, "-noStandard", escape = true, format = "%s") + conditional(run_without_dbsnp_potentially_ruining_quality, "-run_without_dbsnp_potentially_ruining_quality", escape = true, format = "%s") + -- GitLab