diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala index 606c7af5dd84538cce4d283eff06087c10aabb2c..2f7d65916c69340336474370cad52a526cd7f465 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Grep.scala @@ -7,8 +7,8 @@ import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** - * Created by pjvanthof on 30/03/16. - */ + * Created by pjvanthof on 30/03/16. + */ class Grep(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Input file", required = true) var input: File = _ diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala similarity index 98% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala index b501d47de6cb0899d8e02ae8a4372fd50de4f7e1..c0740c64008c0d887a879e445c08e41dc1a96cd6 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala @@ -1,9 +1,9 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } class AnalyzeCovariates(val root: Configurable) extends CommandLineGATK with 
ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala similarity index 96% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala index a84aa4b4b8728a1a5c7bbab442a4d905b626821a..b3be8d8578a2a0563274bfc88fa5cf8eb14df543 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala @@ -1,15 +1,12 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils -import org.broadinstitute.gatk.utils.commandline.Argument -import org.broadinstitute.gatk.utils.commandline.Gather -import org.broadinstitute.gatk.utils.commandline.Input -import org.broadinstitute.gatk.utils.commandline.Output +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } class ApplyRecalibration(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { def analysis_type = "ApplyRecalibration" diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BamGatherFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala similarity index 95% rename from 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BamGatherFunction.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala index eb821f0e47ec6c5b3444cb8bed7172ae6dc906eb..c7a55537e2750e1cb316dfe51d63f9874faa6a72 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BamGatherFunction.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala @@ -1,9 +1,8 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction +package nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles +import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction /** * Merges BAM files using htsjdk.samtools.MergeSamFiles. diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala similarity index 98% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala index 828beeb64ea6396437fd195dbdb4718e2417f24a..7e5bbfd3ff4645d11bd2ee173005c713bf0f2458 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala @@ -1,11 +1,10 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile } -import nl.lumc.sasc.biopet.core.ScatterGatherableFunction 
import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } //TODO: check gathering diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala index ceceed5f64ba51a75b098e4cd1b18beaa4f1894d..4d712a8407abb8f09b8e6e7fdcceaba8d11d2bb4 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala @@ -1,58 +1,56 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction } +import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } -class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { +class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction { + analysisName = "CatVariants" + javaMainClass = "org.broadinstitute.gatk.tools.CatVariants" - javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getName + /** genome reference file <name>.fasta */ + @Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true, exclusiveOf = "", validation = "") + var reference: File = _ - @Input(required = true) - var inputFiles: List[File] = Nil + /** Input VCF file/s */ + @Input(fullName = "variant", shortName = "V", doc = "Input VCF file/s", required = true, exclusiveOf = "", validation = "") + var variant: Seq[File] = Nil - @Output(required = true) - var outputFile: File = null + /** output file */ + @Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var outputFile: File = _ - @Input - var reference: File = null + /** assumeSorted should be true if the input files are already sorted (based on the position of the variants) */ + @Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false, exclusiveOf = "", validation = "") + var assumeSorted: Boolean = _ - var assumeSorted = 
false + /** which type of IndexCreator to use for VCF/BCF indices */ + @Argument(fullName = "variant_index_type", shortName = "", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "") + var variant_index_type: Option[String] = None - override def beforeGraph(): Unit = { - super.beforeGraph() - if (reference == null) reference = referenceFasta() - } + /** the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator */ + @Argument(fullName = "variant_index_parameter", shortName = "", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "") + var variant_index_parameter: Option[Int] = None + + /** Set the minimum level of logging */ + @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "") + var logging_level: String = _ + + /** Set the logging location */ + @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var log_to_file: File = _ override def cmdLine = super.cmdLine + - repeat("-V", inputFiles) + - required("-out", outputFile) + - required("-R", reference) + - conditional(assumeSorted, "--assumeSorted") + required("-R", reference, spaceSeparated = true, escape = true, format = "%s") + + repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") + + required("-out", outputFile, spaceSeparated = true, escape = true, format = "%s") + + conditional(assumeSorted, "-assumeSorted", escape = true, format = "%s") + + optional("--variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") + + optional("--variant_index_parameter", variant_index_parameter, spaceSeparated = true, 
escape = true, format = "%s") + + optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") + + optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s") } - -object CatVariants { - def apply(root: Configurable, input: List[File], output: File): CatVariants = { - val cv = new CatVariants(root) - cv.inputFiles = input - cv.outputFile = output - cv - } -} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariantsGather.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala similarity index 96% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariantsGather.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala index e97d6affbf023455122c49b3df59900211df0bea..27c6cb7cadb59997c7dd0c2039e1f53c74f4f8e7 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariantsGather.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala @@ -1,4 +1,4 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala index e20331fe1b1c670acff8d1d1ca743585a8a0735f..ed0065e589e17000a4e4fb9742d4bca71f6622a1 100644 --- 
a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala @@ -1,11 +1,11 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ } class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala index 343e2d769656dd6800d4cd552f51aa25cec7d28c..ba1740d3518e1ea6dbc17a9c60b0d6aa59abc9df 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala @@ -1,80 +1,128 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. 
The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ package nl.lumc.sasc.biopet.extensions.gatk import java.io.File +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - * - * @deprecated - */ -class CombineVariants(val root: Configurable) extends Gatk { - val analysisType = "CombineVariants" - - @Input(doc = "", required = true) - var inputFiles: List[File] = Nil - - @Output(doc = "", required = true) - var outputFile: File = null - - var setKey: String = null - var rodPriorityList: String = null - var minimumN: Int = config("minimumN", default = 1) - var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions") - var excludeNonVariants: Boolean = false - - var inputMap: Map[File, String] = Map() - - def addInput(file: File, name: String): Unit = { - inputFiles :+= file - inputMap += file -> name - } +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } + +class CombineVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "CombineVariants" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** VCF files to merge together */ + @Input(fullName = "variant", shortName = "V", doc = "VCF files to merge together", required = true, exclusiveOf = "", validation = "") + var variant: Seq[File] = Nil + + /** File to which variants should 
be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ + + /** Determines how we should merge genotype records for samples shared across the ROD files */ + @Argument(fullName = "genotypemergeoption", shortName = "genotypeMergeOptions", doc = "Determines how we should merge genotype records for samples shared across the ROD files", required = false, exclusiveOf = "", validation = "") + var genotypemergeoption: Option[String] = config("genotypemergeoption") + + /** Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields */ + @Argument(fullName = "filteredrecordsmergetype", shortName = "filteredRecordsMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required = false, exclusiveOf = "", validation = "") + var filteredrecordsmergetype: Option[String] = config("filteredrecordsmergetype") + + /** Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel) */ + @Argument(fullName = "multipleallelesmergetype", shortName = "multipleAllelesMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. 
indel)", required = false, exclusiveOf = "", validation = "") + var multipleallelesmergetype: Option[String] = config("multipleallelesmergetype") + + /** Ordered list specifying priority for merging */ + @Argument(fullName = "rod_priority_list", shortName = "priority", doc = "Ordered list specifying priority for merging", required = false, exclusiveOf = "", validation = "") + var rod_priority_list: Option[String] = config("rod_priority_list") + + /** Emit interesting sites requiring complex compatibility merging to file */ + @Argument(fullName = "printComplexMerges", shortName = "printComplexMerges", doc = "Emit interesting sites requiring complex compatibility merging to file", required = false, exclusiveOf = "", validation = "") + var printComplexMerges: Boolean = config("printComplexMerges", default = false) + + /** Treat filtered variants as uncalled */ + @Argument(fullName = "filteredAreUncalled", shortName = "filteredAreUncalled", doc = "Treat filtered variants as uncalled", required = false, exclusiveOf = "", validation = "") + var filteredAreUncalled: Boolean = config("filteredAreUncalled", default = false) + + /** Emit a sites-only file */ + @Argument(fullName = "minimalVCF", shortName = "minimalVCF", doc = "Emit a sites-only file", required = false, exclusiveOf = "", validation = "") + var minimalVCF: Boolean = config("minimalVCF", default = false) + + /** Exclude sites where no variation is present after merging */ + @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Exclude sites where no variation is present after merging", required = false, exclusiveOf = "", validation = "") + var excludeNonVariants: Boolean = config("excludeNonVariants", default = false) + + /** Key name for the set attribute */ + @Argument(fullName = "setKey", shortName = "setKey", doc = "Key name for the set attribute", required = false, exclusiveOf = "", validation = "") + var setKey: Option[String] = config("set_key") + + /** Assume input VCFs have identical 
sample sets and disjoint calls */ + @Argument(fullName = "assumeIdenticalSamples", shortName = "assumeIdenticalSamples", doc = "Assume input VCFs have identical sample sets and disjoint calls", required = false, exclusiveOf = "", validation = "") + var assumeIdenticalSamples: Boolean = config("assumeIdenticalSamples", default = false) + + /** Minimum number of input files the site must be observed in to be included */ + @Argument(fullName = "minimumN", shortName = "minN", doc = "Minimum number of input files the site must be observed in to be included", required = false, exclusiveOf = "", validation = "") + var minimumN: Option[Int] = config("minimumN") + + /** Do not output the command line to the header */ + @Argument(fullName = "suppressCommandLineHeader", shortName = "suppressCommandLineHeader", doc = "Do not output the command line to the header", required = false, exclusiveOf = "", validation = "") + var suppressCommandLineHeader: Boolean = config("suppressCommandLineHeader", default = false) + + /** Use the INFO content of the record with the highest AC */ + @Argument(fullName = "mergeInfoWithMaxAC", shortName = "mergeInfoWithMaxAC", doc = "Use the INFO content of the record with the highest AC", required = false, exclusiveOf = "", validation = "") + var mergeInfoWithMaxAC: Boolean = config("mergeInfoWithMaxAC", default = false) + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out 
reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) @Output @Gather(enabled = false) private var outputIndex: File = _ - override def beforeGraph(): Unit = { + override def beforeGraph() { super.beforeGraph() - outputIndex = VcfUtils.getVcfIndexFile(outputFile) - genotypeMergeOptions match { - case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None => - case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions") - } - deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct + deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) } override def cmdLine = super.cmdLine + - (for (file <- inputFiles) yield { - inputMap.get(file) match { - case Some(name) => required("-V:" + name, file) - case _ => required("-V", file) - } - }).mkString + - required("-o", outputFile) + - optional("--setKey", setKey) + - optional("--rod_priority_list", rodPriorityList) + - optional("-genotypeMergeOptions", genotypeMergeOptions) + - conditional(excludeNonVariants, "--excludeNonVariants") + 
repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + optional("-genotypeMergeOptions", genotypemergeoption, spaceSeparated = true, escape = true, format = "%s") + + optional("-filteredRecordsMergeType", filteredrecordsmergetype, spaceSeparated = true, escape = true, format = "%s") + + optional("-multipleAllelesMergeType", multipleallelesmergetype, spaceSeparated = true, escape = true, format = "%s") + + optional("-priority", rod_priority_list, spaceSeparated = true, escape = true, format = "%s") + + conditional(printComplexMerges, "-printComplexMerges", escape = true, format = "%s") + + conditional(filteredAreUncalled, "-filteredAreUncalled", escape = true, format = "%s") + + conditional(minimalVCF, "-minimalVCF", escape = true, format = "%s") + + conditional(excludeNonVariants, "-env", escape = true, format = "%s") + + optional("-setKey", setKey, spaceSeparated = true, escape = true, format = "%s") + + conditional(assumeIdenticalSamples, "-assumeIdenticalSamples", escape = true, format = "%s") + + optional("-minN", minimumN, spaceSeparated = true, escape = true, format = "%s") + + conditional(suppressCommandLineHeader, "-suppressCommandLineHeader", escape = true, format = "%s") + + conditional(mergeInfoWithMaxAC, "-mergeInfoWithMaxAC", escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") +} + +object CombineVariants { + def apply(root: Configurable, input: List[File], output: File): CombineVariants = { + val cv = new CombineVariants(root) + cv.variant = input + cv.out = output + cv + } } diff --git 
a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CommandLineGATK.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CommandLineGATK.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala index 3e8091437d7bfe377748a827fef43655cf5b65b4..7fbba210ce42d99573c3dfd127a45ab9346f438a 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CommandLineGATK.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala @@ -1,4 +1,4 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ContigScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala similarity index 88% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ContigScatterFunction.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala index 12350d3ad5d891e410f15dc90662645c3ceb2319..abfc807c26b7a88623b4b3ec649b010cb3d5d526 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ContigScatterFunction.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala @@ -1,8 +1,9 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk -import collection.JavaConversions._ -import org.broadinstitute.gatk.utils.interval.IntervalUtils import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.interval.IntervalUtils + +import scala.collection.JavaConversions._ 
/** * Splits intervals by contig instead of evenly. diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala deleted file mode 100644 index 92ca40e02d94e4935f3f1c031cf1371e4b77b8a2..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction, Reference } -import org.broadinstitute.gatk.utils.commandline.Input - -/** - * General extension for GATK module - * - * Created by pjvan_thof on 2/26/15. 
- * - * @deprecated - */ -abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Version { - override def subPath = "gatk" :: super.subPath - - jarFile = config("gatk_jar") - - val analysisType: String - - override def defaultCoreMemory = 3.0 - - @Input(required = true) - var reference: File = null - - @Input(required = false) - var gatkKey: Option[File] = config("gatk_key") - - @Input(required = false) - var intervals: List[File] = config("intervals", default = Nil) - - @Input(required = false) - var excludeIntervals: List[File] = config("exclude_intervals", default = Nil) - - @Input(required = false) - var pedigree: List[File] = config("pedigree", default = Nil) - - var et: Option[String] = config("et") - - def versionRegex = """(.*)""".r - override def versionExitcode = List(0, 1) - def versionCommand = executable + " -jar " + jarFile + " -version" - - override def getVersion = super.getVersion.collect { case version => "Gatk " + version } - override def dictRequired = true - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (reference == null) reference = referenceFasta() - } - - override def cmdLine = super.cmdLine + - required("-T", analysisType) + - required("-R", reference) + - optional("-K", gatkKey) + - optional("-et", et) + - repeat("-L", intervals) + - repeat("-XL", excludeIntervals) + - repeat("-ped", pedigree) -} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala similarity index 97% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkScatterFunction.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala index f2399b946ca51488bc102b90c9b96f4368f91147..d49b4d34372dfebdd5eafb6469d643e3067a8d4b 100644 --- 
a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkScatterFunction.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala @@ -1,12 +1,12 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk -import org.broadinstitute.gatk.utils.interval.IntervalUtils import java.io.File import org.broadinstitute.gatk.queue.extensions.gatk.GATKIntervals -import org.broadinstitute.gatk.utils.io.IOUtils import org.broadinstitute.gatk.queue.function.scattergather.{ CloneFunction, ScatterFunction } -import org.broadinstitute.gatk.utils.commandline.{ Output, _ } +import org.broadinstitute.gatk.utils.commandline.Output +import org.broadinstitute.gatk.utils.interval.IntervalUtils +import org.broadinstitute.gatk.utils.io.IOUtils trait GATKScatterFunction extends ScatterFunction { /* The runtime field to set for specifying intervals. */ diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala index 62d2457de0431b8c28ddb3e2eae58352a4b39b23..aea609fb98b311748e98cd044cf0b454a88bfe94 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala @@ -1,52 +1,70 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. 
The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ package nl.lumc.sasc.biopet.extensions.gatk import java.io.File +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import org.broadinstitute.gatk.utils.report.{ GATKReportTable, GATKReport } +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } +import org.broadinstitute.gatk.utils.report.{ GATKReport, GATKReportTable } -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. 
- * - * @deprecated - */ -class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable { - val analysisType = "GenotypeConcordance" +class GenotypeConcordance(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction with Summarizable { + analysisName = "GenotypeConcordance" + val analysis_type = "GenotypeConcordance" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } - @Input(required = true) - var evalFile: File = null + /** The variants and genotypes to evaluate */ + @Input(fullName = "eval", shortName = "eval", doc = "The variants and genotypes to evaluate", required = true, exclusiveOf = "", validation = "") + var eval: File = _ - @Input(required = true) - var compFile: File = null + /** The variants and genotypes to compare against */ + @Input(fullName = "comp", shortName = "comp", doc = "The variants and genotypes to compare against", required = true, exclusiveOf = "", validation = "") + var comp: File = _ - @Output(required = true) - var outputFile: File = null + /** Filters will be ignored */ + @Argument(fullName = "ignoreFilters", shortName = "", doc = "Filters will be ignored", required = false, exclusiveOf = "", validation = "") + var ignoreFilters: Boolean = config("ignoreFilters", default = false) - var moltenize = true + /** One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod. */ + @Argument(fullName = "genotypeFilterExpressionEval", shortName = "gfe", doc = "One or more criteria to use to set EVAL genotypes to no-call. 
These genotype-level filters are only applied to the EVAL rod.", required = false, exclusiveOf = "", validation = "") + var genotypeFilterExpressionEval: List[String] = config("genotypeFilterExpressionEval", default = Nil) - def summaryFiles = Map("output" -> outputFile) + /** One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod. */ + @Argument(fullName = "genotypeFilterExpressionComp", shortName = "gfc", doc = "One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod.", required = false, exclusiveOf = "", validation = "") + var genotypeFilterExpressionComp: Seq[String] = config("genotypeFilterExpressionComp", default = Nil) + + /** Molten rather than tabular output */ + @Argument(fullName = "moltenize", shortName = "moltenize", doc = "Molten rather than tabular output", required = false, exclusiveOf = "", validation = "") + var moltenize: Boolean = config("moltenize", default = true) + + /** File to output the discordant sites and genotypes. */ + @Output(fullName = "printInterestingSites", shortName = "sites", doc = "File to output the discordant sites and genotypes.", required = false, exclusiveOf = "", validation = "") + var printInterestingSites: Option[File] = None + + /** An output file created by the walker. Will overwrite contents if file exists */ + @Output(fullName = "out", shortName = "o", doc = "An output file created by the walker. 
Will overwrite contents if file exists", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + def summaryFiles = Map("output" -> out) def summaryStats = { - val report = new GATKReport(outputFile) + val report = new GATKReport(out) val compProportions = report.getTable("GenotypeConcordance_CompProportions") val counts = report.getTable("GenotypeConcordance_Counts") val evalProportions = report.getTable("GenotypeConcordance_EvalProportions") @@ -82,15 +100,22 @@ class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable ) } - override def beforeGraph(): Unit = { + override def beforeGraph() { super.beforeGraph() - deps :::= (evalFile :: compFile :: Nil).filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct + if (eval != null) deps :+= VcfUtils.getVcfIndexFile(eval) + if (comp != null) deps :+= VcfUtils.getVcfIndexFile(comp) } override def cmdLine = super.cmdLine + - required("--eval", evalFile) + - required("--comp", compFile) + - required("-o", outputFile) + - conditional(moltenize, "--moltenize") + required(TaggedFile.formatCommandLineParameter("-eval", eval), eval, spaceSeparated = true, escape = true, format = "%s") + + required(TaggedFile.formatCommandLineParameter("-comp", comp), comp, spaceSeparated = true, escape = true, format = "%s") + + conditional(ignoreFilters, "--ignoreFilters", escape = true, format = "%s") + + repeat("-gfe", genotypeFilterExpressionEval, spaceSeparated = true, escape = true, format = "%s") + + repeat("-gfc", genotypeFilterExpressionComp, spaceSeparated = true, escape = true, format = "%s") + + conditional(moltenize, "-moltenize", escape = true, format = "%s") + + optional("-sites", printInterestingSites, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + 
conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala index b1a54e34f4b9a079fef323110c2b61b7d9c5ad25..650340d63c1be0ba9195609a616ddddb4abee8ef 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala @@ -1,11 +1,11 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala rename to 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index 5ffdcb306b32bcc06152fd3e1a7457715443cee9..9eac2ba9e3d4974e7b479addcb3d0f1dadf5ef56 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -1,10 +1,10 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala index 34b7c58f9af92fdaee68bb710fc800779984e953..7d16d832892a0daaffd999f549f5fd00f327b51c 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala @@ -1,11 +1,11 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils +import 
nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } class IndelRealigner(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/LocusScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala similarity index 87% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/LocusScatterFunction.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala index 853d185b37d7cb1838b2c3f2928a91cdb8b82548..6a3b961ecbfb034465909714305e469af63a816a 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/LocusScatterFunction.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala @@ -1,8 +1,9 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk -import collection.JavaConversions._ -import org.broadinstitute.gatk.utils.interval.IntervalUtils import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.interval.IntervalUtils + +import scala.collection.JavaConversions._ /** * A scatter function that divides down to the locus level. 
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala index 9f18533cf8f78dcf9ebc8f3749112db9e6d6bd4f..6eaca11891f0d5a559cd3723f5ee8852800f9260 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala @@ -1,9 +1,9 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline._ class PrintReads(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala similarity index 97% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala index 383e74fbf0e4055c2fb5ec296b5a4ccaf72ead39..74ce632bd78dba9de535c3f3b4f82fb481976410 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala @@ -1,11 +1,11 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package 
nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile } import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, _ } class RealignerTargetCreator(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala index d98abff1485b59cd0424eff47d03b0d1dbfe585d..a1ed7b732f9b72d1660c9ea8c1995e5fc0137a68 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala @@ -1,69 +1,262 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ package nl.lumc.sasc.biopet.extensions.gatk import java.io.File +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output } +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - * - * @deprecated - */ -class SelectVariants(val root: Configurable) extends Gatk { - val analysisType = "SelectVariants" +class SelectVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { + def analysis_type = "SelectVariants" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } - @Input(doc = "", required = true) - var inputFiles: List[File] = Nil + /** Input VCF file */ + @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "") + var variant: File = _ - @Output(doc = "", required = true) - var outputFile: File = null + /** Output variants not called in this comparison track */ + @Input(fullName = "discordance", shortName = "disc", doc = "Output variants not called in this comparison track", required = false, exclusiveOf = "", validation = "") + var discordance: Option[File] = None - var excludeNonVariants: Boolean = false + /** Output variants also called in this comparison track */ + @Input(fullName = "concordance", shortName = "conc", doc = "Output variants also called in this comparison track", required = false, exclusiveOf = "", validation = "") + var concordance: Option[File] = None - var inputMap: Map[File, String] = Map() + /** File to which variants should be written */ + @Output(fullName = "out", shortName = "o", doc = "File to which 
variants should be written", required = false, exclusiveOf = "", validation = "") + @Gather(classOf[CatVariantsGatherer]) + var out: File = _ - def addInput(file: File, name: String): Unit = { - inputFiles :+= file - inputMap += file -> name - } + /** Include genotypes from this sample */ + @Argument(fullName = "sample_name", shortName = "sn", doc = "Include genotypes from this sample", required = false, exclusiveOf = "", validation = "") + var sample_name: List[String] = config("sample_name", default = Nil) + + /** Regular expression to select multiple samples */ + @Argument(fullName = "sample_expressions", shortName = "se", doc = "Regular expression to select multiple samples", required = false, exclusiveOf = "", validation = "") + var sample_expressions: List[String] = config("sample_expressions", default = Nil) + + /** File containing a list of samples to include */ + @Input(fullName = "sample_file", shortName = "sf", doc = "File containing a list of samples to include", required = false, exclusiveOf = "", validation = "") + var sample_file: List[File] = config("sample_file", default = Nil) + + /** Exclude genotypes from this sample */ + @Argument(fullName = "exclude_sample_name", shortName = "xl_sn", doc = "Exclude genotypes from this sample", required = false, exclusiveOf = "", validation = "") + var exclude_sample_name: List[String] = config("exclude_sample_name", default = Nil) + + /** List of samples to exclude */ + @Input(fullName = "exclude_sample_file", shortName = "xl_sf", doc = "List of samples to exclude", required = false, exclusiveOf = "", validation = "") + var exclude_sample_file: List[File] = config("exclude_sample_file", default = Nil) + + /** List of sample expressions to exclude */ + @Input(fullName = "exclude_sample_expressions", shortName = "xl_se", doc = "List of sample expressions to exclude", required = false, exclusiveOf = "", validation = "") + var exclude_sample_expressions: List[File] = config("exclude_sample_expressions", default = 
Nil) + + /** One or more criteria to use when selecting the data */ + @Argument(fullName = "selectexpressions", shortName = "select", doc = "One or more criteria to use when selecting the data", required = false, exclusiveOf = "", validation = "") + var selectexpressions: List[String] = config("selectexpressions", default = Nil) + + /** Invert the selection criteria for -select */ + @Argument(fullName = "invertselect", shortName = "invertSelect", doc = "Invert the selection criteria for -select", required = false, exclusiveOf = "", validation = "") + var invertselect: Boolean = config("invertselect", default = false) + + /** Don't include non-variant sites */ + @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Don't include non-variant sites", required = false, exclusiveOf = "", validation = "") + var excludeNonVariants: Boolean = config("excludeNonVariants", default = false) + + /** Don't include filtered sites */ + @Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't include filtered sites", required = false, exclusiveOf = "", validation = "") + var excludeFiltered: Boolean = config("excludeFiltered", default = false) + + /** Preserve original alleles, do not trim */ + @Argument(fullName = "preserveAlleles", shortName = "noTrim", doc = "Preserve original alleles, do not trim", required = false, exclusiveOf = "", validation = "") + var preserveAlleles: Boolean = config("preserveAlleles", default = false) + + /** Remove alternate alleles not present in any genotypes */ + @Argument(fullName = "removeUnusedAlternates", shortName = "trimAlternates", doc = "Remove alternate alleles not present in any genotypes", required = false, exclusiveOf = "", validation = "") + var removeUnusedAlternates: Boolean = config("removeUnusedAlternates", default = false) + + /** Select only variants of a particular allelicity */ + @Argument(fullName = "restrictAllelesTo", shortName = "restrictAllelesTo", doc = "Select only variants of a particular 
allelicity", required = false, exclusiveOf = "", validation = "") + var restrictAllelesTo: Option[String] = config("restrictAllelesTo") + + /** Store the original AC, AF, and AN values after subsetting */ + @Argument(fullName = "keepOriginalAC", shortName = "keepOriginalAC", doc = "Store the original AC, AF, and AN values after subsetting", required = false, exclusiveOf = "", validation = "") + var keepOriginalAC: Boolean = config("keepOriginalAC", default = false) + + /** Store the original DP value after subsetting */ + @Argument(fullName = "keepOriginalDP", shortName = "keepOriginalDP", doc = "Store the original DP value after subsetting", required = false, exclusiveOf = "", validation = "") + var keepOriginalDP: Boolean = config("keepOriginalDP", default = false) + + /** Output mendelian violation sites only */ + @Argument(fullName = "mendelianViolation", shortName = "mv", doc = "Output mendelian violation sites only", required = false, exclusiveOf = "", validation = "") + var mendelianViolation: Boolean = config("mendelianViolation", default = false) + + /** Output non-mendelian violation sites only */ + @Argument(fullName = "invertMendelianViolation", shortName = "invMv", doc = "Output non-mendelian violation sites only", required = false, exclusiveOf = "", validation = "") + var invertMendelianViolation: Boolean = config("invertMendelianViolation", default = false) + + /** Minimum GQ score for each trio member to accept a site as a violation */ + @Argument(fullName = "mendelianViolationQualThreshold", shortName = "mvq", doc = "Minimum GQ score for each trio member to accept a site as a violation", required = false, exclusiveOf = "", validation = "") + var mendelianViolationQualThreshold: Option[Double] = config("mendelianViolationQualThreshold") + + /** Format string for mendelianViolationQualThreshold */ + @Argument(fullName = "mendelianViolationQualThresholdFormat", shortName = "", doc = "Format string for mendelianViolationQualThreshold", required = 
false, exclusiveOf = "", validation = "") + var mendelianViolationQualThresholdFormat: String = "%s" + + /** Select a fraction of variants at random from the input */ + @Argument(fullName = "select_random_fraction", shortName = "fraction", doc = "Select a fraction of variants at random from the input", required = false, exclusiveOf = "", validation = "") + var select_random_fraction: Option[Double] = config("select_random_fraction") + + /** Format string for select_random_fraction */ + @Argument(fullName = "select_random_fractionFormat", shortName = "", doc = "Format string for select_random_fraction", required = false, exclusiveOf = "", validation = "") + var select_random_fractionFormat: String = "%s" + + /** Select a fraction of genotypes at random from the input and sets them to no-call */ + @Argument(fullName = "remove_fraction_genotypes", shortName = "fractionGenotypes", doc = "Select a fraction of genotypes at random from the input and sets them to no-call", required = false, exclusiveOf = "", validation = "") + var remove_fraction_genotypes: Option[Double] = config("remove_fraction_genotypes") + + /** Format string for remove_fraction_genotypes */ + @Argument(fullName = "remove_fraction_genotypesFormat", shortName = "", doc = "Format string for remove_fraction_genotypes", required = false, exclusiveOf = "", validation = "") + var remove_fraction_genotypesFormat: String = "%s" + + /** Select only a certain type of variants from the input file */ + @Argument(fullName = "selectTypeToInclude", shortName = "selectType", doc = "Select only a certain type of variants from the input file", required = false, exclusiveOf = "", validation = "") + var selectTypeToInclude: List[String] = config("selectTypeToInclude", default = Nil) + + /** Do not select certain type of variants from the input file */ + @Argument(fullName = "selectTypeToExclude", shortName = "xlSelectType", doc = "Do not select certain type of variants from the input file", required = false, exclusiveOf 
= "", validation = "") + var selectTypeToExclude: Seq[String] = config("selectTypeToExclude", default = Nil) + + /** List of variant IDs to select */ + @Input(fullName = "keepIDs", shortName = "IDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "") + var keepIDs: Option[File] = config("keepIDs") + + /** List of variant IDs to exclude */ + @Argument(fullName = "excludeIDs", shortName = "xlIDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "") + var excludeIDs: Option[File] = config("excludeIDs") + + /** If true, the incoming VariantContext will be fully decoded */ + @Argument(fullName = "fullyDecode", shortName = "", doc = "If true, the incoming VariantContext will be fully decoded", required = false, exclusiveOf = "", validation = "") + var fullyDecode: Boolean = config("fullyDecode", default = false) + + /** If true, we won't actually write the output file. For efficiency testing only */ + @Argument(fullName = "justRead", shortName = "", doc = "If true, we won't actually write the output file. 
For efficiency testing only", required = false, exclusiveOf = "", validation = "") + var justRead: Boolean = config("justRead", default = false) + + /** Maximum size of indels to include */ + @Argument(fullName = "maxIndelSize", shortName = "", doc = "Maximum size of indels to include", required = false, exclusiveOf = "", validation = "") + var maxIndelSize: Option[Int] = config("maxIndelSize") + + /** Minimum size of indels to include */ + @Argument(fullName = "minIndelSize", shortName = "", doc = "Minimum size of indels to include", required = false, exclusiveOf = "", validation = "") + var minIndelSize: Option[Int] = config("minIndelSize") + + /** Maximum number of samples filtered at the genotype level */ + @Argument(fullName = "maxFilteredGenotypes", shortName = "", doc = "Maximum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var maxFilteredGenotypes: Option[Int] = config("maxFilteredGenotypes") + + /** Minimum number of samples filtered at the genotype level */ + @Argument(fullName = "minFilteredGenotypes", shortName = "", doc = "Minimum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var minFilteredGenotypes: Option[Int] = config("minFilteredGenotypes") + + /** Maximum fraction of samples filtered at the genotype level */ + @Argument(fullName = "maxFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var maxFractionFilteredGenotypes: Option[Double] = config("maxFractionFilteredGenotypes") + + /** Format string for maxFractionFilteredGenotypes */ + @Argument(fullName = "maxFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for maxFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "") + var maxFractionFilteredGenotypesFormat: String = "%s" + + /** Minimum fraction of samples filtered at
the genotype level */ + @Argument(fullName = "minFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") + var minFractionFilteredGenotypes: Option[Double] = config("minFractionFilteredGenotypes") + + /** Format string for minFractionFilteredGenotypes */ + @Argument(fullName = "minFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for minFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "") + var minFractionFilteredGenotypesFormat: String = "%s" + + /** Set filtered genotypes to no-call */ + @Argument(fullName = "setFilteredGtToNocall", shortName = "", doc = "Set filtered genotypes to no-call", required = false, exclusiveOf = "", validation = "") + var setFilteredGtToNocall: Boolean = config("setFilteredGtToNocall", default = false) + + /** Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored. */ + @Argument(fullName = "ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", shortName = "", doc = "Allow samples other than those in the VCF to be specified on the command line. 
These samples will be ignored.", required = false, exclusiveOf = "", validation = "") + var ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES: Boolean = config("ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", default = false) + + /** Forces output VCF to be compliant to up-to-date version */ + @Argument(fullName = "forceValidOutput", shortName = "", doc = "Forces output VCF to be compliant to up-to-date version", required = false, exclusiveOf = "", validation = "") + var forceValidOutput: Boolean = config("forceValidOutput", default = false) + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) @Output @Gather(enabled = false) private var outputIndex: File = _ - override def beforeGraph(): Unit = { + override def beforeGraph() { super.beforeGraph() - outputIndex = VcfUtils.getVcfIndexFile(outputFile) - deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct + if (variant != null) + deps :+= VcfUtils.getVcfIndexFile(variant) + discordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + concordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) + if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) + outputIndex = VcfUtils.getVcfIndexFile(out) } override def cmdLine = super.cmdLine + - (for (file <- inputFiles) yield { - inputMap.get(file) match { - case Some(name) => required("-V:" + name, file) - case _ => required("-V", file) - } - }).mkString + - required("-o", outputFile) + - conditional(excludeNonVariants, "--excludeNonVariants") + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") + + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + + repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") + + repeat("-sf", sample_file, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xl_sn", exclude_sample_name, spaceSeparated = true, escape = true, format 
= "%s") + + repeat("-xl_sf", exclude_sample_file, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xl_se", exclude_sample_expressions, spaceSeparated = true, escape = true, format = "%s") + + repeat("-select", selectexpressions, spaceSeparated = true, escape = true, format = "%s") + + conditional(invertselect, "-invertSelect", escape = true, format = "%s") + + conditional(excludeNonVariants, "-env", escape = true, format = "%s") + + conditional(excludeFiltered, "-ef", escape = true, format = "%s") + + conditional(preserveAlleles, "-noTrim", escape = true, format = "%s") + + conditional(removeUnusedAlternates, "-trimAlternates", escape = true, format = "%s") + + optional("-restrictAllelesTo", restrictAllelesTo, spaceSeparated = true, escape = true, format = "%s") + + conditional(keepOriginalAC, "-keepOriginalAC", escape = true, format = "%s") + + conditional(keepOriginalDP, "-keepOriginalDP", escape = true, format = "%s") + + conditional(mendelianViolation, "-mv", escape = true, format = "%s") + + conditional(invertMendelianViolation, "-invMv", escape = true, format = "%s") + + optional("-mvq", mendelianViolationQualThreshold, spaceSeparated = true, escape = true, format = mendelianViolationQualThresholdFormat) + + optional("-fraction", select_random_fraction, spaceSeparated = true, escape = true, format = select_random_fractionFormat) + + optional("-fractionGenotypes", remove_fraction_genotypes, spaceSeparated = true, escape = true, format = remove_fraction_genotypesFormat) + + repeat("-selectType", selectTypeToInclude, spaceSeparated = true, escape = true, format = "%s") + + repeat("-xlSelectType", selectTypeToExclude, spaceSeparated = true, escape = true, format = "%s") + + optional("-IDs", keepIDs, spaceSeparated = true, escape = true, format = "%s") + + optional("-xlIDs", excludeIDs, spaceSeparated = true, escape = true, format = "%s") + + conditional(fullyDecode, "--fullyDecode", escape = true, format = "%s") + + conditional(justRead, 
"--justRead", escape = true, format = "%s") + + optional("--maxIndelSize", maxIndelSize, spaceSeparated = true, escape = true, format = "%s") + + optional("--minIndelSize", minIndelSize, spaceSeparated = true, escape = true, format = "%s") + + optional("--maxFilteredGenotypes", maxFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") + + optional("--minFilteredGenotypes", minFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") + + optional("--maxFractionFilteredGenotypes", maxFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = maxFractionFilteredGenotypesFormat) + + optional("--minFractionFilteredGenotypes", minFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = minFractionFilteredGenotypesFormat) + + conditional(setFilteredGtToNocall, "--setFilteredGtToNocall", escape = true, format = "%s") + + conditional(ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, "--ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", escape = true, format = "%s") + + conditional(forceValidOutput, "--forceValidOutput", escape = true, format = "%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala index fbb4423898d3f0c688a71cce40a2f1ad2b632978..0edfe5260fe2fb9101fd92d01fbcebce94ba0441 100644 --- 
a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -1,11 +1,11 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import nl.lumc.sasc.biopet.core.ScatterGatherableFunction import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ } class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala index 7fa034cfe6c48b3e4498a7ee4f968404a157a88c..d98a55a49eb6e34c00588fddde66dc00cddfe610 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala @@ -1,4 +1,4 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala similarity index 98% rename from 
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala index a595ce5c1494101ac9104220db3a2c28d3120d2a..cf1c362c7dd06b2eec25104fdfd3fb5a06dea2f8 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala @@ -1,13 +1,11 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile -import java.io.File -import org.broadinstitute.gatk.utils.commandline.Argument -import org.broadinstitute.gatk.utils.commandline.Gather -import org.broadinstitute.gatk.utils.commandline.Input -import org.broadinstitute.gatk.utils.commandline.Output +import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } class VariantEval(val root: Configurable) extends CommandLineGATK { def analysis_type = "VariantEval" diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala similarity index 99% rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala index db631449a9e6c59112832247367634ee8d256983..96b5ee4c00fc1350e6298f31d11c2308010b6e23 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala @@ -1,4 +1,4 @@ -package 
nl.lumc.sasc.biopet.extensions.gatk.broad +package nl.lumc.sasc.biopet.extensions.gatk import java.io.File diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariants.scala deleted file mode 100644 index 6a23df5ff8d1f1a66c7606e826d7a09ad8924cc5..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariants.scala +++ /dev/null @@ -1,59 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Argument -import org.broadinstitute.gatk.utils.commandline.Gather -import org.broadinstitute.gatk.utils.commandline.Input -import org.broadinstitute.gatk.utils.commandline.Output - -class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction { - analysisName = "CatVariants" - javaMainClass = "org.broadinstitute.gatk.tools.CatVariants" - - /** genome reference file <name>.fasta */ - @Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true, exclusiveOf = "", validation = "") - var reference: File = _ - - /** Input VCF file/s */ - @Input(fullName = "variant", shortName = "V", doc = "Input VCF file/s", required = true, exclusiveOf = "", validation = "") - var variant: Seq[File] = Nil - - /** output file */ - @Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true, exclusiveOf = "", validation = "") - @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) - var outputFile: File = _ - - /** assumeSorted should be true if the input files are already sorted (based on the position of the variants) */ - @Argument(fullName = "assumeSorted", shortName = 
"assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false, exclusiveOf = "", validation = "") - var assumeSorted: Boolean = _ - - /** which type of IndexCreator to use for VCF/BCF indices */ - @Argument(fullName = "variant_index_type", shortName = "", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "") - var variant_index_type: Option[String] = None - - /** the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator */ - @Argument(fullName = "variant_index_parameter", shortName = "", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "") - var variant_index_parameter: Option[Int] = None - - /** Set the minimum level of logging */ - @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "") - var logging_level: String = _ - - /** Set the logging location */ - @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "") - @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) - var log_to_file: File = _ - - override def cmdLine = super.cmdLine + - required("-R", reference, spaceSeparated = true, escape = true, format = "%s") + - repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") + - required("-out", outputFile, spaceSeparated = true, escape = true, format = "%s") + - conditional(assumeSorted, "-assumeSorted", escape = true, format = "%s") + - optional("--variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") + - optional("--variant_index_parameter", variant_index_parameter, spaceSeparated = true, escape = true, format = "%s") + - 
optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") + - optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s") -} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala deleted file mode 100644 index 7873ba3e44a3be042a923024af37a36922bb46d4..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala +++ /dev/null @@ -1,128 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile -import nl.lumc.sasc.biopet.core.ScatterGatherableFunction -import nl.lumc.sasc.biopet.utils.VcfUtils -import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } - -class CombineVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { - def analysis_type = "CombineVariants" - scatterClass = classOf[LocusScatterFunction] - setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } - - /** VCF files to merge together */ - @Input(fullName = "variant", shortName = "V", doc = "VCF files to merge together", required = true, exclusiveOf = "", validation = "") - var variant: Seq[File] = Nil - - /** File to which variants should be written */ - @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") - @Gather(classOf[CatVariantsGatherer]) - var out: File = _ - - /** Determines how we should merge genotype records for samples shared across the ROD files */ - @Argument(fullName = "genotypemergeoption", shortName = "genotypeMergeOptions", doc = "Determines how we should merge genotype 
records for samples shared across the ROD files", required = false, exclusiveOf = "", validation = "") - var genotypemergeoption: Option[String] = config("genotypemergeoption") - - /** Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields */ - @Argument(fullName = "filteredrecordsmergetype", shortName = "filteredRecordsMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required = false, exclusiveOf = "", validation = "") - var filteredrecordsmergetype: Option[String] = config("filteredrecordsmergetype") - - /** Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel) */ - @Argument(fullName = "multipleallelesmergetype", shortName = "multipleAllelesMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. 
indel)", required = false, exclusiveOf = "", validation = "") - var multipleallelesmergetype: Option[String] = config("multipleallelesmergetype") - - /** Ordered list specifying priority for merging */ - @Argument(fullName = "rod_priority_list", shortName = "priority", doc = "Ordered list specifying priority for merging", required = false, exclusiveOf = "", validation = "") - var rod_priority_list: Option[String] = config("rod_priority_list") - - /** Emit interesting sites requiring complex compatibility merging to file */ - @Argument(fullName = "printComplexMerges", shortName = "printComplexMerges", doc = "Emit interesting sites requiring complex compatibility merging to file", required = false, exclusiveOf = "", validation = "") - var printComplexMerges: Boolean = config("printComplexMerges", default = false) - - /** Treat filtered variants as uncalled */ - @Argument(fullName = "filteredAreUncalled", shortName = "filteredAreUncalled", doc = "Treat filtered variants as uncalled", required = false, exclusiveOf = "", validation = "") - var filteredAreUncalled: Boolean = config("filteredAreUncalled", default = false) - - /** Emit a sites-only file */ - @Argument(fullName = "minimalVCF", shortName = "minimalVCF", doc = "Emit a sites-only file", required = false, exclusiveOf = "", validation = "") - var minimalVCF: Boolean = config("minimalVCF", default = false) - - /** Exclude sites where no variation is present after merging */ - @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Exclude sites where no variation is present after merging", required = false, exclusiveOf = "", validation = "") - var excludeNonVariants: Boolean = config("excludeNonVariants", default = false) - - /** Key name for the set attribute */ - @Argument(fullName = "setKey", shortName = "setKey", doc = "Key name for the set attribute", required = false, exclusiveOf = "", validation = "") - var setKey: Option[String] = config("set_key") - - /** Assume input VCFs have identical 
sample sets and disjoint calls */ - @Argument(fullName = "assumeIdenticalSamples", shortName = "assumeIdenticalSamples", doc = "Assume input VCFs have identical sample sets and disjoint calls", required = false, exclusiveOf = "", validation = "") - var assumeIdenticalSamples: Boolean = config("assumeIdenticalSamples", default = false) - - /** Minimum number of input files the site must be observed in to be included */ - @Argument(fullName = "minimumN", shortName = "minN", doc = "Minimum number of input files the site must be observed in to be included", required = false, exclusiveOf = "", validation = "") - var minimumN: Option[Int] = config("minimumN") - - /** Do not output the command line to the header */ - @Argument(fullName = "suppressCommandLineHeader", shortName = "suppressCommandLineHeader", doc = "Do not output the command line to the header", required = false, exclusiveOf = "", validation = "") - var suppressCommandLineHeader: Boolean = config("suppressCommandLineHeader", default = false) - - /** Use the INFO content of the record with the highest AC */ - @Argument(fullName = "mergeInfoWithMaxAC", shortName = "mergeInfoWithMaxAC", doc = "Use the INFO content of the record with the highest AC", required = false, exclusiveOf = "", validation = "") - var mergeInfoWithMaxAC: Boolean = config("mergeInfoWithMaxAC", default = false) - - /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ - @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) - - /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ - @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out 
reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) - - /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ - @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) - - @Output - @Gather(enabled = false) - private var outputIndex: File = _ - - override def beforeGraph() { - super.beforeGraph() - deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) - if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) - outputIndex = VcfUtils.getVcfIndexFile(out) - } - - override def cmdLine = super.cmdLine + - repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + - optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + - optional("-genotypeMergeOptions", genotypemergeoption, spaceSeparated = true, escape = true, format = "%s") + - optional("-filteredRecordsMergeType", filteredrecordsmergetype, spaceSeparated = true, escape = true, format = "%s") + - optional("-multipleAllelesMergeType", multipleallelesmergetype, spaceSeparated = true, escape = true, format = "%s") + - optional("-priority", rod_priority_list, spaceSeparated = true, escape = true, format = "%s") + - conditional(printComplexMerges, "-printComplexMerges", escape = true, format = "%s") + - conditional(filteredAreUncalled, "-filteredAreUncalled", escape = true, format = 
"%s") + - conditional(minimalVCF, "-minimalVCF", escape = true, format = "%s") + - conditional(excludeNonVariants, "-env", escape = true, format = "%s") + - optional("-setKey", setKey, spaceSeparated = true, escape = true, format = "%s") + - conditional(assumeIdenticalSamples, "-assumeIdenticalSamples", escape = true, format = "%s") + - optional("-minN", minimumN, spaceSeparated = true, escape = true, format = "%s") + - conditional(suppressCommandLineHeader, "-suppressCommandLineHeader", escape = true, format = "%s") + - conditional(mergeInfoWithMaxAC, "-mergeInfoWithMaxAC", escape = true, format = "%s") + - conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + - conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + - conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") -} - -object CombineVariants { - def apply(root: Configurable, input: List[File], output: File): CombineVariants = { - val cv = new CombineVariants(root) - cv.variant = input - cv.out = output - cv - } -} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala deleted file mode 100644 index b8c3e6ba238836cf5a1c6f71a534a0359275e8c8..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala +++ /dev/null @@ -1,44 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.gatk.engine.phonehome.GATKRunReport - -/** - * @deprecated - */ -trait GatkGeneral extends org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK with CommandLineResources with Reference with Version { - var executable: String = config("java", default = "java", namespace = "java", freeVar = false) - - override def subPath = "gatk" :: 
super.subPath - - jarFile = config("gatk_jar") - - reference_sequence = referenceFasta() - - override def defaultCoreMemory = 4.0 - override def faiRequired = true - override def dictRequired = true - - if (config.contains("intervals")) intervals = config("intervals").asFileList - if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList - - Option(config("et").value) match { - case Some("NO_ET") => et = GATKRunReport.PhoneHomeOption.NO_ET - case Some("AWS") => et = GATKRunReport.PhoneHomeOption.AWS - case Some("STDOUT") => et = GATKRunReport.PhoneHomeOption.STDOUT - case Some(x) => throw new IllegalArgumentException(s"Unknown et option for gatk: $x") - case _ => - } - - if (config.contains("gatk_key")) gatk_key = config("gatk_key") - if (config.contains("pedigree")) pedigree = config("pedigree") - - def versionRegex = """(.*)""".r - override def versionExitcode = List(0, 1) - def versionCommand = "java" + " -jar " + jarFile + " -version" - - override def getVersion = { - BiopetCommandLineFunction.preProcessExecutable(executable).path.foreach(executable = _) - super.getVersion.collect { case v => "Gatk " + v } - } -} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala deleted file mode 100644 index dd49dd75da5fb6d91f5cc0826338796799a198c3..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala +++ /dev/null @@ -1,262 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk.broad - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile -import nl.lumc.sasc.biopet.core.ScatterGatherableFunction -import nl.lumc.sasc.biopet.utils.VcfUtils -import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, 
Output, _ } - -class SelectVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction { - def analysis_type = "SelectVariants" - scatterClass = classOf[LocusScatterFunction] - setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } - - /** Input VCF file */ - @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "") - var variant: File = _ - - /** Output variants not called in this comparison track */ - @Input(fullName = "discordance", shortName = "disc", doc = "Output variants not called in this comparison track", required = false, exclusiveOf = "", validation = "") - var discordance: Option[File] = None - - /** Output variants also called in this comparison track */ - @Input(fullName = "concordance", shortName = "conc", doc = "Output variants also called in this comparison track", required = false, exclusiveOf = "", validation = "") - var concordance: Option[File] = None - - /** File to which variants should be written */ - @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "") - @Gather(classOf[CatVariantsGatherer]) - var out: File = _ - - /** Include genotypes from this sample */ - @Argument(fullName = "sample_name", shortName = "sn", doc = "Include genotypes from this sample", required = false, exclusiveOf = "", validation = "") - var sample_name: List[String] = config("sample_name", default = Nil) - - /** Regular expression to select multiple samples */ - @Argument(fullName = "sample_expressions", shortName = "se", doc = "Regular expression to select multiple samples", required = false, exclusiveOf = "", validation = "") - var sample_expressions: List[String] = config("sample_expressions", default = Nil) - - /** File containing a list of samples to include */ - @Input(fullName = "sample_file", shortName = "sf", doc = "File containing a 
list of samples to include", required = false, exclusiveOf = "", validation = "") - var sample_file: List[File] = config("sample_file", default = Nil) - - /** Exclude genotypes from this sample */ - @Argument(fullName = "exclude_sample_name", shortName = "xl_sn", doc = "Exclude genotypes from this sample", required = false, exclusiveOf = "", validation = "") - var exclude_sample_name: List[String] = config("exclude_sample_name", default = Nil) - - /** List of samples to exclude */ - @Input(fullName = "exclude_sample_file", shortName = "xl_sf", doc = "List of samples to exclude", required = false, exclusiveOf = "", validation = "") - var exclude_sample_file: List[File] = config("exclude_sample_file", default = Nil) - - /** List of sample expressions to exclude */ - @Input(fullName = "exclude_sample_expressions", shortName = "xl_se", doc = "List of sample expressions to exclude", required = false, exclusiveOf = "", validation = "") - var exclude_sample_expressions: List[File] = config("exclude_sample_expressions", default = Nil) - - /** One or more criteria to use when selecting the data */ - @Argument(fullName = "selectexpressions", shortName = "select", doc = "One or more criteria to use when selecting the data", required = false, exclusiveOf = "", validation = "") - var selectexpressions: List[String] = config("selectexpressions", default = Nil) - - /** Invert the selection criteria for -select */ - @Argument(fullName = "invertselect", shortName = "invertSelect", doc = "Invert the selection criteria for -select", required = false, exclusiveOf = "", validation = "") - var invertselect: Boolean = config("invertselect", default = false) - - /** Don't include non-variant sites */ - @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Don't include non-variant sites", required = false, exclusiveOf = "", validation = "") - var excludeNonVariants: Boolean = config("excludeNonVariants", default = false) - - /** Don't include filtered sites */ - 
@Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't include filtered sites", required = false, exclusiveOf = "", validation = "") - var excludeFiltered: Boolean = config("excludeFiltered", default = false) - - /** Preserve original alleles, do not trim */ - @Argument(fullName = "preserveAlleles", shortName = "noTrim", doc = "Preserve original alleles, do not trim", required = false, exclusiveOf = "", validation = "") - var preserveAlleles: Boolean = config("preserveAlleles", default = false) - - /** Remove alternate alleles not present in any genotypes */ - @Argument(fullName = "removeUnusedAlternates", shortName = "trimAlternates", doc = "Remove alternate alleles not present in any genotypes", required = false, exclusiveOf = "", validation = "") - var removeUnusedAlternates: Boolean = config("removeUnusedAlternates", default = false) - - /** Select only variants of a particular allelicity */ - @Argument(fullName = "restrictAllelesTo", shortName = "restrictAllelesTo", doc = "Select only variants of a particular allelicity", required = false, exclusiveOf = "", validation = "") - var restrictAllelesTo: Option[String] = config("restrictAllelesTo") - - /** Store the original AC, AF, and AN values after subsetting */ - @Argument(fullName = "keepOriginalAC", shortName = "keepOriginalAC", doc = "Store the original AC, AF, and AN values after subsetting", required = false, exclusiveOf = "", validation = "") - var keepOriginalAC: Boolean = config("keepOriginalAC", default = false) - - /** Store the original DP value after subsetting */ - @Argument(fullName = "keepOriginalDP", shortName = "keepOriginalDP", doc = "Store the original DP value after subsetting", required = false, exclusiveOf = "", validation = "") - var keepOriginalDP: Boolean = config("keepOriginalDP", default = false) - - /** Output mendelian violation sites only */ - @Argument(fullName = "mendelianViolation", shortName = "mv", doc = "Output mendelian violation sites only", required = 
false, exclusiveOf = "", validation = "") - var mendelianViolation: Boolean = config("mendelianViolation", default = false) - - /** Output non-mendelian violation sites only */ - @Argument(fullName = "invertMendelianViolation", shortName = "invMv", doc = "Output non-mendelian violation sites only", required = false, exclusiveOf = "", validation = "") - var invertMendelianViolation: Boolean = config("invertMendelianViolation", default = false) - - /** Minimum GQ score for each trio member to accept a site as a violation */ - @Argument(fullName = "mendelianViolationQualThreshold", shortName = "mvq", doc = "Minimum GQ score for each trio member to accept a site as a violation", required = false, exclusiveOf = "", validation = "") - var mendelianViolationQualThreshold: Option[Double] = config("mendelianViolationQualThreshold") - - /** Format string for mendelianViolationQualThreshold */ - @Argument(fullName = "mendelianViolationQualThresholdFormat", shortName = "", doc = "Format string for mendelianViolationQualThreshold", required = false, exclusiveOf = "", validation = "") - var mendelianViolationQualThresholdFormat: String = "%s" - - /** Select a fraction of variants at random from the input */ - @Argument(fullName = "select_random_fraction", shortName = "fraction", doc = "Select a fraction of variants at random from the input", required = false, exclusiveOf = "", validation = "") - var select_random_fraction: Option[Double] = config("select_random_fraction") - - /** Format string for select_random_fraction */ - @Argument(fullName = "select_random_fractionFormat", shortName = "", doc = "Format string for select_random_fraction", required = false, exclusiveOf = "", validation = "") - var select_random_fractionFormat: String = "%s" - - /** Select a fraction of genotypes at random from the input and sets them to no-call */ - @Argument(fullName = "remove_fraction_genotypes", shortName = "fractionGenotypes", doc = "Select a fraction of genotypes at random from the input 
and sets them to no-call", required = false, exclusiveOf = "", validation = "") - var remove_fraction_genotypes: Option[Double] = config("remove_fraction_genotypes") - - /** Format string for remove_fraction_genotypes */ - @Argument(fullName = "remove_fraction_genotypesFormat", shortName = "", doc = "Format string for remove_fraction_genotypes", required = false, exclusiveOf = "", validation = "") - var remove_fraction_genotypesFormat: String = "%s" - - /** Select only a certain type of variants from the input file */ - @Argument(fullName = "selectTypeToInclude", shortName = "selectType", doc = "Select only a certain type of variants from the input file", required = false, exclusiveOf = "", validation = "") - var selectTypeToInclude: List[String] = config("selectTypeToInclude", default = Nil) - - /** Do not select certain type of variants from the input file */ - @Argument(fullName = "selectTypeToExclude", shortName = "xlSelectType", doc = "Do not select certain type of variants from the input file", required = false, exclusiveOf = "", validation = "") - var selectTypeToExclude: Seq[String] = config("selectTypeToExclude", default = Nil) - - /** List of variant IDs to select */ - @Input(fullName = "keepIDs", shortName = "IDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "") - var keepIDs: Option[File] = config("keepIDs") - - /** List of variant IDs to select */ - @Argument(fullName = "excludeIDs", shortName = "xlIDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "") - var excludeIDs: Option[File] = config("excludeIDs") - - /** If true, the incoming VariantContext will be fully decoded */ - @Argument(fullName = "fullyDecode", shortName = "", doc = "If true, the incoming VariantContext will be fully decoded", required = false, exclusiveOf = "", validation = "") - var fullyDecode: Boolean = config("fullyDecode", default = false) - - /** If true, we won't actually write the output 
file. For efficiency testing only */ - @Argument(fullName = "justRead", shortName = "", doc = "If true, we won't actually write the output file. For efficiency testing only", required = false, exclusiveOf = "", validation = "") - var justRead: Boolean = config("justRead", default = false) - - /** Maximum size of indels to include */ - @Argument(fullName = "maxIndelSize", shortName = "", doc = "Maximum size of indels to include", required = false, exclusiveOf = "", validation = "") - var maxIndelSize: Option[Int] = config("maxIndelSize") - - /** Minimum size of indels to include */ - @Argument(fullName = "minIndelSize", shortName = "", doc = "Minimum size of indels to include", required = false, exclusiveOf = "", validation = "") - var minIndelSize: Option[Int] = config("minIndelSize") - - /** Maximum number of samples filtered at the genotype level */ - @Argument(fullName = "maxFilteredGenotypes", shortName = "", doc = "Maximum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") - var maxFilteredGenotypes: Option[Int] = config("maxFilteredGenotypes") - - /** Minimum number of samples filtered at the genotype level */ - @Argument(fullName = "minFilteredGenotypes", shortName = "", doc = "Minimum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") - var minFilteredGenotypes: Option[Int] = config("minFilteredGenotypes") - - /** Maximum fraction of samples filtered at the genotype level */ - @Argument(fullName = "maxFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") - var maxFractionFilteredGenotypes: Option[Double] = config("maxFractionFilteredGenotypes") - - /** Format string for maxFractionFilteredGenotypes */ - @Argument(fullName = "maxFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for maxFractionFilteredGenotypes", required = 
false, exclusiveOf = "", validation = "") - var maxFractionFilteredGenotypesFormat: String = "%s" - - /** Maximum fraction of samples filtered at the genotype level */ - @Argument(fullName = "minFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "") - var minFractionFilteredGenotypes: Option[Double] = config("minFractionFilteredGenotypes") - - /** Format string for minFractionFilteredGenotypes */ - @Argument(fullName = "minFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for minFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "") - var minFractionFilteredGenotypesFormat: String = "%s" - - /** Set filtered genotypes to no-call */ - @Argument(fullName = "setFilteredGtToNocall", shortName = "", doc = "Set filtered genotypes to no-call", required = false, exclusiveOf = "", validation = "") - var setFilteredGtToNocall: Boolean = config("setFilteredGtToNocall", default = false) - - /** Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored. */ - @Argument(fullName = "ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", shortName = "", doc = "Allow samples other than those in the VCF to be specified on the command line. 
These samples will be ignored.", required = false, exclusiveOf = "", validation = "") - var ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES: Boolean = config("ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", default = false) - - /** Forces output VCF to be compliant to up-to-date version */ - @Argument(fullName = "forceValidOutput", shortName = "", doc = "Forces output VCF to be compliant to up-to-date version", required = false, exclusiveOf = "", validation = "") - var forceValidOutput: Boolean = config("forceValidOutput", default = false) - - /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ - @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) - - /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ - @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) - - /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ - @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. 
'*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "") - var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) - - @Output - @Gather(enabled = false) - private var outputIndex: File = _ - - override def beforeGraph() { - super.beforeGraph() - if (variant != null) - deps :+= VcfUtils.getVcfIndexFile(variant) - discordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) - concordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_)) - if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) - outputIndex = VcfUtils.getVcfIndexFile(out) - } - - override def cmdLine = super.cmdLine + - required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") + - optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + - repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + - repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") + - repeat("-sf", sample_file, spaceSeparated = true, escape = true, format = "%s") + - repeat("-xl_sn", exclude_sample_name, spaceSeparated = true, escape = true, format = "%s") + - repeat("-xl_sf", exclude_sample_file, spaceSeparated = true, escape = true, format = "%s") + - repeat("-xl_se", exclude_sample_expressions, spaceSeparated = true, escape = true, format = "%s") + - repeat("-select", selectexpressions, spaceSeparated = true, escape = true, format = "%s") + - conditional(invertselect, "-invertSelect", escape = true, format = "%s") + - conditional(excludeNonVariants, "-env", escape = true, format = "%s") + - 
conditional(excludeFiltered, "-ef", escape = true, format = "%s") + - conditional(preserveAlleles, "-noTrim", escape = true, format = "%s") + - conditional(removeUnusedAlternates, "-trimAlternates", escape = true, format = "%s") + - optional("-restrictAllelesTo", restrictAllelesTo, spaceSeparated = true, escape = true, format = "%s") + - conditional(keepOriginalAC, "-keepOriginalAC", escape = true, format = "%s") + - conditional(keepOriginalDP, "-keepOriginalDP", escape = true, format = "%s") + - conditional(mendelianViolation, "-mv", escape = true, format = "%s") + - conditional(invertMendelianViolation, "-invMv", escape = true, format = "%s") + - optional("-mvq", mendelianViolationQualThreshold, spaceSeparated = true, escape = true, format = mendelianViolationQualThresholdFormat) + - optional("-fraction", select_random_fraction, spaceSeparated = true, escape = true, format = select_random_fractionFormat) + - optional("-fractionGenotypes", remove_fraction_genotypes, spaceSeparated = true, escape = true, format = remove_fraction_genotypesFormat) + - repeat("-selectType", selectTypeToInclude, spaceSeparated = true, escape = true, format = "%s") + - repeat("-xlSelectType", selectTypeToExclude, spaceSeparated = true, escape = true, format = "%s") + - optional("-IDs", keepIDs, spaceSeparated = true, escape = true, format = "%s") + - optional("-xlIDs", excludeIDs, spaceSeparated = true, escape = true, format = "%s") + - conditional(fullyDecode, "--fullyDecode", escape = true, format = "%s") + - conditional(justRead, "--justRead", escape = true, format = "%s") + - optional("--maxIndelSize", maxIndelSize, spaceSeparated = true, escape = true, format = "%s") + - optional("--minIndelSize", minIndelSize, spaceSeparated = true, escape = true, format = "%s") + - optional("--maxFilteredGenotypes", maxFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") + - optional("--minFilteredGenotypes", minFilteredGenotypes, spaceSeparated = true, escape = true, format = 
"%s") + - optional("--maxFractionFilteredGenotypes", maxFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = maxFractionFilteredGenotypesFormat) + - optional("--minFractionFilteredGenotypes", minFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = minFractionFilteredGenotypesFormat) + - conditional(setFilteredGtToNocall, "--setFilteredGtToNocall", escape = true, format = "%s") + - conditional(ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, "--ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", escape = true, format = "%s") + - conditional(forceValidOutput, "--forceValidOutput", escape = true, format = "%s") + - conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + - conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + - conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") -} diff --git a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala index 6e2aa683f8e6e2abe31e2e8307d71db8c41c5258..d7c40fb76197f77ddb944803c113b65cf124a0bf 100644 --- a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala +++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala @@ -30,8 +30,8 @@ import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.queue.QScript -import scala.language.reflectiveCalls +import scala.language.reflectiveCalls import scala.collection.JavaConversions._ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript { @@ -173,7 +173,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri => val cv = new 
CombineVariants(this) - cv.reference = fastaFile + cv.reference_sequence = fastaFile cv.deps ::= createDict.output def addDownload(uri: String): Unit = { val curl = new Curl(this) @@ -181,7 +181,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript curl.output = new File(annotationDir, new File(curl.url).getName) curl.isIntermediate = true add(curl) - cv.inputFiles ::= curl.output + cv.variant :+= curl.output val tabix = new Tabix(this) tabix.input = curl.output @@ -198,7 +198,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript case _ => addDownload(dbsnpUri.toString) } - cv.outputFile = new File(annotationDir, "dbsnp.vcf.gz") + cv.out = new File(annotationDir, "dbsnp.vcf.gz") add(cv) } diff --git a/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala index afc6bbdc3ba737db63f5c4270009de0a60b8deaa..d2303ac3e014110652af209dbaee180565405ca0 100644 --- a/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala +++ b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala @@ -82,10 +82,10 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R gensToVcf.outputVcf = new File(outputDirGens, gen._1.genotypes.getName + s".${gen._2}.vcf.gz") gensToVcf.isIntermediate = true add(gensToVcf) - cvChr.inputFiles :+= gensToVcf.outputVcf + cvChr.variant :+= gensToVcf.outputVcf } add(cvChr) - cvTotal.inputFiles :+= cvChr.outputFile + cvTotal.variant :+= cvChr.outputFile contig -> cvChr.outputFile } add(cvTotal) @@ -105,14 +105,14 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R bedFile.deleteOnExit() val sv = new SelectVariants(this) - sv.inputFiles :+= chrVcfFiles.getOrElse(region.chr, vcfFile) - sv.outputFile = new File(regionDir, s"$name.vcf.gz") + sv.variant = chrVcfFiles.getOrElse(region.chr, vcfFile) 
+ sv.out = new File(regionDir, s"$name.vcf.gz") sv.intervals :+= bedFile sv.isIntermediate = true add(sv) val snptest = new Snptest(this) - snptest.inputGenotypes :+= sv.outputFile + snptest.inputGenotypes :+= sv.out snptest.inputSampleFiles :+= phenotypeFile snptest.outputFile = Some(new File(regionDir, s"$name.snptest")) add(snptest) @@ -127,7 +127,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R } val cv = new CatVariants(this) - cv.inputFiles = snpTests.map(_._2).toList + cv.variant = snpTests.map(_._2).toList cv.outputFile = new File(outputDir, "snptest" + File.separator + "snptest.vcf.gz") add(cv) } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala index 7bebb491a55a52b7b77ce8141f11f162ab7eb643..ed0e1318d96c615346172b5e1add0df6dc4476d0 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension -import nl.lumc.sasc.biopet.extensions.gatk.broad._ +import nl.lumc.sasc.biopet.extensions.gatk._ import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.pipelines.toucan.Toucan diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index 6020412479bf3331a35ff69c82327c212d889352..c38cb8564ddb0f9243443bab60f6cb41ceec5b53 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -26,6 +26,7 @@ import 
nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSamp import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile /** * Implementation of ShivaVariantcalling @@ -86,10 +87,10 @@ class ShivaVariantcalling(val root: Configurable) extends QScript require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) val cv = new CombineVariants(qscript) - cv.outputFile = finalFile - cv.setKey = "VariantCaller" - cv.genotypeMergeOptions = Some("PRIORITIZE") - cv.rodPriorityList = callers.map(_.name).mkString(",") + cv.out = finalFile + cv.setKey = Some("VariantCaller") + cv.genotypemergeoption = Some("PRIORITIZE") + cv.rod_priority_list = Some(callers.map(_.name).mkString(",")) for (caller <- callers) { caller.inputBams = inputBams caller.namePrefix = namePrefix @@ -110,17 +111,17 @@ class ShivaVariantcalling(val root: Configurable) extends QScript vtDecompose.inputVcf = vtNormalize.outputVcf vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) + cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name) } else if (normalize && !decompose) { vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) - cv.addInput(vtNormalize.outputVcf, caller.name) + cv.variant :+= TaggedFile(vtNormalize.outputVcf, caller.name) } else if (!normalize && decompose) { vtDecompose.inputVcf = caller.outputFile vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) - } 
else cv.addInput(caller.outputFile, caller.name) + cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name) + } else cv.variant :+= TaggedFile(caller.outputFile, caller.name) } add(cv) @@ -139,9 +140,9 @@ class ShivaVariantcalling(val root: Configurable) extends QScript referenceVcf.foreach(referenceVcfFile => { val gc = new GenotypeConcordance(this) - gc.evalFile = vcfFile - gc.compFile = referenceVcfFile - gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") + gc.eval = vcfFile + gc.comp = referenceVcfFile + gc.out = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") referenceVcfRegions.foreach(gc.intervals ::= _) add(gc) addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala index 98fe0e0a06342cee60db461acc33f1a64b5c23b2..91f8468b189b878d756554782b956a8a0037ceef 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala @@ -41,7 +41,7 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "DEL" delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (dup) { val delly = new DellyCaller(this) @@ -49,7 +49,7 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "DUP" delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (inv) { val delly = new DellyCaller(this) @@ -57,18 +57,18 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "INV" delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf") 
add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (tra) { val delly = new DellyCaller(this) delly.input = bamFile delly.analysistype = "TRA" delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf") - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf add(delly) } - require(catVariants.inputFiles.nonEmpty, "Must atleast 1 SV-type be selected for Delly") + require(catVariants.variant.nonEmpty, "Must atleast 1 SV-type be selected for Delly") add(catVariants) addVCF(sample, catVariants.outputFile) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala index cd039f946a182b70c23d715b9efc3cf57960f2b4..1224592eb7fb66eb4075eb9aff9215379d6553c4 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala @@ -5,7 +5,7 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.extensions.gatk.broad +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable /** Default mode for the haplotypecaller */ @@ -14,7 +14,7 @@ class HaplotypeCaller(val root: Configurable) extends Variantcaller { protected def defaultPrio = 1 def biopetScript() { - val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile) add(hc) } } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala index 3de2234b78317c0182aaf8db6a163c087b4afe34..09e7b5e0286fee0da538c23ce9d8b5f639df1555 100644 --- 
a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala @@ -5,7 +5,7 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.extensions.gatk.broad +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable /** Allele mode for Haplotypecaller */ @@ -14,7 +14,7 @@ class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller { protected def defaultPrio = 5 def biopetScript() { - val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile) hc.alleles = config("input_alleles") hc.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES") add(hc) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala index 6a0cb91c9b4c3131d0479b853071d5f6826b0513..2f7b8446b4c0a64b1348edb396f43fedcaaab4d3 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala @@ -5,7 +5,7 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.extensions.gatk.broad +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable /** Gvcf mode for haplotypecaller */ @@ -15,12 +15,12 @@ class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { def biopetScript() { val gvcfFiles = for ((sample, inputBam) <- inputBams) yield { - val hc = broad.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) + val hc = gatk.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample 
+ ".gvcf.vcf.gz")) add(hc) hc.out } - val genotypeGVCFs = broad.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) + val genotypeGVCFs = gatk.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) add(genotypeGVCFs) } } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala index 847e671166191da3153cc2df818828c66de37aa1..ec46b9c348b3761786195777057852a96f6b214f 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala @@ -15,11 +15,9 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import java.io.File - import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup -import nl.lumc.sasc.biopet.extensions.tools.{ VcfFilter, MpileupToVcf } +import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter } import nl.lumc.sasc.biopet.utils.config.Configurable /** Makes a vcf file from a mpileup without statistics */ @@ -60,9 +58,9 @@ class RawVcf(val root: Configurable) extends Variantcaller { } val cv = new CombineVariants(this) - cv.inputFiles = rawFiles.toList - cv.outputFile = outputFile - cv.setKey = "null" + cv.variant = rawFiles.toList + cv.out = outputFile + cv.setKey = Some("null") cv.excludeNonVariants = !keepRefCalls add(cv) } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala index 96c3821bcb1343163507d9b0e9a950f477a47c6c..43fbe730d4b585edacff62b7a8388a5c82fbe062 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala @@ -5,7 
+5,7 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.extensions.gatk.broad +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable /** Default mode for UnifiedGenotyper */ @@ -14,7 +14,7 @@ class UnifiedGenotyper(val root: Configurable) extends Variantcaller { protected def defaultPrio = 20 def biopetScript() { - val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile) add(ug) } } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala index 8ffdcd962107840ccfbf175310c70035c8c668a1..364691f517c7434a39dc498a58ac1349e7e46d2f 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala @@ -5,7 +5,7 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers -import nl.lumc.sasc.biopet.extensions.gatk.broad +import nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.utils.config.Configurable /** Allele mode for GenotyperAllele */ @@ -14,7 +14,7 @@ class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller { protected def defaultPrio = 9 def biopetScript() { - val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile) ug.alleles = config("input_alleles") ug.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES") add(ug) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala index 
9a0fb2839413948de68d3d16101fc4ce912df5b3..cb213f28e6485c04c7bf8f76a7293cf062516d8a 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala @@ -35,7 +35,8 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { "disable_baq" -> true, "depth" -> 1000000 ), - "varscanmpileup2cns" -> Map("strand_filter" -> 0) + "varscanmpileup2cns" -> Map("strand_filter" -> 0), + "combinevariants" -> Map("scattercount" -> 20) ) override def fixedValues = Map( @@ -67,9 +68,9 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { } val cv = new CombineVariants(this) - cv.inputFiles = sampleVcfs - cv.outputFile = outputFile - cv.setKey = "null" + cv.variant = sampleVcfs + cv.out = outputFile + cv.setKey = Some("null") cv.excludeNonVariants = true add(cv) } diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index 736cff399b1dc09d41ea1df8cc6adfc697503d44..eb1d40ece24ce6862023eae3343b84c840ea3e42 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva import java.io.{ File, FileOutputStream } import com.google.common.io.Files -import nl.lumc.sasc.biopet.extensions.gatk.broad._ +import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator } import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates import nl.lumc.sasc.biopet.extensions.tools.VcfStats import nl.lumc.sasc.biopet.utils.ConfigUtils diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala 
b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index f85137cc27a87069021752d42517e3fe6685359a..8c9dcb1e5496d3e43792bcc83d56644396eaeee6 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -11,16 +11,14 @@ import com.google.common.io.Files import nl.lumc.sasc.biopet.core.BiopetPipe import nl.lumc.sasc.biopet.extensions.Freebayes import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } +import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, HaplotypeCaller, UnifiedGenotyper } import nl.lumc.sasc.biopet.utils.config.Config -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ HaplotypeCaller, UnifiedGenotyper } import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.apache.commons.io.FileUtils import org.broadinstitute.gatk.queue.QSettings import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ AfterClass, DataProvider, Test } +import org.testng.annotations.{ DataProvider, Test } import scala.collection.mutable.ListBuffer