diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala deleted file mode 100644 index ceceed5f64ba51a75b098e4cd1b18beaa4f1894d..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction } -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } - -class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { - - javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getName - - @Input(required = true) - var inputFiles: List[File] = Nil - - @Output(required = true) - var outputFile: File = null - - @Input - var reference: File = null - - var assumeSorted = false - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (reference == null) reference = referenceFasta() - } - - override def cmdLine = super.cmdLine + - repeat("-V", inputFiles) + - required("-out", outputFile) + - required("-R", reference) + - conditional(assumeSorted, "--assumeSorted") -} - -object CatVariants { - def apply(root: Configurable, input: List[File], output: File): CatVariants = { - val cv = new CatVariants(root) - cv.inputFiles = input - cv.outputFile = output - cv - } -} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala deleted file mode 100644 index 343e2d769656dd6800d4cd552f51aa25cec7d28c..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.utils.VcfUtils -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - * - * @deprecated - */ -class CombineVariants(val root: Configurable) extends Gatk { - val analysisType = "CombineVariants" - - @Input(doc = "", required = true) - var inputFiles: List[File] = Nil - - @Output(doc = "", required = true) - var outputFile: File = null - - var setKey: String = null - var rodPriorityList: String = null - var minimumN: Int = config("minimumN", default = 1) - var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions") - var excludeNonVariants: Boolean = false - - var inputMap: Map[File, String] = Map() - - def addInput(file: File, name: String): Unit = { - inputFiles :+= file - inputMap += file -> name - } - - @Output - @Gather(enabled = false) - private var outputIndex: File = _ - - override def beforeGraph(): Unit = { - super.beforeGraph() - outputIndex = VcfUtils.getVcfIndexFile(outputFile) - genotypeMergeOptions match { - case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None => - case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions") - } - deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct - } - - override def cmdLine = super.cmdLine + - (for (file <- inputFiles) yield { - inputMap.get(file) match { - case Some(name) => required("-V:" + name, file) - case _ => required("-V", file) - } - }).mkString + - required("-o", outputFile) + - optional("--setKey", setKey) + - optional("--rod_priority_list", rodPriorityList) + - optional("-genotypeMergeOptions", genotypeMergeOptions) + - conditional(excludeNonVariants, "--excludeNonVariants") -} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala deleted file mode 100644 index 92ca40e02d94e4935f3f1c031cf1371e4b77b8a2..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction, Reference } -import org.broadinstitute.gatk.utils.commandline.Input - -/** - * General extension for GATK module - * - * Created by pjvan_thof on 2/26/15. - * - * @deprecated - */ -abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Version { - override def subPath = "gatk" :: super.subPath - - jarFile = config("gatk_jar") - - val analysisType: String - - override def defaultCoreMemory = 3.0 - - @Input(required = true) - var reference: File = null - - @Input(required = false) - var gatkKey: Option[File] = config("gatk_key") - - @Input(required = false) - var intervals: List[File] = config("intervals", default = Nil) - - @Input(required = false) - var excludeIntervals: List[File] = config("exclude_intervals", default = Nil) - - @Input(required = false) - var pedigree: List[File] = config("pedigree", default = Nil) - - var et: Option[String] = config("et") - - def versionRegex = """(.*)""".r - override def versionExitcode = List(0, 1) - def versionCommand = executable + " -jar " + jarFile + " -version" - - override def getVersion = super.getVersion.collect { case version => "Gatk " + version } - override def dictRequired = true - - override def beforeGraph(): Unit = { - super.beforeGraph() - if (reference == null) reference = referenceFasta() - } - - override def cmdLine = super.cmdLine + - required("-T", analysisType) + - required("-R", reference) + - optional("-K", gatkKey) + - optional("-et", et) + - repeat("-L", intervals) + - repeat("-XL", excludeIntervals) + - repeat("-ped", pedigree) -} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala deleted file mode 100644 index 62d2457de0431b8c28ddb3e2eae58352a4b39b23..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.core.summary.Summarizable -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import org.broadinstitute.gatk.utils.report.{ GATKReportTable, GATKReport } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - * - * @deprecated - */ -class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable { - val analysisType = "GenotypeConcordance" - - @Input(required = true) - var evalFile: File = null - - @Input(required = true) - var compFile: File = null - - @Output(required = true) - var outputFile: File = null - - var moltenize = true - - def summaryFiles = Map("output" -> outputFile) - - def summaryStats = { - val report = new GATKReport(outputFile) - val compProportions = report.getTable("GenotypeConcordance_CompProportions") - val counts = report.getTable("GenotypeConcordance_Counts") - val evalProportions = report.getTable("GenotypeConcordance_EvalProportions") - val genotypeSummary = report.getTable("GenotypeConcordance_Summary") - val siteSummary = report.getTable("SiteConcordance_Summary") - - val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString - - def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> { - (for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap - }).toMap - - Map( - "compProportions" -> getMap(compProportions, "Proportion"), - "counts" -> getMap(counts, "Count"), - "evalProportions" -> getMap(evalProportions, "Proportion"), - "genotypeSummary" -> samples.distinct.map(sample => { - val i = samples.indexOf(sample) - sample -> Map( - "Non-Reference_Discrepancy" -> genotypeSummary.get(i, "Non-Reference_Discrepancy"), - "Non-Reference_Sensitivity" -> genotypeSummary.get(i, "Non-Reference_Sensitivity"), - "Overall_Genotype_Concordance" -> genotypeSummary.get(i, "Overall_Genotype_Concordance") - ) - }).toMap, - "siteSummary" -> Map( - "ALLELES_MATCH" -> siteSummary.get(0, "ALLELES_MATCH"), - "EVAL_SUPERSET_TRUTH" -> siteSummary.get(0, "EVAL_SUPERSET_TRUTH"), - "EVAL_SUBSET_TRUTH" -> siteSummary.get(0, "EVAL_SUBSET_TRUTH"), - "ALLELES_DO_NOT_MATCH" -> siteSummary.get(0, "ALLELES_DO_NOT_MATCH"), - "EVAL_ONLY" -> siteSummary.get(0, "EVAL_ONLY"), - "TRUTH_ONLY" -> siteSummary.get(0, "TRUTH_ONLY") - ) - ) - } - - override def beforeGraph(): Unit = { - super.beforeGraph() - deps :::= (evalFile :: compFile :: Nil).filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct - } - - override def cmdLine = super.cmdLine + - required("--eval", evalFile) + - required("--comp", compFile) + - required("-o", outputFile) + - conditional(moltenize, "--moltenize") -} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala deleted file mode 100644 index d98abff1485b59cd0424eff47d03b0d1dbfe585d..0000000000000000000000000000000000000000 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.utils.VcfUtils -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output } - -/** - * Extension for CombineVariants from GATK - * - * Created by pjvan_thof on 2/26/15. - * - * @deprecated - */ -class SelectVariants(val root: Configurable) extends Gatk { - val analysisType = "SelectVariants" - - @Input(doc = "", required = true) - var inputFiles: List[File] = Nil - - @Output(doc = "", required = true) - var outputFile: File = null - - var excludeNonVariants: Boolean = false - - var inputMap: Map[File, String] = Map() - - def addInput(file: File, name: String): Unit = { - inputFiles :+= file - inputMap += file -> name - } - - @Output - @Gather(enabled = false) - private var outputIndex: File = _ - - override def beforeGraph(): Unit = { - super.beforeGraph() - outputIndex = VcfUtils.getVcfIndexFile(outputFile) - deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) - deps = deps.distinct - } - - override def cmdLine = super.cmdLine + - (for (file <- inputFiles) yield { - inputMap.get(file) match { - case Some(name) => required("-V:" + name, file) - case _ => required("-V", file) - } - }).mkString + - required("-o", outputFile) + - conditional(excludeNonVariants, "--excludeNonVariants") -} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeConcordance.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeConcordance.scala new file mode 100644 index 0000000000000000000000000000000000000000..c5cf2d28ab375280d412ddcfbf26217dcee771ef --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeConcordance.scala @@ -0,0 +1,122 @@ +package nl.lumc.sasc.biopet.extensions.gatk.broad + +import java.io.File + +import nl.lumc.sasc.biopet.core.ScatterGatherableFunction +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile +import org.broadinstitute.gatk.utils.commandline.{Argument, Gather, Output, _} +import org.broadinstitute.gatk.utils.report.{GATKReport, GATKReportTable} + +class GenotypeConcordance(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction with Summarizable { + analysisName = "GenotypeConcordance" + val analysis_type = "GenotypeConcordance" + scatterClass = classOf[LocusScatterFunction] + setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false } + + /** The variants and genotypes to evaluate */ + @Input(fullName="eval", shortName="eval", doc="The variants and genotypes to evaluate", required=true, exclusiveOf="", validation="") + var eval: File = _ + + /** The variants and genotypes to compare against */ + @Input(fullName="comp", shortName="comp", doc="The variants and genotypes to compare against", required=true, exclusiveOf="", validation="") + var comp: File = _ + + /** Filters will be ignored */ + @Argument(fullName="ignoreFilters", shortName="", doc="Filters will be ignored", required=false, exclusiveOf="", validation="") + var ignoreFilters: Boolean = config("ignoreFilters", default = false) + + /** One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod. */ + @Argument(fullName="genotypeFilterExpressionEval", shortName="gfe", doc="One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod.", required=false, exclusiveOf="", validation="") + var genotypeFilterExpressionEval: List[String] = config("genotypeFilterExpressionEval", default = Nil) + + /** One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod. */ + @Argument(fullName="genotypeFilterExpressionComp", shortName="gfc", doc="One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod.", required=false, exclusiveOf="", validation="") + var genotypeFilterExpressionComp: Seq[String] = config("genotypeFilterExpressionComp", default = Nil) + + /** Molten rather than tabular output */ + @Argument(fullName="moltenize", shortName="moltenize", doc="Molten rather than tabular output", required=false, exclusiveOf="", validation="") + var moltenize: Boolean = config("moltenize", default = true) + + /** File to output the discordant sites and genotypes. */ + @Output(fullName="printInterestingSites", shortName="sites", doc="File to output the discordant sites and genotypes.", required=false, exclusiveOf="", validation="") + var printInterestingSites: Option[File] = None + + /** An output file created by the walker. Will overwrite contents if file exists */ + @Output(fullName="out", shortName="o", doc="An output file created by the walker. Will overwrite contents if file exists", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */ + @Argument(fullName="filter_reads_with_N_cigar", shortName="filterRNC", doc="Filter out reads with CIGAR containing the N operator, instead of failing with an error", required=false, exclusiveOf="", validation="") + var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false) + + /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false) + + /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */ + @Argument(fullName="filter_bases_not_stored", shortName="filterNoBases", doc="Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required=false, exclusiveOf="", validation="") + var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false) + + def summaryFiles = Map("output" -> out) + + def summaryStats = { + val report = new GATKReport(out) + val compProportions = report.getTable("GenotypeConcordance_CompProportions") + val counts = report.getTable("GenotypeConcordance_Counts") + val evalProportions = report.getTable("GenotypeConcordance_EvalProportions") + val genotypeSummary = report.getTable("GenotypeConcordance_Summary") + val siteSummary = report.getTable("SiteConcordance_Summary") + + val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString + + def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> { + (for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap + }).toMap + + Map( + "compProportions" -> getMap(compProportions, "Proportion"), + "counts" -> getMap(counts, "Count"), + "evalProportions" -> getMap(evalProportions, "Proportion"), + "genotypeSummary" -> samples.distinct.map(sample => { + val i = samples.indexOf(sample) + sample -> Map( + "Non-Reference_Discrepancy" -> genotypeSummary.get(i, "Non-Reference_Discrepancy"), + "Non-Reference_Sensitivity" -> genotypeSummary.get(i, "Non-Reference_Sensitivity"), + "Overall_Genotype_Concordance" -> genotypeSummary.get(i, "Overall_Genotype_Concordance") + ) + }).toMap, + "siteSummary" -> Map( + "ALLELES_MATCH" -> siteSummary.get(0, "ALLELES_MATCH"), + "EVAL_SUPERSET_TRUTH" -> siteSummary.get(0, "EVAL_SUPERSET_TRUTH"), + "EVAL_SUBSET_TRUTH" -> siteSummary.get(0, "EVAL_SUBSET_TRUTH"), + "ALLELES_DO_NOT_MATCH" -> siteSummary.get(0, "ALLELES_DO_NOT_MATCH"), + "EVAL_ONLY" -> siteSummary.get(0, "EVAL_ONLY"), + "TRUTH_ONLY" -> siteSummary.get(0, "TRUTH_ONLY") + ) + ) + } + + + override def beforeGraph() { + super.beforeGraph() + if (eval != null) deps :+= VcfUtils.getVcfIndexFile(eval) + if (comp != null) deps :+= VcfUtils.getVcfIndexFile(comp) + } + + override def cmdLine = super.cmdLine + + required(TaggedFile.formatCommandLineParameter("-eval", eval), eval, spaceSeparated=true, escape=true, format="%s") + + required(TaggedFile.formatCommandLineParameter("-comp", comp), comp, spaceSeparated=true, escape=true, format="%s") + + conditional(ignoreFilters, "--ignoreFilters", escape=true, format="%s") + + repeat("-gfe", genotypeFilterExpressionEval, spaceSeparated=true, escape=true, format="%s") + + repeat("-gfc", genotypeFilterExpressionComp, spaceSeparated=true, escape=true, format="%s") + + conditional(moltenize, "-moltenize", escape=true, format="%s") + + optional("-sites", printInterestingSites, spaceSeparated=true, escape=true, format="%s") + + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + + conditional(filter_reads_with_N_cigar, "-filterRNC", escape=true, format="%s") + + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + + conditional(filter_bases_not_stored, "-filterNoBases", escape=true, format="%s") +} diff --git a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala index 6e2aa683f8e6e2abe31e2e8307d71db8c41c5258..ebba259271ebb0ea1174f56e5500468d04873387 100644 --- a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala +++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala @@ -24,7 +24,7 @@ import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, BiopetQScript, Pipe import nl.lumc.sasc.biopet.extensions._ import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2Build, BowtieBuild } import nl.lumc.sasc.biopet.extensions.bwa.BwaIndex -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.gatk.broad.CombineVariants import nl.lumc.sasc.biopet.extensions.gmap.GmapBuild import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx @@ -173,7 +173,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri => val cv = new CombineVariants(this) - cv.reference = fastaFile + cv.reference_sequence = fastaFile cv.deps ::= createDict.output def addDownload(uri: String): Unit = { val curl = new Curl(this) @@ -181,7 +181,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript curl.output = new File(annotationDir, new File(curl.url).getName) curl.isIntermediate = true add(curl) - cv.inputFiles ::= curl.output + cv.variant :+= curl.output val tabix = new Tabix(this) tabix.input = curl.output @@ -198,7 +198,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript case _ => addDownload(dbsnpUri.toString) } - cv.outputFile = new File(annotationDir, "dbsnp.vcf.gz") + cv.out = new File(annotationDir, "dbsnp.vcf.gz") add(cv) } diff --git a/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala index afc6bbdc3ba737db63f5c4270009de0a60b8deaa..52f769fff21887a7556b97c18ba8b1f24b023d73 100644 --- a/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala +++ b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala @@ -6,7 +6,7 @@ import java.util import htsjdk.samtools.reference.FastaSequenceFile import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference } import nl.lumc.sasc.biopet.extensions.Snptest -import nl.lumc.sasc.biopet.extensions.gatk.{ CatVariants, SelectVariants } +import nl.lumc.sasc.biopet.extensions.gatk.broad.{ CatVariants, SelectVariants } import nl.lumc.sasc.biopet.extensions.tools.{ GensToVcf, SnptestToVcf } import nl.lumc.sasc.biopet.pipelines.gwastest.impute.ImputeOutput import nl.lumc.sasc.biopet.utils.Logging @@ -82,10 +82,10 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R gensToVcf.outputVcf = new File(outputDirGens, gen._1.genotypes.getName + s".${gen._2}.vcf.gz") gensToVcf.isIntermediate = true add(gensToVcf) - cvChr.inputFiles :+= gensToVcf.outputVcf + cvChr.variant :+= gensToVcf.outputVcf } add(cvChr) - cvTotal.inputFiles :+= cvChr.outputFile + cvTotal.variant :+= cvChr.outputFile contig -> cvChr.outputFile } add(cvTotal) @@ -105,14 +105,14 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R bedFile.deleteOnExit() val sv = new SelectVariants(this) - sv.inputFiles :+= chrVcfFiles.getOrElse(region.chr, vcfFile) - sv.outputFile = new File(regionDir, s"$name.vcf.gz") + sv.variant :+= chrVcfFiles.getOrElse(region.chr, vcfFile) + sv.out = new File(regionDir, s"$name.vcf.gz") sv.intervals :+= bedFile sv.isIntermediate = true add(sv) val snptest = new Snptest(this) - snptest.inputGenotypes :+= sv.outputFile + snptest.inputGenotypes :+= sv.out snptest.inputSampleFiles :+= phenotypeFile snptest.outputFile = Some(new File(regionDir, s"$name.snptest")) add(snptest) @@ -127,7 +127,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R } val cv = new CatVariants(this) - cv.inputFiles = snpTests.map(_._2).toList + cv.variant = snpTests.map(_._2).toList cv.outputFile = new File(outputDir, "snptest" + File.separator + "snptest.vcf.gz") add(cv) } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index 6020412479bf3331a35ff69c82327c212d889352..acb2076013c56d8d2ef06479f942ab1775ab7abf 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -15,17 +15,18 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.{PipelineCommand, Reference, SampleLibraryTag} import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.Tabix -import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } +import nl.lumc.sasc.biopet.extensions.gatk.broad.{CombineVariants, GenotypeConcordance} import nl.lumc.sasc.biopet.extensions.tools.VcfStats -import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize } +import nl.lumc.sasc.biopet.extensions.vt.{VtDecompose, VtNormalize} import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSample, _ } -import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{VarscanCnsSingleSample, _} +import nl.lumc.sasc.biopet.utils.{BamUtils, Logging} import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript +import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile /** * Implementation of ShivaVariantcalling @@ -86,10 +87,10 @@ class ShivaVariantcalling(val root: Configurable) extends QScript require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) val cv = new CombineVariants(qscript) - cv.outputFile = finalFile + cv.out = finalFile cv.setKey = "VariantCaller" - cv.genotypeMergeOptions = Some("PRIORITIZE") - cv.rodPriorityList = callers.map(_.name).mkString(",") + cv.genotypemergeoption = Some("PRIORITIZE") + cv.rod_priority_list = Some(callers.map(_.name).mkString(",")) for (caller <- callers) { caller.inputBams = inputBams caller.namePrefix = namePrefix @@ -110,17 +111,17 @@ class ShivaVariantcalling(val root: Configurable) extends QScript vtDecompose.inputVcf = vtNormalize.outputVcf vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) + cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name) } else if (normalize && !decompose) { vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) - cv.addInput(vtNormalize.outputVcf, caller.name) + cv.variant :+= TaggedFile(vtNormalize.outputVcf, caller.name) } else if (!normalize && decompose) { vtDecompose.inputVcf = caller.outputFile vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) - } else cv.addInput(caller.outputFile, caller.name) + cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name) + } else cv.variant :+= TaggedFile(caller.outputFile, caller.name) } add(cv) @@ -139,9 +140,9 @@ class ShivaVariantcalling(val root: Configurable) extends QScript referenceVcf.foreach(referenceVcfFile => { val gc = new GenotypeConcordance(this) - gc.evalFile = vcfFile - gc.compFile = referenceVcfFile - gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") + gc.eval = vcfFile + gc.comp = referenceVcfFile + gc.out = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") referenceVcfRegions.foreach(gc.intervals ::= _) add(gc) addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala index 98fe0e0a06342cee60db461acc33f1a64b5c23b2..ddf7af84be9b26dcf759273eac370abdeb76a1b7 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala @@ -16,7 +16,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.svcallers import nl.lumc.sasc.biopet.extensions.delly.DellyCaller -import nl.lumc.sasc.biopet.extensions.gatk.CatVariants +import nl.lumc.sasc.biopet.extensions.gatk.broad.CatVariants import nl.lumc.sasc.biopet.utils.config.Configurable /** Script for sv caller delly */ @@ -41,7 +41,7 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "DEL" delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (dup) { val delly = new DellyCaller(this) @@ -49,7 +49,7 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "DUP" delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (inv) { val delly = new DellyCaller(this) @@ -57,18 +57,18 @@ class Delly(val root: Configurable) extends SvCaller { delly.analysistype = "INV" delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf") add(delly) - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf } if (tra) { val delly = new DellyCaller(this) delly.input = bamFile delly.analysistype = "TRA" delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf") - catVariants.inputFiles :+= delly.outputvcf + catVariants.variant :+= delly.outputvcf add(delly) } - require(catVariants.inputFiles.nonEmpty, "Must atleast 1 SV-type be selected for Delly") + require(catVariants.variant.nonEmpty, "Must atleast 1 SV-type be selected for Delly") add(catVariants) addVCF(sample, catVariants.outputFile) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala index 847e671166191da3153cc2df818828c66de37aa1..4fe9d88c10e5fa63742aa20b57769c47b0a96b61 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers import java.io.File -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.gatk.broad.CombineVariants import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup import nl.lumc.sasc.biopet.extensions.tools.{ VcfFilter, MpileupToVcf } import nl.lumc.sasc.biopet.utils.config.Configurable @@ -60,8 +60,8 @@ class RawVcf(val root: Configurable) extends Variantcaller { } val cv = new CombineVariants(this) - cv.inputFiles = rawFiles.toList - cv.outputFile = outputFile + cv.variant = rawFiles.toList + cv.out = outputFile cv.setKey = "null" cv.excludeNonVariants = !keepRefCalls add(cv) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala index 9a0fb2839413948de68d3d16101fc4ce912df5b3..96c6d9a78bc42f561b3de20936bea603337135be 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers import java.io.PrintWriter -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.gatk.broad.CombineVariants import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup import nl.lumc.sasc.biopet.extensions.varscan.{ FixMpileup, VarscanMpileup2cns } import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix } @@ -35,7 +35,8 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { "disable_baq" -> true, "depth" -> 1000000 ), - "varscanmpileup2cns" -> Map("strand_filter" -> 0) + "varscanmpileup2cns" -> Map("strand_filter" -> 0), + "combinevariants" -> Map("scattercount" -> 20) ) override def fixedValues = Map( @@ -67,9 +68,9 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { } val cv = new CombineVariants(this) - cv.inputFiles = sampleVcfs - cv.outputFile = outputFile - cv.setKey = "null" + cv.variant = sampleVcfs + cv.out = outputFile + cv.setKey = Some("null") cv.excludeNonVariants = true add(cv) } diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index f85137cc27a87069021752d42517e3fe6685359a..e3e7d00d40dadf49ffd4390652f8d417f65653ae 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -12,7 +12,7 @@ import nl.lumc.sasc.biopet.core.BiopetPipe import nl.lumc.sasc.biopet.extensions.Freebayes import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } import nl.lumc.sasc.biopet.utils.config.Config -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.gatk.broad.CombineVariants import nl.lumc.sasc.biopet.extensions.gatk.broad.{ HaplotypeCaller, UnifiedGenotyper } import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } import nl.lumc.sasc.biopet.utils.ConfigUtils