diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala index f8d94cd6a31db716cf1164faacfa6127ecae8cd7..634a3fb17945144093f26dbcd11427b65474f7b6 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala @@ -107,6 +107,9 @@ object ShivaTest { val config = Map( "name_prefix" -> "test", + "cache" -> true, + "dir" -> "test", + "vep_script" -> "test", "output_dir" -> outputDir, "reference" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"), diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala index 8ebb3d95266da636fc4ef63712d5d35496b9ecaa..1609f5c31bb4a6b648a9a5aa4bb5a7de531e4714 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala @@ -122,6 +122,9 @@ object ShivaVariantcallingTest { val config = Map( "name_prefix" -> "test", "output_dir" -> outputDir, + "cache" -> true, + "dir" -> "test", + "vep_script" -> "test", "reference" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"), "gatk_jar" -> "test", diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index 1adfd543149fbb8648a9d561e04fbf13f33e32f9..b124a08aa5944beebff8507c09646618c4bc0ab7 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -28,7 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference { executable = config("exe", submodule = "perl", default = "perl") - var vep_script: String = config("vep_script") + var vepScript: String = config("vep_script") @Input(doc = "input VCF", required = true) var input: File = null @@ -37,14 +37,14 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu var output: File = null override def versionRegex = """version (\d*)""".r - override def versionCommand = executable + " " + vep_script + " --help" + override def versionCommand = executable + " " + vepScript + " --help" //Boolean vars var v: Boolean = config("v", default = true) var q: Boolean = config("q", default = false) var offline: Boolean = config("offline", default = false) var no_progress: Boolean = config("no_progress", default = false) - var everything: Boolean = config("everything", default = true) + var everything: Boolean = config("everything", default = false) var force: Boolean = config("force", default = false) var no_stats: Boolean = config("no_stats", default = false) var stats_text: Boolean = config("stats_text", default = false) @@ -144,15 +144,15 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu override def beforeGraph(): Unit = { super.beforeGraph() if (!cache && !database) { - throw new IllegalArgumentException("Must supply either cache or database") + throw new IllegalArgumentException("Must supply either cache or database for VariantEffectPredictor") } else if (cache && dir.isEmpty) { - throw new IllegalArgumentException("Must supply dir to cache") + throw new IllegalArgumentException("Must supply dir to cache for VariantEffectPredictor") } } /** Returns command to execute */ def cmdLine = required(executable) + - required(vep_script) + + required(vepScript) + required("-i", input) + required("-o", output) + conditional(v, "-v") + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala index 9009d2918574fdba847adcdd5000ac10bd5e6a2f..8bdf74983208d5b1d5c480f7c9cbf511ecd7a4e1 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala @@ -20,10 +20,48 @@ import java.io.File import htsjdk.variant.variantcontext.VariantContextBuilder import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.vcf._ -import nl.lumc.sasc.biopet.core.ToolCommand +import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, ToolCommand } +import nl.lumc.sasc.biopet.core.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Output, Input } import scala.collection.JavaConversions._ +/** + * Biopet extension for tool VcfWithVcf + */ +class VcfWithVcf(val root: Configurable) extends ToolCommandFuntion { + javaMainClass = getClass.getName + + @Input(doc = "Input vcf file", shortName = "input", required = true) + var input: File = _ + + @Input(doc = "Secondary vcf file", shortName = "secondary", required = true) + var secondaryVcf: File = _ + + @Output(doc = "Output vcf file", shortName = "output", required = true) + var output: File = _ + + @Output(doc = "Output vcf file index", shortName = "output", required = true) + private var outputIndex: File = _ + + var fields: List[(String, String, Option[String])] = List() + + override def defaultCoreMemory = 2.0 + + override def beforeGraph() { + super.beforeGraph() + if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") + if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx") + if (fields.isEmpty) throw new IllegalArgumentException("No fields found for VcfWithVcf") + } + + override def commandLine = super.commandLine + + required("-I", input) + + required("-o", output) + + required("-s", secondaryVcf) + + repeat("-f", fields.map(x => x._1 + ":" + x._2 + ":" + x._3.getOrElse("none"))) +} + /** * This is a tool to annotate a vcf file with info value from a other vcf file * @@ -46,10 +84,10 @@ object VcfWithVcf extends ToolCommand { opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(inputFile = x) } - opt[File]('O', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => + opt[File]('o', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(outputFile = x) } - opt[File]('S', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => + opt[File]('s', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(secondaryVcf = x) } opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) => @@ -58,11 +96,11 @@ object VcfWithVcf extends ToolCommand { else if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields) else c.copy(fields = Fields(x, x) :: c.fields) } text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>. - | By default we will return all values found for a given field. - | With <method> the values will processed after getting it from the secondary VCF file, posible methods are: - | - max : takes maximum of found value, only works for numeric (integer/float) fields - | - min : takes minemal of found value, only works for numeric (integer/float) fields - | - unique: takes only unique values """.stripMargin + | By default we will return all values found for a given field. + | With <method> the values will processed after getting it from the secondary VCF file, posible methods are: + | - max : takes maximum of found value, only works for numeric (integer/float) fields + | - min : takes minemal of found value, only works for numeric (integer/float) fields + | - unique: takes only unique values """.stripMargin opt[Boolean]("match") valueName "<Boolean>" maxOccurs 1 action { (x, c) => c.copy(matchAllele = x) } text "Match alternative alleles; default true" diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala index 5878d03f32d5b9943bfb4e734b2c5f7d0b806a41..8543b7706670ef40d9a877f27a0bf1b1ed3d77a9 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala @@ -45,7 +45,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion { var inputVCF: File = null @Output(doc = "Output VCF", shortName = "OutputFile", required = true) - var outputVCF: File = null + var outputVcf: File = null var mode: String = config("mode", default = "explode") var doNotRemove: Boolean = config("donotremove", default = false) @@ -54,7 +54,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion { override def commandLine = super.commandLine + required("-I", inputVCF) + - required("-O", outputVCF) + + required("-O", outputVcf) + required("-m", mode) + conditional(doNotRemove, "--do-not-remove") } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala index 4578e32298b66cb76ee6e3e8b422626e18e73c25..9996dfed3befed4e8913ee1377d04953607dfbc9 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala @@ -15,6 +15,7 @@ */ package nl.lumc.sasc.biopet.tools +import java.io.File import java.nio.file.Paths import org.scalatest.Matchers @@ -36,25 +37,25 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { Paths.get(getClass.getResource(p).toURI).toString } - val vepped_path = resourcePath("/VEP_oneline.vcf.gz") - val unvepped_path = resourcePath("/unvepped.vcf.gz") + val veppedPath = resourcePath("/VEP_oneline.vcf.gz") + val unveppedPath = resourcePath("/unvepped.vcf.gz") val rand = new Random() @Test def testOutputTypeVcf() = { - val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".vcf" - val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ") + val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ") main(arguments) } @Test def testOutputTypeVcfGz() = { - val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".vcf.gz" - val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ") + val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ") main(arguments) } @Test def testOutputTypeBcf() = { - val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".bcf" - val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ") + val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath + val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ") main(arguments) } diff --git a/public/shiva/pom.xml b/public/shiva/pom.xml index e7d211a68363a3e8987d98938df3caa74c5abf65..36c227e8496da9a602bcdf4c60afe07880dc8af4 100644 --- a/public/shiva/pom.xml +++ b/public/shiva/pom.xml @@ -40,6 +40,11 @@ <artifactId>Mapping</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Toucan</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.testng</groupId> <artifactId>testng</artifactId> diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index 5eb7bf4116c412729ecd4b943f6c3f5b9af62c5c..3f65bf2a20864e2017f512981daa0d37aca52772 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -24,6 +24,7 @@ import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq } import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.mapping.Mapping +import nl.lumc.sasc.biopet.pipelines.toucan.Toucan import scala.collection.JavaConversions._ @@ -280,9 +281,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { } else None lazy val svCalling = if (config("sv_calling", default = false).asBoolean) { - val svCalling = new ShivaSvCalling(this) - samples.foreach(x => x._2.preProcessBam.foreach(bam => svCalling.addBamFile(bam, Some(x._1)))) - Some(svCalling) + Some(new ShivaSvCalling(this)) } else None /** This will add the mutisample variantcalling */ @@ -294,10 +293,21 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { vc.biopetScript() addAll(vc.functions) addSummaryQScript(vc) + + if (config("annotation", default = true).asBoolean) { + val toucan = new Toucan(this) + toucan.outputDir = new File(outputDir, "annotation") + toucan.inputVCF = vc.finalFile + toucan.init() + toucan.biopetScript() + addAll(toucan.functions) + addSummaryQScript(toucan) + } }) svCalling.foreach(sv => { sv.outputDir = new File(outputDir, "sv_calling") + samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1)))) sv.init() sv.biopetScript() addAll(sv.functions) diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index d7e7eadf623a3271f6495d95f713b6725d6c1cff..e37756ef76504268f746dfb7ceda62cf29ed2d75 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -103,6 +103,9 @@ object ShivaTest { val config = Map( "name_prefix" -> "test", "output_dir" -> outputDir, + "cache" -> true, + "dir" -> "test", + "vep_script" -> "test", "reference" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"), "gatk_jar" -> "test", diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index ed987ceaf4471a0659b627179a05781fa63b5b74..82e21f195aefde3199f19f5a5edf8088ea835201 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -103,6 +103,9 @@ object ShivaVariantcallingTest { val config = Map( "name_prefix" -> "test", "output_dir" -> outputDir, + "cache" -> true, + "dir" -> "test", + "vep_script" -> "test", "reference" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"), "gatk_jar" -> "test", diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index ef59e2680a7093210a91e527c54545495d8a92f4..ccd47d551363be2bf65d0fd0176f02f8092060a1 100644 --- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -16,9 +16,10 @@ package nl.lumc.sasc.biopet.pipelines.toucan import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference } import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor -import nl.lumc.sasc.biopet.tools.VepNormalizer +import nl.lumc.sasc.biopet.tools.{ VcfWithVcf, VepNormalizer } import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.queue.QScript @@ -27,14 +28,13 @@ import org.broadinstitute.gatk.queue.QScript * * Created by ahbbollen on 15-1-15. */ -class Toucan(val root: Configurable) extends QScript with BiopetQScript with Reference { +class Toucan(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript with Reference { def this() = this(null) @Input(doc = "Input VCF file", shortName = "Input", required = true) var inputVCF: File = _ def init(): Unit = { - } override def defaults = ConfigUtils.mergeMaps(Map( @@ -52,10 +52,45 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Ref val normalizer = new VepNormalizer(this) normalizer.inputVCF = vep.output - normalizer.outputVCF = swapExt(vep.output, ".vcf", ".normalized.vcf.gz") + normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz") add(normalizer) + + // Optional annotation steps, depend is some files existing in the config + val gonlVcfFile: Option[File] = config("gonl_vcf") + val exacVcfFile: Option[File] = config("exac_vcf") + + var outputFile = normalizer.outputVcf + + gonlVcfFile match { + case Some(gonlFile) => + val vcfWithVcf = new VcfWithVcf(this) + vcfWithVcf.input = outputFile + vcfWithVcf.secondaryVcf = gonlFile + vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") + vcfWithVcf.fields ::= ("AF", "AF_gonl", None) + add(vcfWithVcf) + outputFile = vcfWithVcf.output + case _ => + } + + exacVcfFile match { + case Some(exacFile) => + val vcfWithVcf = new VcfWithVcf(this) + vcfWithVcf.input = outputFile + vcfWithVcf.secondaryVcf = exacFile + vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz") + vcfWithVcf.fields ::= ("MAF", "MAF_exac", None) + add(vcfWithVcf) + outputFile = vcfWithVcf.output + case _ => + } } + def summaryFile = new File(outputDir, "Toucan.summary.json") + + def summaryFiles = Map() + + def summarySettings = Map() } object Toucan extends PipelineCommand \ No newline at end of file