Skip to content
Snippets Groups Projects
Commit ac634ac1 authored by bow's avatar bow
Browse files

Merge branch 'feature-add_toucan_to_shiva' into 'develop'

Feature add toucan to shiva

see also #176 and #182 and #183

See merge request !204
parents 182492a4 d05d08f9
No related branches found
No related tags found
No related merge requests found
Showing
with 132 additions and 31 deletions
...@@ -107,6 +107,9 @@ object ShivaTest { ...@@ -107,6 +107,9 @@ object ShivaTest {
val config = Map( val config = Map(
"name_prefix" -> "test", "name_prefix" -> "test",
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"output_dir" -> outputDir, "output_dir" -> outputDir,
"reference" -> (outputDir + File.separator + "ref.fa"), "reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
......
...@@ -122,6 +122,9 @@ object ShivaVariantcallingTest { ...@@ -122,6 +122,9 @@ object ShivaVariantcallingTest {
val config = Map( val config = Map(
"name_prefix" -> "test", "name_prefix" -> "test",
"output_dir" -> outputDir, "output_dir" -> outputDir,
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"), "reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test", "gatk_jar" -> "test",
......
...@@ -28,7 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } ...@@ -28,7 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference { class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference {
executable = config("exe", submodule = "perl", default = "perl") executable = config("exe", submodule = "perl", default = "perl")
var vep_script: String = config("vep_script") var vepScript: String = config("vep_script")
@Input(doc = "input VCF", required = true) @Input(doc = "input VCF", required = true)
var input: File = null var input: File = null
...@@ -37,14 +37,14 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu ...@@ -37,14 +37,14 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
var output: File = null var output: File = null
override def versionRegex = """version (\d*)""".r override def versionRegex = """version (\d*)""".r
override def versionCommand = executable + " " + vep_script + " --help" override def versionCommand = executable + " " + vepScript + " --help"
//Boolean vars //Boolean vars
var v: Boolean = config("v", default = true) var v: Boolean = config("v", default = true)
var q: Boolean = config("q", default = false) var q: Boolean = config("q", default = false)
var offline: Boolean = config("offline", default = false) var offline: Boolean = config("offline", default = false)
var no_progress: Boolean = config("no_progress", default = false) var no_progress: Boolean = config("no_progress", default = false)
var everything: Boolean = config("everything", default = true) var everything: Boolean = config("everything", default = false)
var force: Boolean = config("force", default = false) var force: Boolean = config("force", default = false)
var no_stats: Boolean = config("no_stats", default = false) var no_stats: Boolean = config("no_stats", default = false)
var stats_text: Boolean = config("stats_text", default = false) var stats_text: Boolean = config("stats_text", default = false)
...@@ -144,15 +144,15 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu ...@@ -144,15 +144,15 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
override def beforeGraph(): Unit = { override def beforeGraph(): Unit = {
super.beforeGraph() super.beforeGraph()
if (!cache && !database) { if (!cache && !database) {
throw new IllegalArgumentException("Must supply either cache or database") throw new IllegalArgumentException("Must supply either cache or database for VariantEffectPredictor")
} else if (cache && dir.isEmpty) { } else if (cache && dir.isEmpty) {
throw new IllegalArgumentException("Must supply dir to cache") throw new IllegalArgumentException("Must supply dir to cache for VariantEffectPredictor")
} }
} }
/** Returns command to execute */ /** Returns command to execute */
def cmdLine = required(executable) + def cmdLine = required(executable) +
required(vep_script) + required(vepScript) +
required("-i", input) + required("-i", input) +
required("-o", output) + required("-o", output) +
conditional(v, "-v") + conditional(v, "-v") +
......
...@@ -20,10 +20,48 @@ import java.io.File ...@@ -20,10 +20,48 @@ import java.io.File
import htsjdk.variant.variantcontext.VariantContextBuilder import htsjdk.variant.variantcontext.VariantContextBuilder
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._ import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, ToolCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
/**
 * Biopet extension for tool VcfWithVcf.
 *
 * Builds the command line for the VcfWithVcf tool: `-I` for the input VCF, `-o` for the
 * output VCF, `-s` for the secondary VCF and one `-f` argument per entry in `fields`.
 */
class VcfWithVcf(val root: Configurable) extends ToolCommandFuntion {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf file", shortName = "input", required = true)
  var input: File = _

  @Input(doc = "Secondary vcf file", shortName = "secondary", required = true)
  var secondaryVcf: File = _

  @Output(doc = "Output vcf file", shortName = "output", required = true)
  var output: File = _

  // Derived in beforeGraph() from the extension of `output`.
  // NOTE(review): this is marked required but stays unset when `output` ends in neither
  // ".gz" nor ".vcf" (e.g. ".bcf"), and it shares shortName "output" with the field above —
  // confirm both are intended.
  @Output(doc = "Output vcf file index", shortName = "output", required = true)
  private var outputIndex: File = _

  /** Fields to annotate, each as (input field, output field, optional method). */
  var fields: List[(String, String, Option[String])] = List()

  override def defaultCoreMemory = 2.0

  /**
   * Derives the index file that belongs to `output` and validates the configuration.
   *
   * @throws IllegalArgumentException when `output` is not set or `fields` is empty
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    // Fail with a descriptive message instead of a bare NullPointerException below.
    require(output != null, "No output file set for VcfWithVcf")
    val outputName = output.getName
    // The two suffixes are mutually exclusive, so at most one index path is assigned.
    if (outputName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi")
    else if (outputName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx")
    if (fields.isEmpty) throw new IllegalArgumentException("No fields found for VcfWithVcf")
  }

  /** Returns the command to execute; a field without a method is passed with method "none". */
  override def commandLine = super.commandLine +
    required("-I", input) +
    required("-o", output) +
    required("-s", secondaryVcf) +
    repeat("-f", fields.map {
      case (inputField, outputField, method) => s"$inputField:$outputField:${method.getOrElse("none")}"
    })
}
/** /**
* This is a tool to annotate a vcf file with info value from another vcf file * This is a tool to annotate a vcf file with info value from another vcf file
* *
...@@ -46,10 +84,10 @@ object VcfWithVcf extends ToolCommand { ...@@ -46,10 +84,10 @@ object VcfWithVcf extends ToolCommand {
opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(inputFile = x) c.copy(inputFile = x)
} }
opt[File]('O', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) => opt[File]('o', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(outputFile = x) c.copy(outputFile = x)
} }
opt[File]('S', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => opt[File]('s', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(secondaryVcf = x) c.copy(secondaryVcf = x)
} }
opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) => opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) =>
...@@ -58,11 +96,11 @@ object VcfWithVcf extends ToolCommand { ...@@ -58,11 +96,11 @@ object VcfWithVcf extends ToolCommand {
else if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields) else if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields)
else c.copy(fields = Fields(x, x) :: c.fields) else c.copy(fields = Fields(x, x) :: c.fields)
} text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>. } text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>.
| By default we will return all values found for a given field. | By default we will return all values found for a given field.
| With <method> the values will processed after getting it from the secondary VCF file, posible methods are: | With <method> the values will processed after getting it from the secondary VCF file, posible methods are:
| - max : takes maximum of found value, only works for numeric (integer/float) fields | - max : takes maximum of found value, only works for numeric (integer/float) fields
| - min : takes minemal of found value, only works for numeric (integer/float) fields | - min : takes minemal of found value, only works for numeric (integer/float) fields
| - unique: takes only unique values """.stripMargin | - unique: takes only unique values """.stripMargin
opt[Boolean]("match") valueName "<Boolean>" maxOccurs 1 action { (x, c) => opt[Boolean]("match") valueName "<Boolean>" maxOccurs 1 action { (x, c) =>
c.copy(matchAllele = x) c.copy(matchAllele = x)
} text "Match alternative alleles; default true" } text "Match alternative alleles; default true"
......
...@@ -45,7 +45,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion { ...@@ -45,7 +45,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion {
var inputVCF: File = null var inputVCF: File = null
@Output(doc = "Output VCF", shortName = "OutputFile", required = true) @Output(doc = "Output VCF", shortName = "OutputFile", required = true)
var outputVCF: File = null var outputVcf: File = null
var mode: String = config("mode", default = "explode") var mode: String = config("mode", default = "explode")
var doNotRemove: Boolean = config("donotremove", default = false) var doNotRemove: Boolean = config("donotremove", default = false)
...@@ -54,7 +54,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion { ...@@ -54,7 +54,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion {
override def commandLine = super.commandLine + override def commandLine = super.commandLine +
required("-I", inputVCF) + required("-I", inputVCF) +
required("-O", outputVCF) + required("-O", outputVcf) +
required("-m", mode) + required("-m", mode) +
conditional(doNotRemove, "--do-not-remove") conditional(doNotRemove, "--do-not-remove")
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
*/ */
package nl.lumc.sasc.biopet.tools package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths import java.nio.file.Paths
import org.scalatest.Matchers import org.scalatest.Matchers
...@@ -36,25 +37,25 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { ...@@ -36,25 +37,25 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
Paths.get(getClass.getResource(p).toURI).toString Paths.get(getClass.getResource(p).toURI).toString
} }
val vepped_path = resourcePath("/VEP_oneline.vcf.gz") val veppedPath = resourcePath("/VEP_oneline.vcf.gz")
val unvepped_path = resourcePath("/unvepped.vcf.gz") val unveppedPath = resourcePath("/unvepped.vcf.gz")
val rand = new Random() val rand = new Random()
@Test def testOutputTypeVcf() = { @Test def testOutputTypeVcf() = {
val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".vcf" val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath
val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ") val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ")
main(arguments) main(arguments)
} }
@Test def testOutputTypeVcfGz() = { @Test def testOutputTypeVcfGz() = {
val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".vcf.gz" val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath
val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ") val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ")
main(arguments) main(arguments)
} }
@Test def testOutputTypeBcf() = { @Test def testOutputTypeBcf() = {
val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".bcf" val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath
val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ") val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ")
main(arguments) main(arguments)
} }
......
...@@ -40,6 +40,11 @@ ...@@ -40,6 +40,11 @@
<artifactId>Mapping</artifactId> <artifactId>Mapping</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Toucan</artifactId>
<version>${project.version}</version>
</dependency>
<dependency> <dependency>
<groupId>org.testng</groupId> <groupId>org.testng</groupId>
<artifactId>testng</artifactId> <artifactId>testng</artifactId>
......
...@@ -24,6 +24,7 @@ import nl.lumc.sasc.biopet.extensions.Ln ...@@ -24,6 +24,7 @@ import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq } import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
...@@ -280,9 +281,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { ...@@ -280,9 +281,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
} else None } else None
lazy val svCalling = if (config("sv_calling", default = false).asBoolean) { lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
val svCalling = new ShivaSvCalling(this) Some(new ShivaSvCalling(this))
samples.foreach(x => x._2.preProcessBam.foreach(bam => svCalling.addBamFile(bam, Some(x._1))))
Some(svCalling)
} else None } else None
/** This will add the multisample variantcalling */ /** This will add the multisample variantcalling */
...@@ -294,10 +293,21 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { ...@@ -294,10 +293,21 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
vc.biopetScript() vc.biopetScript()
addAll(vc.functions) addAll(vc.functions)
addSummaryQScript(vc) addSummaryQScript(vc)
if (config("annotation", default = true).asBoolean) {
val toucan = new Toucan(this)
toucan.outputDir = new File(outputDir, "annotation")
toucan.inputVCF = vc.finalFile
toucan.init()
toucan.biopetScript()
addAll(toucan.functions)
addSummaryQScript(toucan)
}
}) })
svCalling.foreach(sv => { svCalling.foreach(sv => {
sv.outputDir = new File(outputDir, "sv_calling") sv.outputDir = new File(outputDir, "sv_calling")
samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1))))
sv.init() sv.init()
sv.biopetScript() sv.biopetScript()
addAll(sv.functions) addAll(sv.functions)
......
...@@ -103,6 +103,9 @@ object ShivaTest { ...@@ -103,6 +103,9 @@ object ShivaTest {
val config = Map( val config = Map(
"name_prefix" -> "test", "name_prefix" -> "test",
"output_dir" -> outputDir, "output_dir" -> outputDir,
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"), "reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test", "gatk_jar" -> "test",
......
...@@ -103,6 +103,9 @@ object ShivaVariantcallingTest { ...@@ -103,6 +103,9 @@ object ShivaVariantcallingTest {
val config = Map( val config = Map(
"name_prefix" -> "test", "name_prefix" -> "test",
"output_dir" -> outputDir, "output_dir" -> outputDir,
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"), "reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"), "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test", "gatk_jar" -> "test",
......
...@@ -16,9 +16,10 @@ ...@@ -16,9 +16,10 @@
package nl.lumc.sasc.biopet.pipelines.toucan package nl.lumc.sasc.biopet.pipelines.toucan
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference } import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor
import nl.lumc.sasc.biopet.tools.VepNormalizer import nl.lumc.sasc.biopet.tools.{ VcfWithVcf, VepNormalizer }
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
...@@ -27,14 +28,13 @@ import org.broadinstitute.gatk.queue.QScript ...@@ -27,14 +28,13 @@ import org.broadinstitute.gatk.queue.QScript
* *
* Created by ahbbollen on 15-1-15. * Created by ahbbollen on 15-1-15.
*/ */
class Toucan(val root: Configurable) extends QScript with BiopetQScript with Reference { class Toucan(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript with Reference {
def this() = this(null) def this() = this(null)
@Input(doc = "Input VCF file", shortName = "Input", required = true) @Input(doc = "Input VCF file", shortName = "Input", required = true)
var inputVCF: File = _ var inputVCF: File = _
def init(): Unit = { def init(): Unit = {
} }
override def defaults = ConfigUtils.mergeMaps(Map( override def defaults = ConfigUtils.mergeMaps(Map(
...@@ -52,10 +52,45 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Ref ...@@ -52,10 +52,45 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Ref
val normalizer = new VepNormalizer(this) val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output normalizer.inputVCF = vep.output
normalizer.outputVCF = swapExt(vep.output, ".vcf", ".normalized.vcf.gz") normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz")
add(normalizer) add(normalizer)
// Optional annotation steps; each one runs only when its VCF file is set in the config
val gonlVcfFile: Option[File] = config("gonl_vcf")
val exacVcfFile: Option[File] = config("exac_vcf")
var outputFile = normalizer.outputVcf
gonlVcfFile match {
case Some(gonlFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
exacVcfFile match {
case Some(exacFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("MAF", "MAF_exac", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
} }
/** Returns the file `Toucan.summary.json` located directly inside `outputDir`. */
def summaryFile = new File(outputDir, "Toucan.summary.json")
/** Always returns an empty map. */
// NOTE(review): presumably the SummaryQScript hook for files to include in the summary — confirm.
def summaryFiles = Map()
/** Always returns an empty map. */
// NOTE(review): presumably the SummaryQScript hook for settings to include in the summary — confirm.
def summarySettings = Map()
} }
object Toucan extends PipelineCommand object Toucan extends PipelineCommand
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment