Commit ac634ac1 authored by bow's avatar bow
Browse files

Merge branch 'feature-add_toucan_to_shiva' into 'develop'

Feature add toucan to shiva

see also #176 and #182 and #183

See merge request !204
parents 182492a4 d05d08f9
......@@ -107,6 +107,9 @@ object ShivaTest {
val config = Map(
"name_prefix" -> "test",
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"output_dir" -> outputDir,
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
......
......@@ -122,6 +122,9 @@ object ShivaVariantcallingTest {
val config = Map(
"name_prefix" -> "test",
"output_dir" -> outputDir,
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
......
......@@ -28,7 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference {
executable = config("exe", submodule = "perl", default = "perl")
var vep_script: String = config("vep_script")
var vepScript: String = config("vep_script")
@Input(doc = "input VCF", required = true)
var input: File = null
......@@ -37,14 +37,14 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
var output: File = null
override def versionRegex = """version (\d*)""".r
override def versionCommand = executable + " " + vep_script + " --help"
override def versionCommand = executable + " " + vepScript + " --help"
//Boolean vars
var v: Boolean = config("v", default = true)
var q: Boolean = config("q", default = false)
var offline: Boolean = config("offline", default = false)
var no_progress: Boolean = config("no_progress", default = false)
var everything: Boolean = config("everything", default = true)
var everything: Boolean = config("everything", default = false)
var force: Boolean = config("force", default = false)
var no_stats: Boolean = config("no_stats", default = false)
var stats_text: Boolean = config("stats_text", default = false)
......@@ -144,15 +144,15 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
override def beforeGraph(): Unit = {
super.beforeGraph()
if (!cache && !database) {
throw new IllegalArgumentException("Must supply either cache or database")
throw new IllegalArgumentException("Must supply either cache or database for VariantEffectPredictor")
} else if (cache && dir.isEmpty) {
throw new IllegalArgumentException("Must supply dir to cache")
throw new IllegalArgumentException("Must supply dir to cache for VariantEffectPredictor")
}
}
/** Returns command to execute */
def cmdLine = required(executable) +
required(vep_script) +
required(vepScript) +
required("-i", input) +
required("-o", output) +
conditional(v, "-v") +
......
......@@ -20,10 +20,48 @@ import java.io.File
import htsjdk.variant.variantcontext.VariantContextBuilder
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, ToolCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._
/**
* Biopet extension for tool VcfWithVcf
*/
class VcfWithVcf(val root: Configurable) extends ToolCommandFuntion {
javaMainClass = getClass.getName
@Input(doc = "Input vcf file", shortName = "input", required = true)
var input: File = _
@Input(doc = "Secondary vcf file", shortName = "secondary", required = true)
var secondaryVcf: File = _
@Output(doc = "Output vcf file", shortName = "output", required = true)
var output: File = _
@Output(doc = "Output vcf file index", shortName = "output", required = true)
private var outputIndex: File = _
var fields: List[(String, String, Option[String])] = List()
override def defaultCoreMemory = 2.0
override def beforeGraph() {
super.beforeGraph()
if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi")
if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx")
if (fields.isEmpty) throw new IllegalArgumentException("No fields found for VcfWithVcf")
}
override def commandLine = super.commandLine +
required("-I", input) +
required("-o", output) +
required("-s", secondaryVcf) +
repeat("-f", fields.map(x => x._1 + ":" + x._2 + ":" + x._3.getOrElse("none")))
}
/**
* This is a tool to annotate a vcf file with info value from a other vcf file
*
......@@ -46,10 +84,10 @@ object VcfWithVcf extends ToolCommand {
opt[File]('I', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(inputFile = x)
}
opt[File]('O', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
opt[File]('o', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(outputFile = x)
}
opt[File]('S', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
opt[File]('s', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(secondaryVcf = x)
}
opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) =>
......@@ -58,11 +96,11 @@ object VcfWithVcf extends ToolCommand {
else if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields)
else c.copy(fields = Fields(x, x) :: c.fields)
} text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>.
| By default we will return all values found for a given field.
| With <method> the values will processed after getting it from the secondary VCF file, posible methods are:
| - max : takes maximum of found value, only works for numeric (integer/float) fields
| - min : takes minemal of found value, only works for numeric (integer/float) fields
| - unique: takes only unique values """.stripMargin
| By default we will return all values found for a given field.
| With <method> the values will processed after getting it from the secondary VCF file, posible methods are:
| - max : takes maximum of found value, only works for numeric (integer/float) fields
| - min : takes minemal of found value, only works for numeric (integer/float) fields
| - unique: takes only unique values """.stripMargin
opt[Boolean]("match") valueName "<Boolean>" maxOccurs 1 action { (x, c) =>
c.copy(matchAllele = x)
} text "Match alternative alleles; default true"
......
......@@ -45,7 +45,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion {
var inputVCF: File = null
@Output(doc = "Output VCF", shortName = "OutputFile", required = true)
var outputVCF: File = null
var outputVcf: File = null
var mode: String = config("mode", default = "explode")
var doNotRemove: Boolean = config("donotremove", default = false)
......@@ -54,7 +54,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion {
override def commandLine = super.commandLine +
required("-I", inputVCF) +
required("-O", outputVCF) +
required("-O", outputVcf) +
required("-m", mode) +
conditional(doNotRemove, "--do-not-remove")
}
......
......@@ -15,6 +15,7 @@
*/
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
......@@ -36,25 +37,25 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
Paths.get(getClass.getResource(p).toURI).toString
}
val vepped_path = resourcePath("/VEP_oneline.vcf.gz")
val unvepped_path = resourcePath("/unvepped.vcf.gz")
val veppedPath = resourcePath("/VEP_oneline.vcf.gz")
val unveppedPath = resourcePath("/unvepped.vcf.gz")
val rand = new Random()
@Test def testOutputTypeVcf() = {
val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".vcf"
val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ")
val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ")
main(arguments)
}
@Test def testOutputTypeVcfGz() = {
val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".vcf.gz"
val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ")
val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ")
main(arguments)
}
@Test def testOutputTypeBcf() = {
val tmp_path = "/tmp/VcfWithVcf_" + rand.nextString(10) + ".bcf"
val arguments = Array("-I", unvepped_path, "-S", vepped_path, "-O", tmp_path, "-f", "CSQ")
val tmpPath = File.createTempFile("VcfWithVcf_", ".vcf").getAbsolutePath
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpPath, "-f", "CSQ")
main(arguments)
}
......
......@@ -40,6 +40,11 @@
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Toucan</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
......
......@@ -24,6 +24,7 @@ import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import scala.collection.JavaConversions._
......@@ -280,9 +281,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
} else None
lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
val svCalling = new ShivaSvCalling(this)
samples.foreach(x => x._2.preProcessBam.foreach(bam => svCalling.addBamFile(bam, Some(x._1))))
Some(svCalling)
Some(new ShivaSvCalling(this))
} else None
/** This will add the mutisample variantcalling */
......@@ -294,10 +293,21 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
vc.biopetScript()
addAll(vc.functions)
addSummaryQScript(vc)
if (config("annotation", default = true).asBoolean) {
val toucan = new Toucan(this)
toucan.outputDir = new File(outputDir, "annotation")
toucan.inputVCF = vc.finalFile
toucan.init()
toucan.biopetScript()
addAll(toucan.functions)
addSummaryQScript(toucan)
}
})
svCalling.foreach(sv => {
sv.outputDir = new File(outputDir, "sv_calling")
samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1))))
sv.init()
sv.biopetScript()
addAll(sv.functions)
......
......@@ -103,6 +103,9 @@ object ShivaTest {
val config = Map(
"name_prefix" -> "test",
"output_dir" -> outputDir,
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
......
......@@ -103,6 +103,9 @@ object ShivaVariantcallingTest {
val config = Map(
"name_prefix" -> "test",
"output_dir" -> outputDir,
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
......
......@@ -16,9 +16,10 @@
package nl.lumc.sasc.biopet.pipelines.toucan
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor
import nl.lumc.sasc.biopet.tools.VepNormalizer
import nl.lumc.sasc.biopet.tools.{ VcfWithVcf, VepNormalizer }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QScript
......@@ -27,14 +28,13 @@ import org.broadinstitute.gatk.queue.QScript
*
* Created by ahbbollen on 15-1-15.
*/
class Toucan(val root: Configurable) extends QScript with BiopetQScript with Reference {
class Toucan(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript with Reference {
def this() = this(null)
@Input(doc = "Input VCF file", shortName = "Input", required = true)
var inputVCF: File = _
def init(): Unit = {
}
override def defaults = ConfigUtils.mergeMaps(Map(
......@@ -52,10 +52,45 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Ref
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
normalizer.outputVCF = swapExt(vep.output, ".vcf", ".normalized.vcf.gz")
normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz")
add(normalizer)
// Optional annotation steps, depend is some files existing in the config
val gonlVcfFile: Option[File] = config("gonl_vcf")
val exacVcfFile: Option[File] = config("exac_vcf")
var outputFile = normalizer.outputVcf
gonlVcfFile match {
case Some(gonlFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
exacVcfFile match {
case Some(exacFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("MAF", "MAF_exac", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
}
def summaryFile = new File(outputDir, "Toucan.summary.json")
def summaryFiles = Map()
def summarySettings = Map()
}
object Toucan extends PipelineCommand
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment