diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index 4a41248e5653ee9dd6a1e32e64845447b4198b06..344f88b89e902bad748628d8e591b79c2daec5d1 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -73,6 +73,15 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => def beforeGraph() {} override def freezeFieldValues() { + /* When this function is a Reference, add its dict/fai files to deps if they are marked as required, then drop duplicate deps */ + this match { + case r: Reference => + if (r.dictRequired) deps :+= r.referenceDict + if (r.faiRequired) deps :+= r.referenceFai + deps = deps.distinct + case _ => + } + + preProcessExecutable() beforeGraph() internalBeforeGraph() diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala index 6a39ca3eb9c68205cf1b5354829b4bb87944bfa9..f00de9e17af5564d23a73bac0ad6645d1535ab24 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala @@ -61,25 +61,25 @@ trait Reference extends Configurable { } /** When set override this on true the pipeline with raise an exception when fai index is not found */ - protected def faiRequired = false + def faiRequired = false /** When set override this on true the pipeline with raise an exception when dict index is not found */ - protected def dictRequired = this.isInstanceOf[Summarizable] || this.isInstanceOf[SummaryQScript] + def dictRequired = this.isInstanceOf[Summarizable] || this.isInstanceOf[SummaryQScript] + + /** Returns the dict file belonging to the fasta file: the absolute fasta path with the .fa, .fasta and .fna suffixes stripped (in that order) and .dict appended */ + def referenceDict = new File(referenceFasta().getAbsolutePath + .stripSuffix(".fa") + .stripSuffix(".fasta") + 
.stripSuffix(".fna") + ".dict") + + /** Returns the fai file belonging to the fasta file: the absolute fasta path with .fai appended */ + def referenceFai = new File(referenceFasta().getAbsolutePath + ".fai") /** Returns the fasta file */ def referenceFasta(): File = { val file: File = config("reference_fasta") - if (config.contains("reference_fasta")) { - checkFasta(file) - - val dict = new File(file.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta").stripSuffix(".fna") + ".dict") - val fai = new File(file.getAbsolutePath + ".fai") - - this match { - case c: BiopetCommandLineFunction => c.deps :::= dict :: fai :: Nil - case _ => - } - } else { + if (config.contains("reference_fasta")) checkFasta(file) + else { val defaults = ConfigUtils.mergeMaps(this.defaults, this.internalDefaults) def getReferences(map: Map[String, Any]): Set[(String, String)] = (for ( diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..eb36ba9faad4980d28df0308162237b655e7ad4c --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortVcf.scala @@ -0,0 +1,61 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. 
The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.picard + +import java.io.File + +import nl.lumc.sasc.biopet.core.Reference +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** Extension for picard SortVcf; always requires the reference dict, which is also used as the sequence dictionary when none is set */ +class SortVcf(val root: Configurable) extends Picard with Reference { + javaMainClass = new picard.vcf.SortVcf().getClass.getName + + @Input(doc = "Input VCF(s) to be sorted. Multiple inputs must have the same sample names (in order)", required = true) + var input: File = _ + + @Output(doc = "Output VCF to be written.", required = true) + var output: File = _ + + @Input(doc = "Sequence dictionary to use", required = true) + var sequenceDictionary: File = _ + + override val dictRequired = true + + override def beforeGraph(): Unit = { + super.beforeGraph() + if (sequenceDictionary == null) sequenceDictionary = referenceDict + } + + /** Returns command to execute: INPUT= and OUTPUT= (pointing at /dev/stdin or /dev/stdout when inputAsStdin or outputAsStsout is set) followed by SEQUENCE_DICTIONARY= */ + override def cmdLine = super.cmdLine + + (if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false) + else required("INPUT=", input, spaceSeparated = false)) + + (if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false) + else required("OUTPUT=", output, spaceSeparated = false)) + + required("SEQUENCE_DICTIONARY=", sequenceDictionary, spaceSeparated = false) +} + +object SortVcf { + /** Returns a SortVcf for the given root with input and output set */ + def apply(root: Configurable, input: File, output: File): SortVcf = { + val sortVcf = new SortVcf(root) + sortVcf.input = input + sortVcf.output = output + sortVcf + } +} \ No newline at end of file diff --git a/public/tinycap/src/test/resources/log4j.properties 
b/public/tinycap/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/tinycap/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file