Commit deab689c authored by bow's avatar bow
Browse files

Merge branch 'feature-gubbins' into 'develop'

Feature gubbins

For #30

See merge request !60
parents 801b72ed 7e8cd038
...@@ -4,8 +4,7 @@ import java.io.File ...@@ -4,8 +4,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Cat import nl.lumc.sasc.biopet.extensions.{ RunGubbins, Cat, Raxml }
import nl.lumc.sasc.biopet.extensions.Raxml
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
...@@ -57,13 +56,16 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { ...@@ -57,13 +56,16 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
add(catConsensusVariantsSnps) add(catConsensusVariantsSnps)
val seed: Int = config("seed", default = 12345) val seed: Int = config("seed", default = 12345)
def addRaxml(input: File, outputDir: String, outputName: String) { def addTreeJobs(input: File, outputDir: String, outputName: String) {
val dirSufixRaxml = if (outputDir.endsWith(File.separator)) "raxml" else File.separator + "raxml"
val dirSufixGubbins = if (outputDir.endsWith(File.separator)) "gubbins" else File.separator + "gubbins"
val raxmlMl = new Raxml(this) val raxmlMl = new Raxml(this)
raxmlMl.input = input raxmlMl.input = input
raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX") raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlMl.p = seed raxmlMl.p = seed
raxmlMl.n = outputName + "_ml" raxmlMl.n = outputName + "_ml"
raxmlMl.w = outputDir raxmlMl.w = outputDir + dirSufixRaxml
raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml") raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
add(raxmlMl) add(raxmlMl)
...@@ -76,7 +78,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { ...@@ -76,7 +78,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX") raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBoot.p = seed raxmlBoot.p = seed
raxmlBoot.b = math.abs(r.nextInt) raxmlBoot.b = math.abs(r.nextInt)
raxmlBoot.w = outputDir raxmlBoot.w = outputDir + dirSufixRaxml
raxmlBoot.N = 1 raxmlBoot.N = 1
raxmlBoot.n = outputName + "_boot_" + t raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot) add(raxmlBoot)
...@@ -94,11 +96,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { ...@@ -94,11 +96,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
raxmlBi.p = seed raxmlBi.p = seed
raxmlBi.f = "b" raxmlBi.f = "b"
raxmlBi.n = outputName + "_bi" raxmlBi.n = outputName + "_bi"
raxmlBi.w = outputDir raxmlBi.w = outputDir + dirSufixRaxml
add(raxmlBi) add(raxmlBi)
val gubbins = new RunGubbins(this)
gubbins.fastafile = input
gubbins.startingTree = raxmlBi.getBipartitionsFile
gubbins.outputDirectory = outputDir + dirSufixGubbins
add(gubbins)
} }
addRaxml(catVariantsSnps.output, outputDir + "raxml", "snps") addTreeJobs(catVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only")
addTreeJobs(catVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels")
} }
// Called for each sample // Called for each sample
......
...@@ -146,8 +146,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr ...@@ -146,8 +146,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
add(vcfFilter) add(vcfFilter)
scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf
} else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput
if (scriptOutput.rawFilterVcfFile == null) throw new IllegalStateException("Files can't be empty") if (scriptOutput.rawFilterVcfFile != null) mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile)
mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile)
} }
// Allele mode // Allele mode
......
...@@ -72,6 +72,7 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { ...@@ -72,6 +72,7 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
def getBestTreeFile: File = new File(w + File.separator + "RAxML_bestTree." + n) def getBestTreeFile: File = new File(w + File.separator + "RAxML_bestTree." + n)
def getBootstrapFile: File = new File(w + File.separator + "RAxML_bootstrap." + n) def getBootstrapFile: File = new File(w + File.separator + "RAxML_bootstrap." + n)
def getBipartitionsFile: File = new File(w + File.separator + "RAxML_bipartitions." + n)
def getInfoFile: File = new File(w + File.separator + "RAxML_info." + n) def getInfoFile: File = new File(w + File.separator + "RAxML_info." + n)
def cmdLine = required(executable) + def cmdLine = required(executable) +
......
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Contaminants", required = false)
var startingTree: File = config("starting_tree")
@Input(doc = "Fasta file", shortName = "FQ")
var fastafile: File = _
@Output(doc = "Output", shortName = "out")
var outputFiles: List[File] = Nil
@Argument(required = true)
var outputDirectory: String = _
executable = config("exe", default = "run_gubbins.py")
var outgroup: String = config("outgroup")
var filterPercentage: String = config("filter_percentage")
var treeBuilder: String = config("tree_builder")
var iterations: Option[Int] = config("iterations")
var minSnps: Option[Int] = config("min_snps")
var convergeMethod: String = config("converge_method")
var useTimeStamp: Boolean = config("use_time_stamp")
var prefix: String = config("prefix")
var verbose: Boolean = config("verbose")
var noCleanup: Boolean = config("no_cleanup")
override def afterGraph: Unit = {
super.afterGraph
jobLocalDir = new File(outputDirectory)
if (prefix == null) prefix = fastafile.getName
val out: List[String] = List(".recombination_predictions.embl",
".recombination_predictions.gff",
".branch_base_reconstruction.embl",
".summary_of_snp_distribution.vcf",
".per_branch_statistics.csv",
".filtered_polymorphic_sites.fasta",
".filtered_polymorphic_sites.phylip",
".final_tree.tre")
for (t <- out) outputFiles ::= new File(outputDirectory + File.separator + prefix + t)
}
def cmdLine = required("cd", outputDirectory) + " && " + required(executable) +
optional("--outgroup", outgroup) +
optional("--starting_tree", startingTree) +
optional("--filter_percentage", filterPercentage) +
optional("--tree_builder", treeBuilder) +
optional("--iterations", iterations) +
optional("--min_snps", minSnps) +
optional("--converge_method", convergeMethod) +
conditional(useTimeStamp, "--use_time_stamp") +
optional("--prefix", prefix) +
conditional(verbose, "--verbose") +
conditional(noCleanup, "--no_cleanup") +
required(fastafile)
}
...@@ -236,7 +236,7 @@ object BastyGenerateFasta extends ToolCommand { ...@@ -236,7 +236,7 @@ object BastyGenerateFasta extends ToolCommand {
protected def writeVariantsOnly() { protected def writeVariantsOnly() {
val writer = new PrintWriter(cmdArgs.outputVariants) val writer = new PrintWriter(cmdArgs.outputVariants)
writer.println(">" + cmdArgs.sampleName) writer.println(">" + cmdArgs.outputName)
val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false) val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false)
for (vcfRecord <- vcfReader if (!cmdArgs.snpsOnly || vcfRecord.isSNP)) yield { for (vcfRecord <- vcfReader if (!cmdArgs.snpsOnly || vcfRecord.isSNP)) yield {
writer.print(getMaxAllele(vcfRecord)) writer.print(getMaxAllele(vcfRecord))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment