diff --git a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index 87489074610658c0104673e76eb3066b73e5f658..c48621951c5d160e49e6a1f59e17397d048a7a2a 100644 --- a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -4,8 +4,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.MultiSampleQScript import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.Cat -import nl.lumc.sasc.biopet.extensions.Raxml +import nl.lumc.sasc.biopet.extensions.{ RunGubbins, Cat, Raxml } import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline import nl.lumc.sasc.biopet.tools.BastyGenerateFasta import org.broadinstitute.gatk.queue.QScript @@ -57,13 +56,16 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { add(catConsensusVariantsSnps) val seed: Int = config("seed", default = 12345) - def addRaxml(input: File, outputDir: String, outputName: String) { + def addTreeJobs(input: File, outputDir: String, outputName: String) { + val dirSufixRaxml = if (outputDir.endsWith(File.separator)) "raxml" else File.separator + "raxml" + val dirSufixGubbins = if (outputDir.endsWith(File.separator)) "gubbins" else File.separator + "gubbins" + val raxmlMl = new Raxml(this) raxmlMl.input = input raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX") raxmlMl.p = seed raxmlMl.n = outputName + "_ml" - raxmlMl.w = outputDir + raxmlMl.w = outputDir + dirSufixRaxml raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml") add(raxmlMl) @@ -76,7 +78,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX") raxmlBoot.p = seed raxmlBoot.b = math.abs(r.nextInt) - raxmlBoot.w = outputDir + raxmlBoot.w = outputDir + dirSufixRaxml raxmlBoot.N = 1 raxmlBoot.n = outputName + "_boot_" + t add(raxmlBoot) @@ -94,11 +96,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { raxmlBi.p = seed raxmlBi.f = "b" raxmlBi.n = outputName + "_bi" - raxmlBi.w = outputDir + raxmlBi.w = outputDir + dirSufixRaxml add(raxmlBi) + + val gubbins = new RunGubbins(this) + gubbins.fastafile = input + gubbins.startingTree = raxmlBi.getBipartitionsFile + gubbins.outputDirectory = outputDir + dirSufixGubbins + add(gubbins) } - addRaxml(catVariantsSnps.output, outputDir + "raxml", "snps") + addTreeJobs(catVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only") + addTreeJobs(catVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels") } // Called for each sample diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index d34466a00a4745ff159e4585349e7f110c7c718d..5c883cde4809c2fff4f35dee914213f9a997ab6f 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -146,8 +146,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr add(vcfFilter) scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput - if (scriptOutput.rawFilterVcfFile == null) throw new IllegalStateException("Files can't be empty") - mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) + if (scriptOutput.rawFilterVcfFile != null) mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) } // Allele mode diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala index a18b2056e29f9b86cdb47b4b9b70fbfa25c8503c..db214a24b5b8f5f309dda9213795e058b242e0a3 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala @@ -72,6 +72,7 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { def getBestTreeFile: File = new File(w + File.separator + "RAxML_bestTree." + n) def getBootstrapFile: File = new File(w + File.separator + "RAxML_bootstrap." + n) + def getBipartitionsFile: File = new File(w + File.separator + "RAxML_bipartitions." + n) def getInfoFile: File = new File(w + File.separator + "RAxML_info." + n) def cmdLine = required(executable) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala new file mode 100644 index 0000000000000000000000000000000000000000..acfcdb94a2e4cc00d30f632d0ee91884a679c2aa --- /dev/null +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala @@ -0,0 +1,63 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.core.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } + +class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction { + + @Input(doc = "Contaminants", required = false) + var startingTree: File = config("starting_tree") + + @Input(doc = "Fasta file", shortName = "FQ") + var fastafile: File = _ + + @Output(doc = "Output", shortName = "out") + var outputFiles: List[File] = Nil + + @Argument(required = true) + var outputDirectory: String = _ + + executable = config("exe", default = "run_gubbins.py") + var outgroup: String = config("outgroup") + var filterPercentage: String = config("filter_percentage") + var treeBuilder: String = config("tree_builder") + var iterations: Option[Int] = config("iterations") + var minSnps: Option[Int] = config("min_snps") + var convergeMethod: String = config("converge_method") + var useTimeStamp: Boolean = config("use_time_stamp") + var prefix: String = config("prefix") + var verbose: Boolean = config("verbose") + var noCleanup: Boolean = config("no_cleanup") + + override def afterGraph: Unit = { + super.afterGraph + jobLocalDir = new File(outputDirectory) + if (prefix == null) prefix = fastafile.getName + val out: List[String] = List(".recombination_predictions.embl", + ".recombination_predictions.gff", + ".branch_base_reconstruction.embl", + ".summary_of_snp_distribution.vcf", + ".per_branch_statistics.csv", + ".filtered_polymorphic_sites.fasta", + ".filtered_polymorphic_sites.phylip", + ".final_tree.tre") + for (t <- out) outputFiles ::= new File(outputDirectory + File.separator + prefix + t) + } + + def cmdLine = required("cd", outputDirectory) + " && " + required(executable) + + optional("--outgroup", outgroup) + + optional("--starting_tree", startingTree) + + optional("--filter_percentage", filterPercentage) + + optional("--tree_builder", treeBuilder) + + optional("--iterations", iterations) + + optional("--min_snps", minSnps) + + optional("--converge_method", convergeMethod) + + conditional(useTimeStamp, "--use_time_stamp") + + optional("--prefix", prefix) + + conditional(verbose, "--verbose") + + conditional(noCleanup, "--no_cleanup") + + required(fastafile) +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala index 552ab987dd4c0cc13149a019c02b991001bd4616..80dc052bcb6a0f942426d29f0838d038584d774e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala @@ -236,7 +236,7 @@ object BastyGenerateFasta extends ToolCommand { protected def writeVariantsOnly() { val writer = new PrintWriter(cmdArgs.outputVariants) - writer.println(">" + cmdArgs.sampleName) + writer.println(">" + cmdArgs.outputName) val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false) for (vcfRecord <- vcfReader if (!cmdArgs.snpsOnly || vcfRecord.isSNP)) yield { writer.print(getMaxAllele(vcfRecord))