diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index c0046d0535ccb5c429b2fa8b5707868c74f71f41..a413c8bad83199c38a1b156a67118d2e7262b50c 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -83,13 +83,14 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => /** Set default output file, threads and vmem for current job */ final def internalBeforeGraph(): Unit = { - pipesJobs.foreach(_.beforeGraph()) - pipesJobs.foreach(_.internalBeforeGraph()) + _pipesJobs.foreach(_.beforeGraph()) + _pipesJobs.foreach(_.internalBeforeGraph()) } /** * Can override this value is executable may not be converted to CanonicalPath + * * @deprecated */ val executableToCanonicalPath = true @@ -121,6 +122,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => /** * This operator sends stdout to `that` and combine this into 1 command line function + * * @param that Function that will read from stdin * @return BiopetPipe function */ @@ -141,6 +143,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => /** * This operator can be used to give a program a file as stdin + * * @param file File that will become stdin for this program * @return It's own class */ @@ -152,6 +155,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => /** * This operator can be used to give a program a file write it's atdout + * * @param file File that will become stdout for this program * @return It's own class */ @@ -169,6 +173,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => /** * This function needs to be implemented to define the command that is executed + * * @return Command to run */ protected[core] def cmdLine: String @@ -176,6 +181,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => /** * implementing a final version of the commandLine from org.broadinstitute.gatk.queue.function.CommandLineFunction * User needs to implement cmdLine instead + * * @return Command to run */ override final def commandLine: String = { @@ -187,10 +193,11 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => cmd } - private[core] var pipesJobs: List[BiopetCommandLineFunction] = Nil + private[core] var _pipesJobs: List[BiopetCommandLineFunction] = Nil + def pipesJobs = _pipesJobs def addPipeJob(job: BiopetCommandLineFunction) { - pipesJobs :+= job - pipesJobs = pipesJobs.distinct + _pipesJobs :+= job + _pipesJobs = _pipesJobs.distinct } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala index 8b4f1f801681f8ead4a58e2e02264acf8680c57d..287064130a8a055b7457a2e583517c8e60a1b5df 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala @@ -67,8 +67,8 @@ class BiopetFifoPipe(val root: Configurable, deps :::= inputs.values.toList.flatten.filter(!fifoFiles.contains(_)) deps = deps.distinct - pipesJobs :::= commands - pipesJobs = pipesJobs.distinct + _pipesJobs :::= commands + _pipesJobs = _pipesJobs.distinct } override def beforeCmd(): Unit = { diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala index f7be2d57f1c3cb6a724a9c89acb51592f8045432..af98f00969a0c0b0b77c59db903f7b3346ce3274 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala @@ -41,7 +41,7 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo case e: Exception => Nil } - pipesJobs :::= commands + _pipesJobs :::= commands override def beforeGraph() { super.beforeGraph() @@ -61,7 +61,7 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo } override def setResources(): Unit = { - combineResources(pipesJobs) + combineResources(_pipesJobs) } override def setupRetry(): Unit = { diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala index f2a387a08b50e37f44dcb21911acff65bab808c3..3cf3b47d5c598644154c363a2b036423c49c048e 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Freebayes.scala @@ -37,7 +37,92 @@ class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with R @Output(required = true) var outputVcf: File = null + @Input(required = false) + var bam_list: Option[File] = config("bam_list") + + @Input(required = false) + var targets: Option[File] = config("targets") + + @Input(required = false) + var samples: Option[File] = config("samples") + + @Input(required = false) + var populations: Option[File] = config("populations") + + @Input(required = false) + var cnv_map: Option[File] = config("cnv_map") + + @Input(required = false) + var trace: Option[File] = config("trace") + + @Input(required = false) + var failed_alleles: Option[File] = config("failed_alleles") + + @Input(required = false) + var observation_bias: Option[File] = config("observation_bias") + + @Input(required = false) + var contamination_estimates: Option[File] = config("contamination_estimates") + + @Input(required = false) + var variant_input: Option[File] = config("variant_input") + + @Input(required = false) + var haplotype_basis_alleles: Option[File] = config("haplotype_basis_alleles") + + var pvar: Option[Int] = config("pvar") + var theta: Option[Int] = config("theta") var ploidy: Option[Int] = config("ploidy") + var use_best_n_alleles: Option[Int] = config("use_best_n_alleles") + var max_complex_gap: Option[Int] = config("max_complex_gap") + var min_repeat_size: Option[Int] = config("min_repeat_size") + var min_repeat_entropy: Option[Int] = config("min_repeat_entropy") + var read_mismatch_limit: Option[Int] = config("read_mismatch_limit") + var read_max_mismatch_fraction: Option[Int] = config("read_max_mismatch_fraction") + var read_snp_limit: Option[Int] = config("read_snp_limit") + var read_indel_limit: Option[Int] = config("read_indel_limit") + var min_alternate_fraction: Option[Double] = config("min_alternate_fraction") + var min_alternate_count: Option[Int] = config("min_alternate_count") + var min_alternate_qsum: Option[Int] = config("min_alternate_qsum") + var min_alternate_total: Option[Int] = config("min_alternate_total") + var min_coverage: Option[Int] = config("min_coverage") + var genotyping_max_iterations: Option[Int] = config("genotyping_max_iterations") + var genotyping_max_banddepth: Option[Int] = config("genotyping_max_banddepth") + var genotype_variant_threshold: Option[Int] = config("genotype_variant_threshold") + var read_dependence_factor: Option[Int] = config("read_dependence_factor") + var min_mapping_quality: Option[Double] = config("min_mapping_quality") + var min_base_quality: Option[Double] = config("min_base_quality") + var min_supporting_allele_qsum: Option[Double] = config("min_supporting_allele_qsum") + var min_supporting_mapping_qsum: Option[Double] = config("min_supporting_mapping_qsum") + var mismatch_base_quality_threshold: Option[Double] = config("mismatch_base_quality_threshold") + var base_quality_cap: Option[Double] = config("base_quality_cap") + var prob_contamination: Option[Double] = config("prob_contamination") + var only_use_input_alleles: Boolean = config("only_use_input_alleles", default = false) + var report_all_haplotype_alleles: Boolean = config("report_all_haplotype_alleles", default = false) + var report_monomorphic: Boolean = config("report_monomorphic", default = false) + var pooled_discrete: Boolean = config("pooled_discrete", default = false) + var pooled_continuous: Boolean = config("pooled_continuous", default = false) + var use_reference_allele: Boolean = config("use_reference_allele", default = false) + var no_snps: Boolean = config("no_snps", default = false) + var no_indels: Boolean = config("no_indels", default = false) + var no_mnps: Boolean = config("no_mnps", default = false) + var no_complex: Boolean = config("no_complex", default = false) + var no_partial_observations: Boolean = config("no_partial_observations", default = false) + var dont_left_align_indels: Boolean = config("dont_left_align_indels", default = false) + var use_duplicate_reads: Boolean = config("use_duplicate_reads", default = false) + var standard_filters: Boolean = config("standard_filters", default = false) + var no_population_priors: Boolean = config("no_population_priors", default = false) + var hwe_priors_off: Boolean = config("hwe_priors_off", default = false) + var binomial_obs_priors_off: Boolean = config("binomial_obs_priors_off", default = false) + var allele_balance_priors_off: Boolean = config("allele_balance_priors_off", default = false) + var legacy_gls: Boolean = config("legacy_gls", default = false) + var report_genotype_likelihood_max: Boolean = config("report_genotype_likelihood_max", default = false) + var exclude_unobserved_genotypes: Boolean = config("exclude_unobserved_genotypes", default = false) + var use_mapping_quality: Boolean = config("use_mapping_quality", default = false) + var harmonic_indel_quality: Boolean = config("harmonic_indel_quality", default = false) + var genotype_qualities: Boolean = config("genotype_qualities", default = false) + var debug: Boolean = config("debug", default = logger.isDebugEnabled) + var haplotypeLength: Option[Int] = config("haplotype_length") executable = config("exe", default = "freebayes") @@ -52,7 +137,70 @@ class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with R def cmdLine = executable + required("--fasta-reference", reference) + repeat("--bam", bamfiles) + - optional("--vcf", outputVcf) + + optional("--bam-list", bam_list) + + optional("--targets", targets) + + optional("--samples", samples) + + optional("--populations", populations) + + optional("--cnv-map", cnv_map) + + optional("--trace", trace) + + optional("--failed-alleles", failed_alleles) + + optional("--observation-bias", observation_bias) + + optional("--contamination-estimates", contamination_estimates) + + optional("--variant-input", variant_input) + + optional("--haplotype-basis-alleles", haplotype_basis_alleles) + + optional("--pvar", pvar) + + optional("--theta", theta) + optional("--ploidy", ploidy) + - optional("--haplotype-length", haplotypeLength) + optional("--use-best-n-alleles", use_best_n_alleles) + + optional("--max-complex-gap", max_complex_gap) + + optional("--min-repeat-size", min_repeat_size) + + optional("--min-repeat-entropy", min_repeat_entropy) + + optional("--read-mismatch-limit", read_mismatch_limit) + + optional("--read-max-mismatch-fraction", read_max_mismatch_fraction) + + optional("--read-snp-limit", read_snp_limit) + + optional("--read-indel-limit", read_indel_limit) + + optional("--min-alternate-fraction", min_alternate_fraction) + + optional("--min-alternate-count", min_alternate_count) + + optional("--min-alternate-qsum", min_alternate_qsum) + + optional("--min-alternate-total", min_alternate_total) + + optional("--min-coverage", min_coverage) + + optional("--genotyping-max-iterations", genotyping_max_iterations) + + optional("--genotyping-max-banddepth", genotyping_max_banddepth) + + optional("--genotype-variant-threshold", genotype_variant_threshold) + + optional("--read-dependence-factor", read_dependence_factor) + + optional("--min-mapping-quality", min_mapping_quality) + + optional("--min-base-quality", min_base_quality) + + optional("--min-supporting-allele-qsum", min_supporting_allele_qsum) + + optional("--min-supporting-mapping-qsum", min_supporting_mapping_qsum) + + optional("--mismatch-base-quality-threshold", mismatch_base_quality_threshold) + + optional("--base-quality-cap", base_quality_cap) + + optional("--prob-contamination", prob_contamination) + + conditional(only_use_input_alleles, "--only-use-input-alleles") + + conditional(report_all_haplotype_alleles, "--report-all-haplotype-alleles") + + conditional(report_monomorphic, "--report-monomorphic") + + conditional(pooled_discrete, "--pooled-discrete") + + conditional(pooled_continuous, "--pooled-continuous") + + conditional(use_reference_allele, "--use-reference-allele") + + conditional(no_snps, "--no-snps") + + conditional(no_indels, "--no-indels") + + conditional(no_mnps, "--no-mnps") + + conditional(no_complex, "--no-complex") + + conditional(no_partial_observations, "--no-partial-observations") + + conditional(dont_left_align_indels, "--dont-left-align-indels") + + conditional(use_duplicate_reads, "--use-duplicate-reads") + + conditional(standard_filters, "--standard-filters") + + conditional(no_population_priors, "--no-population-priors") + + conditional(hwe_priors_off, "--hwe-priors-off") + + conditional(binomial_obs_priors_off, "--binomial-obs-priors-off") + + conditional(allele_balance_priors_off, "--allele-balance-priors-off") + + conditional(legacy_gls, "--legacy-gls") + + conditional(report_genotype_likelihood_max, "--report-genotype-likelihood-max") + + conditional(exclude_unobserved_genotypes, "--exclude-unobserved-genotypes") + + conditional(use_mapping_quality, "--use-mapping-quality") + + conditional(harmonic_indel_quality, "--harmonic-indel-quality") + + conditional(genotype_qualities, "--genotype-qualities") + + conditional(debug, "--debug") + + optional("--haplotype-length", haplotypeLength) + + (if (inputAsStdin) required("--stdin") else "") + + (if (outputAsStsout) "" else optional("--vcf", outputVcf)) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index 97b4c74134a59ce94fea03ce0e843a5b6ee3adbc..fad98dabc9fbfba859c5d33c8dc933ed9f3875be 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -148,6 +148,8 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu // ought to be a flag, but is BUG in VEP; becomes numeric ("1" is true) var failed: Option[Int] = config("failed") + override def defaultCoreMemory = 4.0 + override def beforeGraph(): Unit = { super.beforeGraph() if (!cache && !database) { @@ -155,6 +157,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu } else if (cache && dir.isEmpty) { Logging.addError("Must supply dir to cache for VariantEffectPredictor") } + if (stats_text) outputFiles :+= new File(output.getAbsolutePath + "_summary.txt") } /** Returns command to execute */ diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala index e4470ffec4f37c3b2c71a0b88f095cefb079741e..2316db57e4fd9d7c7b1d591d6c61d4e2b9e48f72 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala @@ -53,7 +53,7 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers def versionCommand = executable + " --version" - override def defaultCoreMemory = 15.0 + override def defaultCoreMemory = 17.0 override def defaultThreads = 4 diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala index 265a6d21f941bfae7bc3c6b7c742993b9638e6c3..49181292137d86e2f67d3b2aa4f7b1bd905cb0e1 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/qiime/PickClosedReferenceOtus.scala @@ -17,8 +17,8 @@ class PickClosedReferenceOtus(val root: Configurable) extends BiopetCommandLineF var outputDir: File = null - override def defaultThreads = 2 - override def defaultCoreMemory = 10.0 + override def defaultThreads = 3 + override def defaultCoreMemory = 12.0 def versionCommand = executable + " --version" def versionRegex = """Version: (.*)""".r diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala index 3227c2dc2fc845fdeb18f3915ae4c692369fdcca..d276abc7c940b3c70e589b3888b544dc4d19a490 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala @@ -14,15 +14,8 @@ class Freebayes(val root: Configurable) extends Variantcaller { val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(this) fb.bamfiles = inputBams.values.toList fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf") - fb.isIntermediate = true - add(fb) + add(fb | new Bgzip(this) > outputFile) - //TODO: need piping for this, see also issue #114 - val bz = new Bgzip(this) - bz.input = List(fb.outputVcf) - bz.output = outputFile - add(bz) - - add(Tabix.apply(this, bz.output)) + add(Tabix.apply(this, outputFile)) } } diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index 44c3d60cf762325cbbbe8207b9b7a977a0398791..b1699c7b97132b1df20c88d49c6202164fa89227 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -18,16 +18,17 @@ package nl.lumc.sasc.biopet.pipelines.shiva import java.io.{ File, FileOutputStream } import com.google.common.io.Files -import nl.lumc.sasc.biopet.utils.config.Config +import nl.lumc.sasc.biopet.core.BiopetPipe import nl.lumc.sasc.biopet.extensions.Freebayes +import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.tools.VcfFilter +import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter } import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.apache.commons.io.FileUtils +import nl.lumc.sasc.biopet.utils.config.Config import org.broadinstitute.gatk.queue.QSettings import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ AfterClass, DataProvider, Test } +import org.testng.annotations.{ DataProvider, Test } import scala.collection.mutable.ListBuffer @@ -88,11 +89,13 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { pipeline.init() pipeline.script() + val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs) + pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample) 1 else 0)) - //pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0) - //FIXME: Can not check for bcftools because of piping - pipeline.functions.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0) - //pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) + pipesJobs.count(_.isInstanceOf[BcftoolsCall]) shouldBe (if (bcftools) 1 else 0) + (if (bcftoolsSinglesample) bams else 0) + pipeline.functions.count(_.isInstanceOf[BcftoolsMerge]) shouldBe (if (bcftoolsSinglesample && bams > 1) 1 else 0) + pipesJobs.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0) + pipesJobs.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0) } }