diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
new file mode 100644
index 0000000000000000000000000000000000000000..eceb5b60ff0b6d33d2102a11334cc3b646d1d61b
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
@@ -0,0 +1,197 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.pindel
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
+import nl.lumc.sasc.biopet.utils.Logging
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
+
+/**
+ * Extension for pindel
+ *
+ * Based on version 0.2.5b8
+ *
+ * Exactly one of `pindel_file` and `config_file` has to be set before the command is built;
+ * `beforeCmd` registers an error via `Logging.addError` when neither or both are given.
+ */
+class Pindel(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
+  executable = config("exe", default = "pindel", freeVar = false)
+
+  override def defaultCoreMemory = 3.0
+  override def defaultThreads = 4
+
+  override def versionRegex = """Pindel version:? (.*)""".r
+  override def versionExitcode = List(1)
+  override def versionCommand = executable
+
+  /**
+   * Required parameters
+   */
+  var reference: File = referenceFasta
+
+  /** Assigned in `beforeCmd` from whichever of `pindel_file` / `config_file` is set. */
+  @Input(doc = "Input specification for Pindel to use")
+  var input: File = _
+
+  // pindel_file and config_file are alternatives, so neither is required on its own. Both start
+  // as None instead of null so that the match in beforeCmd covers every possible state.
+  @Argument(doc = "The pindel configuration file", required = false)
+  var pindel_file: Option[File] = None
+
+  @Argument(doc = "Configuration file with: bam-location/insert size/name", required = false)
+  var config_file: Option[File] = None
+
+  @Argument(doc = "Work directory")
+  var output_prefix: File = _
+
+  /**
+   * Optional parameters, each read from the config under the key passed to `config`
+   */
+  var RP: Option[Int] = config("RP")
+  var min_distance_to_the_end: Option[Int] = config("min_distance_to_the_end")
+  // no separate field for the thread count: cmdLine passes `threads` as --number_of_threads
+  var max_range_index: Option[Int] = config("max_range_index")
+  var window_size: Option[Int] = config("window_size")
+  var sequencing_error_rate: Option[Float] = config("sequencing_error_rate")
+  var sensitivity: Option[Float] = config("sensitivity")
+
+  var maximum_allowed_mismatch_rate: Option[Float] = config("maximum_allowed_mismatch_rate")
+  var nm: Option[Int] = config("nm")
+
+  var report_inversions: Boolean = config("report_inversions")
+  var report_duplications: Boolean = config("report_duplications")
+  var report_long_insertions: Boolean = config("report_long_insertions")
+  var report_breakpoints: Boolean = config("report_breakpoints")
+  var report_close_mapped_reads: Boolean = config("report_close_mapped_reads")
+  var report_only_close_mapped_reads: Boolean = config("report_only_close_mapped_reads")
+  var report_interchromosomal_events: Boolean = config("report_interchromosomal_events")
+
+  var IndelCorrection: Boolean = config("IndelCorrection")
+  var NormalSamples: Boolean = config("NormalSamples")
+
+  var breakdancer: Option[File] = config("breakdancer")
+  var include: Option[File] = config("include")
+  var exclude: Option[File] = config("exclude")
+
+  var additional_mismatch: Option[Int] = config("additional_mismatch")
+  var min_perfect_match_around_BP: Option[Int] = config("min_perfect_match_around_BP")
+  var min_inversion_size: Option[Int] = config("min_inversion_size")
+  var min_num_matched_bases: Option[Int] = config("min_num_matched_bases")
+  var balance_cutoff: Option[Int] = config("balance_cutoff")
+  var anchor_quality: Option[Int] = config("anchor_quality")
+  var minimum_support_for_event: Option[Int] = config("minimum_support_for_event")
+  var input_SV_Calls_for_assembly: Option[File] = config("input_SV_Calls_for_assembly")
+
+  var genotyping: Boolean = config("genotyping")
+  var output_of_breakdancer_events: Option[File] = config("output_of_breakdancer_events")
+  var name_of_logfile: Option[File] = config("name_of_logfile")
+
+  var Ploidy: Option[File] = config("ploidy")
+  var detect_DD: Boolean = config("detect_DD")
+
+  var MAX_DD_BREAKPOINT_DISTANCE: Option[Int] = config("MAX_DD_BREAKPOINT_DISTANCE")
+  var MAX_DISTANCE_CLUSTER_READS: Option[Int] = config("MAX_DISTANCE_CLUSTER_READS")
+  var MIN_DD_CLUSTER_SIZE: Option[Int] = config("MIN_DD_CLUSTER_SIZE")
+  var MIN_DD_BREAKPOINT_SUPPORT: Option[Int] = config("MIN_DD_BREAKPOINT_SUPPORT")
+  var MIN_DD_MAP_DISTANCE: Option[Int] = config("MIN_DD_MAP_DISTANCE")
+  var DD_REPORT_DUPLICATION_READS: Option[Int] = config("DD_REPORT_DUPLICATION_READS")
+
+  /**
+   * Checks which of `pindel_file` / `config_file` is set and points `input` at it.
+   *
+   * Registers an error through `Logging.addError` when neither or both are set.
+   */
+  override def beforeCmd(): Unit = {
+    // exactly one of the `pindel-config-file` and the `config-file` (bam-list) has to be set
+    (pindel_file, config_file) match {
+      case (None, None)       => Logging.addError("No pindel config is given")
+      case (Some(a), Some(b)) => Logging.addError(s"Please specify either a pindel config or bam-config. Not both for Pindel: $a or $b")
+      case (Some(a), None) =>
+        Logging.logger.info(s"Using $a as pindel config for Pindel")
+        input = a
+      case (None, Some(b)) =>
+        Logging.logger.info(s"Using $b as bam config for Pindel")
+        input = b
+    }
+  }
+
+  /**
+   * Builds the pindel command line: the executable, the required reference and output prefix,
+   * and every optional setting that has a value. Each option is passed exactly once.
+   */
+  def cmdLine = required(executable) +
+    required("--fasta", reference) +
+    optional("--pindel-config-file", pindel_file) +
+    optional("--config-file", config_file) +
+    required("--output-prefix", output_prefix) +
+    optional("--RP", RP) +
+    optional("--min_distance_to_the_end", min_distance_to_the_end) +
+    optional("--number_of_threads", threads) +
+    optional("--max_range_index", max_range_index) +
+    optional("--window_size", window_size) +
+    optional("--sequencing_error_rate", sequencing_error_rate) +
+    optional("--sensitivity", sensitivity) +
+    optional("--maximum_allowed_mismatch_rate", maximum_allowed_mismatch_rate) +
+    optional("--NM", nm) +
+    conditional(report_inversions, "--report_inversions") +
+    conditional(report_duplications, "--report_duplications") +
+    conditional(report_long_insertions, "--report_long_insertions") +
+    conditional(report_breakpoints, "--report_breakpoints") +
+    conditional(report_close_mapped_reads, "--report_close_mapped_reads") +
+    conditional(report_only_close_mapped_reads, "--report_only_close_mapped_reads") +
+    conditional(report_interchromosomal_events, "--report_interchromosomal_events") +
+    conditional(IndelCorrection, "--IndelCorrection") +
+    conditional(NormalSamples, "--NormalSamples") +
+    optional("--breakdancer", breakdancer) +
+    optional("--include", include) +
+    optional("--exclude", exclude) +
+    optional("--additional_mismatch", additional_mismatch) +
+    optional("--min_perfect_match_around_BP", min_perfect_match_around_BP) +
+    optional("--min_inversion_size", min_inversion_size) +
+    optional("--min_num_matched_bases", min_num_matched_bases) +
+    optional("--balance_cutoff", balance_cutoff) +
+    optional("--anchor_quality", anchor_quality) +
+    optional("--minimum_support_for_event", minimum_support_for_event) +
+    optional("--input_SV_Calls_for_assembly", input_SV_Calls_for_assembly) +
+    conditional(genotyping, "-g") +
+    optional("--output_of_breakdancer_events", output_of_breakdancer_events) +
+    optional("--name_of_logfile", name_of_logfile) +
+    optional("--Ploidy", Ploidy) +
+    conditional(detect_DD, "--detect_DD") +
+    optional("--MAX_DD_BREAKPOINT_DISTANCE", MAX_DD_BREAKPOINT_DISTANCE) +
+    optional("--MAX_DISTANCE_CLUSTER_READS", MAX_DISTANCE_CLUSTER_READS) +
+    optional("--MIN_DD_CLUSTER_SIZE", MIN_DD_CLUSTER_SIZE) +
+    optional("--MIN_DD_BREAKPOINT_SUPPORT", MIN_DD_BREAKPOINT_SUPPORT) +
+    optional("--MIN_DD_MAP_DISTANCE", MIN_DD_MAP_DISTANCE) +
+    optional("--DD_REPORT_DUPLICATION_READS", DD_REPORT_DUPLICATION_READS)
+}
+
+/** Factory for [[Pindel]] jobs that read a bam configuration file. */
+object Pindel {
+  /**
+   * Creates a Pindel job with `config_file` set to `configFile` and `output_prefix` set to
+   * `outputDir`.
+   */
+  def apply(root: Configurable, configFile: File, outputDir: File): Pindel = {
+    val caller = new Pindel(root)
+    caller.config_file = Some(configFile)
+    caller.output_prefix = outputDir
+    caller
+  }
+}
diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala similarity index 84% rename from public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala index 497fe21e342ed754e21f6be253b1ae5fdd0813fb..77ab79f1e513f087d967d72ce757f8a0d1b95853 100644 --- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala @@ -17,7 +17,8 @@ package nl.lumc.sasc.biopet.extensions.pindel import java.io.File -import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, ToolCommand } +import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } @@ -81,6 +82,15 @@ object PindelConfig extends ToolCommand { val input: File = commandArgs.inputbam + // the logic here is to pull the libraries stored in the bam file and output this to a pindel config file. + // see: http://gmt.genome.wustl.edu/packages/pindel/quick-start.html + // this is called bam-configuration file + /** + * filename<tab>avg insert size<tab>sample_label or name for reporting + * tumor_sample_1222.bam<tab>250<tab>TUMOR_1222 + * somatic_sample_1222.bam<tab>250<tab>HEALTHY_1222 + */ + } } diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala similarity index 91% rename from public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala index 10795533490ad7f0b696de22a64ca86d54a3492d..e5d276727eda9104fefebddc303747200415c354 100644 --- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala @@ -22,7 +22,7 @@ import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /// Pindel is actually a mini pipeline executing binaries from the pindel package -class Pindel(val root: Configurable) extends QScript with BiopetQScript { +class PindelPipeline(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) @Input(doc = "Input file (bam)") @@ -60,7 +60,7 @@ class Pindel(val root: Configurable) extends QScript with BiopetQScript { add(cfg) val output: File = 
this.outputvcf - val pindel = PindelCaller(this, cfg.output, output) + val pindel = Pindel(this, cfg.output, output) add(pindel) outputFiles += ("pindel_tsv" -> pindel.output) @@ -72,9 +72,9 @@ class Pindel(val root: Configurable) extends QScript with BiopetQScript { // private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.tsv" } -object Pindel extends PipelineCommand { - def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = { - val pindel = new Pindel(root) +object PindelPipeline extends PipelineCommand { + def apply(root: Configurable, input: File, reference: File, runDir: String): PindelPipeline = { + val pindel = new PindelPipeline(root) pindel.input = input pindel.reference = reference pindel.workdir = runDir diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala b/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala deleted file mode 100644 index cbe957e79fe1c3e013fb399fe30859e144ae2ed5..0000000000000000000000000000000000000000 --- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.extensions.pindel - -import java.io.File - -import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } - -class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction { - executable = config("exe", default = "pindel", freeVar = false) - - override def defaultCoreMemory = 5.0 - override def defaultThreads = 8 - - override def versionRegex = """Pindel version:? (.*)""".r - override def versionExitcode = List(1) - override def versionCommand = executable - - @Input(doc = "The pindel configuration file") - var input: File = _ - - @Input(doc = "Fasta reference") - var reference: File = config("reference") - - // this is a pointer to where the results files will be stored - // inside this directory, we can expect files named: - // <prefix>_D - // <prefix>_SI - // <prefix>_I - // <prefix>_TR - @Argument(doc = "Work directory") - var workdir: String = _ - - @Output(doc = "Pindel VCF output") - var output: File = _ - - var window_size: Option[Int] = config("window_size", default = 5) - - override def beforeCmd() { - } - - def cmdLine = required(executable) + - "-i " + required(input) + - "-f " + required(reference) + - "-o " + required(output) + - optional("-w", window_size) + - optional("-T", nCoresRequest) -} - -object PindelCaller { - def apply(root: Configurable, input: File, output: File): PindelCaller = { - val caller = new PindelCaller(root) - caller.input = input - caller.output = output - caller - } -}