From 7213a031e6d8615f56446803b9beb203d00e6170 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@lumc.nl>
Date: Tue, 12 Jan 2016 16:50:19 +0100
Subject: [PATCH] Adding the pindel wrapper to the extensions. Updated to
 support v0.2.5b8

---
 .../biopet/extensions/pindel/Pindel.scala     | 179 ++++++++++++++++++
 .../extensions/pindel/PindelConfig.scala      |  12 +-
 .../extensions/pindel/PindelPipeline.scala}   |  10 +-
 .../extensions/pindel/PindelCaller.scala      |  72 -------
 4 files changed, 195 insertions(+), 78 deletions(-)
 create mode 100644 public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
 rename public/{yamsvp/src_old => biopet-extensions/src}/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala (84%)
 rename public/{yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala => biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala} (91%)
 delete mode 100644 public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala

diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
new file mode 100644
index 000000000..eceb5b60f
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
@@ -0,0 +1,179 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.pindel
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
+import nl.lumc.sasc.biopet.utils.Logging
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
+
+/**
+ * Extension for pindel
+ *
+ * Based on version 0.2.5b8
+ */
+class Pindel(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
+  executable = config("exe", default = "pindel", freeVar = false)
+
+  override def defaultCoreMemory = 3.0
+  override def defaultThreads = 4
+
+  override def versionRegex = """Pindel version:? (.*)""".r
+  override def versionExitcode = List(1)
+  override def versionCommand = executable
+
+  /**
+   * Required parameters
+   */
+  var reference: File = referenceFasta
+
+  @Input(doc = "Input specification for Pindel to use")
+  var input: File = _
+
+  @Argument(doc = "The pindel configuration file")
+  var pindel_file: Option[File] = None
+
+  @Argument(doc = "Configuration file with: bam-location/insert size/name")
+  var config_file: Option[File] = None
+
+  @Argument(doc = "Work directory")
+  var output_prefix: File = _
+
+  var RP: Option[Int] = config("RP")
+  var min_distance_to_the_end: Option[Int] = config("min_distance_to_the_end")
+  // the number of threads is taken from defaultThreads and passed via --number_of_threads
+  var max_range_index: Option[Int] = config("max_range_index")
+  var window_size: Option[Int] = config("window_size")
+  var sequencing_error_rate: Option[Float] = config("sequencing_error_rate")
+  var sensitivity: Option[Float] = config("sensitivity")
+
+  var maximum_allowed_mismatch_rate: Option[Float] = config("maximum_allowed_mismatch_rate")
+  var nm: Option[Int] = config("nm")
+
+  var report_inversions: Boolean = config("report_inversions", default = false)
+  var report_duplications: Boolean = config("report_duplications", default = false)
+  var report_long_insertions: Boolean = config("report_long_insertions", default = false)
+  var report_breakpoints: Boolean = config("report_breakpoints", default = false)
+  var report_close_mapped_reads: Boolean = config("report_close_mapped_reads", default = false)
+  var report_only_close_mapped_reads: Boolean = config("report_only_close_mapped_reads", default = false)
+  var report_interchromosomal_events: Boolean = config("report_interchromosomal_events", default = false)
+
+  var IndelCorrection: Boolean = config("IndelCorrection", default = false)
+  var NormalSamples: Boolean = config("NormalSamples", default = false)
+
+  var breakdancer: Option[File] = config("breakdancer")
+  var include: Option[File] = config("include")
+  var exclude: Option[File] = config("exclude")
+
+  var additional_mismatch: Option[Int] = config("additional_mismatch")
+  var min_perfect_match_around_BP: Option[Int] = config("min_perfect_match_around_BP")
+  var min_inversion_size: Option[Int] = config("min_inversion_size")
+  var min_num_matched_bases: Option[Int] = config("min_num_matched_bases")
+  var balance_cutoff: Option[Int] = config("balance_cutoff")
+  var anchor_quality: Option[Int] = config("anchor_quality")
+  var minimum_support_for_event: Option[Int] = config("minimum_support_for_event")
+  var input_SV_Calls_for_assembly: Option[File] = config("input_SV_Calls_for_assembly")
+
+  var genotyping: Boolean = config("genotyping", default = false)
+  var output_of_breakdancer_events: Option[File] = config("output_of_breakdancer_events")
+  var name_of_logfile: Option[File] = config("name_of_logfile")
+
+  var Ploidy: Option[File] = config("ploidy")
+  var detect_DD: Boolean = config("detect_DD", default = false)
+
+  var MAX_DD_BREAKPOINT_DISTANCE: Option[Int] = config("MAX_DD_BREAKPOINT_DISTANCE")
+  var MAX_DISTANCE_CLUSTER_READS: Option[Int] = config("MAX_DISTANCE_CLUSTER_READS")
+  var MIN_DD_CLUSTER_SIZE: Option[Int] = config("MIN_DD_CLUSTER_SIZE")
+  var MIN_DD_BREAKPOINT_SUPPORT: Option[Int] = config("MIN_DD_BREAKPOINT_SUPPORT")
+  var MIN_DD_MAP_DISTANCE: Option[Int] = config("MIN_DD_MAP_DISTANCE")
+  var DD_REPORT_DUPLICATION_READS: Option[Int] = config("DD_REPORT_DUPLICATION_READS")
+
+  override def beforeCmd(): Unit = {
+    // either the `pindel-config-file` or the `config-file` (bam list) must be set,
+    // but not both; check this before building the command line.
+    (pindel_file, config_file) match {
+      case (None, None)       => Logging.addError("No pindel config is given")
+      case (Some(a), Some(b)) => Logging.addError(s"Please specify either a pindel config or a bam config for Pindel, not both: $a, $b")
+      case (Some(a), None) => {
+        Logging.logger.info(s"Using $pindel_file as pindel config for Pindel")
+        input = a
+      }
+      case (None, Some(b)) => {
+        Logging.logger.info(s"Using $config_file as bam config for Pindel")
+        input = b
+      }
+    }
+  }
+
+  def cmdLine = required(executable) +
+    required("--fasta ", reference) +
+    optional("--pindel-config-file", pindel_file) +
+    optional("--config-file", config_file) +
+    required("--output-prefix ", output_prefix) +
+    optional("--RP", RP) +
+    optional("--min_distance_to_the_end", min_distance_to_the_end) +
+    optional("--number_of_threads", threads) +
+    optional("--max_range_index", max_range_index) +
+    optional("--windows_size", window_size) +
+    optional("--sequencing_error_rate", sequencing_error_rate) +
+    optional("--sensitivity", sensitivity) +
+    optional("--maximum_allowed_mismatch_rate", maximum_allowed_mismatch_rate) +
+    optional("--NM", nm) +
+    conditional(report_inversions, "--report_inversions") +
+    conditional(report_duplications, "--report_duplications") +
+    conditional(report_long_insertions, "--report_long_insertions") +
+    conditional(report_breakpoints, "--report_breakpoints") +
+    conditional(report_close_mapped_reads, "--report_close_mapped_reads") +
+    conditional(report_only_close_mapped_reads, "--report_only_close_mapped_reads") +
+    conditional(report_interchromosomal_events, "--report_interchromosomal_events") +
+    conditional(IndelCorrection, "--IndelCorrection") +
+    conditional(NormalSamples, "--NormalSamples") +
+    optional("--breakdancer", breakdancer) +
+    optional("--include", include) +
+    optional("--exclude", exclude) +
+    optional("--additional_mismatch", additional_mismatch) +
+    optional("--min_perfect_match_around_BP", min_perfect_match_around_BP) +
+    optional("--min_inversion_size", min_inversion_size) +
+    optional("--min_num_matched_bases", min_num_matched_bases) +
+    optional("--balance_cutoff", balance_cutoff) +
+    optional("--anchor_quality", anchor_quality) +
+    optional("--minimum_support_for_event", minimum_support_for_event) +
+    optional("--input_SV_Calls_for_assembly", input_SV_Calls_for_assembly) +
+    conditional(genotyping, "-g") +
+    optional("--output_of_breakdancer_events", output_of_breakdancer_events) +
+    optional("--name_of_logfile", name_of_logfile) +
+    optional("--number_of_threads", threads) +
+    optional("--Ploidy", Ploidy) +
+    conditional(detect_DD, "--detect_DD") +
+    optional("--MAX_DD_BREAKPOINT_DISTANCE", MAX_DD_BREAKPOINT_DISTANCE) +
+    optional("--MAX_DISTANCE_CLUSTER_READS", MAX_DISTANCE_CLUSTER_READS) +
+    optional("--MIN_DD_CLUSTER_SIZE", MIN_DD_CLUSTER_SIZE) +
+    optional("--MIN_DD_BREAKPOINT_SUPPORT", MIN_DD_BREAKPOINT_SUPPORT) +
+    optional("--MIN_DD_MAP_DISTANCE", MIN_DD_MAP_DISTANCE) +
+    optional("--DD_REPORT_DUPLICATION_READS", DD_REPORT_DUPLICATION_READS) +
+}
+
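+/**
+ * Convenience constructor for the bam-config invocation; a minimal usage sketch from a
+ * QScript, mirroring the call in PindelPipeline (it assumes `cfg.output` is a bam
+ * configuration file written by PindelConfig and `outputDir` is the run directory):
+ *
+ *   val pindel = Pindel(qscript, cfg.output, outputDir)
+ *   add(pindel)
+ */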
+object Pindel {
+  def apply(root: Configurable, configFile: File, outputDir: File): Pindel = {
+    val caller = new Pindel(root)
+    caller.config_file = Some(configFile)
+    caller.output_prefix = outputDir
+    caller
+  }
+}
diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala
similarity index 84%
rename from public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala
rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala
index 497fe21e3..77ab79f1e 100644
--- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala
@@ -17,7 +17,8 @@ package nl.lumc.sasc.biopet.extensions.pindel
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, ToolCommand }
+import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
+import nl.lumc.sasc.biopet.utils.ToolCommand
 import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
 
@@ -81,6 +82,15 @@ object PindelConfig extends ToolCommand {
 
     val input: File = commandArgs.inputbam
 
+    // The logic here is to pull the libraries stored in the bam file and write them
+    // to a pindel bam-configuration file, see:
+    // http://gmt.genome.wustl.edu/packages/pindel/quick-start.html
+    // Each line of that file has the format:
+    //   filename<tab>avg insert size<tab>sample_label or name for reporting
+    //   tumor_sample_1222.bam<tab>250<tab>TUMOR_1222
+    //   somatic_sample_1222.bam<tab>250<tab>HEALTHY_1222
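+    //
+    // A minimal sketch of deriving such lines from the bam header read groups,
+    // assuming htsjdk is on the classpath, `scala.collection.JavaConverters._` is
+    // imported, and the read groups carry a predicted median insert size and a sample name:
+    //
+    //   val header = htsjdk.samtools.SamReaderFactory.makeDefault().open(input).getFileHeader
+    //   val configLines = header.getReadGroups.asScala.map { rg =>
+    //     val insertSize = Option(rg.getPredictedMedianInsertSize).map(_.toString).getOrElse("250")
+    //     val sample = Option(rg.getSample).getOrElse(input.getName)
+    //     s"${input.getAbsolutePath}\t$insertSize\t$sample"
+    //   }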
+
   }
 }
 
diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala
similarity index 91%
rename from public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala
index 107955334..e5d276727 100644
--- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala
@@ -22,7 +22,7 @@ import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.queue.QScript
 
 /// Pindel is actually a mini pipeline executing binaries from the pindel package
-class Pindel(val root: Configurable) extends QScript with BiopetQScript {
+class PindelPipeline(val root: Configurable) extends QScript with BiopetQScript {
   def this() = this(null)
 
   @Input(doc = "Input file (bam)")
@@ -60,7 +60,7 @@ class Pindel(val root: Configurable) extends QScript with BiopetQScript {
     add(cfg)
 
     val output: File = this.outputvcf
-    val pindel = PindelCaller(this, cfg.output, output)
+    val pindel = Pindel(this, cfg.output, output)
     add(pindel)
     outputFiles += ("pindel_tsv" -> pindel.output)
 
@@ -72,9 +72,9 @@ class Pindel(val root: Configurable) extends QScript with BiopetQScript {
   //  private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.tsv"
 }
 
-object Pindel extends PipelineCommand {
-  def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = {
-    val pindel = new Pindel(root)
+object PindelPipeline extends PipelineCommand {
+  def apply(root: Configurable, input: File, reference: File, runDir: String): PindelPipeline = {
+    val pindel = new PindelPipeline(root)
     pindel.input = input
     pindel.reference = reference
     pindel.workdir = runDir
diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala b/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala
deleted file mode 100644
index cbe957e79..000000000
--- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
-package nl.lumc.sasc.biopet.extensions.pindel
-
-import java.io.File
-
-import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
-
-class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction {
-  executable = config("exe", default = "pindel", freeVar = false)
-
-  override def defaultCoreMemory = 5.0
-  override def defaultThreads = 8
-
-  override def versionRegex = """Pindel version:? (.*)""".r
-  override def versionExitcode = List(1)
-  override def versionCommand = executable
-
-  @Input(doc = "The pindel configuration file")
-  var input: File = _
-
-  @Input(doc = "Fasta reference")
-  var reference: File = config("reference")
-
-  // this is a pointer to where the results files will be stored
-  // inside this directory, we can expect files named:
-  // <prefix>_D
-  // <prefix>_SI
-  // <prefix>_I
-  // <prefix>_TR
-  @Argument(doc = "Work directory")
-  var workdir: String = _
-
-  @Output(doc = "Pindel VCF output")
-  var output: File = _
-
-  var window_size: Option[Int] = config("window_size", default = 5)
-
-  override def beforeCmd() {
-  }
-
-  def cmdLine = required(executable) +
-    "-i " + required(input) +
-    "-f " + required(reference) +
-    "-o " + required(output) +
-    optional("-w", window_size) +
-    optional("-T", nCoresRequest)
-}
-
-object PindelCaller {
-  def apply(root: Configurable, input: File, output: File): PindelCaller = {
-    val caller = new PindelCaller(root)
-    caller.input = input
-    caller.output = output
-    caller
-  }
-}
-- 
GitLab