From 3c2f4a07686409a8984a8ded08654605748cdc4b Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Fri, 24 Jul 2015 11:33:51 +0200
Subject: [PATCH] Add ShivaSvCalling

---
 .../biopet/core/BiopetExecutablePublic.scala  |   3 +-
 .../pipelines/shiva/ShivaSvCalling.scala      | 149 ++++++++++++++++++
 2 files changed, 151 insertions(+), 1 deletion(-)
 create mode 100644 public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala

diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala
index a52f6ab3f..89a195b98 100644
--- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala
+++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala
@@ -25,7 +25,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
     nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig,
     nl.lumc.sasc.biopet.pipelines.carp.Carp,
     nl.lumc.sasc.biopet.pipelines.toucan.Toucan,
-    nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp
+    nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp,
+    nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling
   )
 
   def pipelines: List[MainCommand] = List(
diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala
new file mode 100644
index 000000000..3e4de155e
--- /dev/null
+++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala
@@ -0,0 +1,149 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.pipelines.shiva
+
+import java.io.File
+
+import htsjdk.samtools.SamReaderFactory
+import nl.lumc.sasc.biopet.core.summary.SummaryQScript
+import nl.lumc.sasc.biopet.core.{PipelineCommand, BiopetQScript, Reference, SampleLibraryTag}
+import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer
+import nl.lumc.sasc.biopet.extensions.clever.CleverCaller
+import nl.lumc.sasc.biopet.extensions.delly.Delly
+import org.broadinstitute.gatk.utils.commandline.Input
+import scala.collection.JavaConversions._
+
+/**
+ * Common trait for ShivaVariantcalling
+ *
+ * Created by pjvan_thof on 2/26/15.
+ */
+class ShivaSvCalling extends SummaryQScript with SampleLibraryTag with Reference {
+  qscript =>
+
+  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
+  protected var inputBamsArg: List[File] = Nil
+
+  protected var inputBams: Map[String, File] = Map()
+
+  protected def addBamFile(file: File, sampleId: Option[String] = None): Unit = {
+    sampleId match {
+      case Some(sample) => inputBams += sample -> file
+      case _ if !file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file)
+      case _ => {
+        val inputSam = SamReaderFactory.makeDefault.open(file)
+        val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct
+        if (samples.size == 1) {
+          inputBams += samples.head -> file
+        } else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file)
+      }
+    }
+  }
+
+  /** Executed before script */
+  def init(): Unit = {
+    inputBamsArg.foreach(addBamFile(_))
+  }
+
+  /** Variantcallers requested by the config */
+  protected val configCallers: Set[String] = config("sv_callers")
+
+  /** This will add jobs for this pipeline */
+  def biopetScript(): Unit = {
+    for (cal <- configCallers) {
+      if (!callersList.exists(_.name == cal))
+        BiopetQScript.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
+    }
+
+    val callers = callersList.filter(x => configCallers.contains(x.name)).sortBy(_.prio)
+
+    require(inputBams.nonEmpty, "No input bams found")
+    require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
+
+    addSummaryJobs()
+  }
+
+  /** Will generate all available variantcallers */
+  protected def callersList: List[SvCaller] = List(new Breakdancer, new Clever, new Delly)
+
+  /** General trait for a variantcaller mode */
+  trait SvCaller {
+    /** Name of mode, this should also be used in the config */
+    val name: String
+
+    /** Output dir for this mode */
+    def outputDir = new File(qscript.outputDir, name)
+
+    /** This should add the variantcaller jobs */
+    def addJobs()
+  }
+
+  /** default mode of freebayes */
+  class Breakdancer extends SvCaller {
+    val name = "breakdancer"
+
+    def addJobs() {
+      //TODO: move minipipeline of breakdancer to here
+      for ((sample, bamFile) <- inputBams) {
+        val breakdancerDir = new File(outputDir, sample)
+        val breakdancer = Breakdancer(qscript, bamFile, qscript.reference, breakdancerDir)
+        addAll(breakdancer.functions)
+      }
+    }
+  }
+
+  /** default mode of bcftools */
+  class Clever extends SvCaller {
+    val name = "clever"
+
+    def addJobs() {
+      //TODO: check double directories
+      for ((sample, bamFile) <- inputBams) {
+        val cleverDir = new File(outputDir, sampleId)
+        val clever = CleverCaller(qscript, bamFile, qscript.reference, cleverDir, cleverDir)
+        add(clever)
+      }
+    }
+  }
+
+  /** Makes a vcf file from a mpileup without statistics */
+  class Delly extends SvCaller {
+    val name = "delly"
+
+    def addJobs() {
+      //TODO: Move mini delly pipeline to here
+      for ((sample, bamFile) <- inputBams) {
+        val dellyDir = new File(outputDir, sampleId)
+        val delly = Delly(qscript, bamFile, dellyDir)
+        addAll(delly.functions)
+      }
+    }
+  }
+
+  /** Location of summary file */
+  def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json")
+
+  /** Settings for the summary */
+  def summarySettings = Map("sv_callers" -> configCallers.toList)
+
+  /** Files for the summary */
+  def summaryFiles: Map[String, File] = {
+    val callers: Set[String] = config("sv_callers")
+    callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
+  }
+}
+
+object ShivaSvCalling extends PipelineCommand
\ No newline at end of file
-- 
GitLab