From 5164aa32b8dff52bd545cd5e35bdc9bdfa2b6c78 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@lumc.nl>
Date: Fri, 29 Jan 2016 14:45:54 +0100
Subject: [PATCH] Adding samplingSize to BamUtils - sampleBamInsertSize

---
 .../nl/lumc/sasc/biopet/utils/BamUtils.scala  |  8 +++----
 .../pipelines/shiva/svcallers/Pindel.scala    | 21 ++++---------------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala
index 7b7e2ee02..56e42f1ce 100644
--- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala
+++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala
@@ -67,10 +67,10 @@ object BamUtils {
    * @param bamFile bamfile to estimate avg insertsize from
    * @return
    */
-  def sampleBamInsertSize(bamFile: File): Int = {
+  def sampleBamInsertSize(bamFile: File, samplingSize: Int = 100000): Int = {
     val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile)
     val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({
-      contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength)
+      contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength, samplingSize)
     }).toList
     val counts = baminsertsizes.flatMap(x => x)
     val sum = counts.reduceLeft(_ + _)
@@ -84,8 +84,8 @@ object BamUtils {
    * @param bamFiles input bam files
    * @return
    */
-  def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
-    bamFile -> sampleBamInsertSize(bamFile)
+  def sampleBamsInsertSize(bamFiles: List[File], samplingSize: Int = 100000): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
+    bamFile -> sampleBamInsertSize(bamFile, samplingSize)
   }.toMap
 
 }
diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala
index a452c0fee..44393b3fb 100644
--- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala
+++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala
@@ -15,11 +15,9 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
 
-import java.io.File
 import java.text.SimpleDateFormat
 import java.util.Calendar
 
-import nl.lumc.sasc.biopet.core.PipelineCommand
 import nl.lumc.sasc.biopet.extensions.pindel._
 import nl.lumc.sasc.biopet.utils.BamUtils
 import nl.lumc.sasc.biopet.utils.config.Configurable
@@ -31,9 +29,10 @@ class Pindel(val root: Configurable) extends SvCaller {
   def this() = this(null)
 
   /** Default pipeline config */
-  override def defaults = Map("pindelvcf" -> Map(
-    "rdate" -> new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime())
-  ))
+  override def defaults = Map(
+    "pindelvcf" -> Map(
+      "rdate" -> new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime())
+    ))
 
   def biopetScript() {
     for ((sample, bamFile) <- inputBams) {
@@ -44,8 +43,6 @@ class Pindel(val root: Configurable) extends SvCaller {
       cfg.input = bamFile
 
       val insertSize: Int = BamUtils.sampleBamInsertSize(bamFile)
-
-      // FIXME: get the real insert size of the bam (from bammetrics?)
       cfg.insertsize = insertSize
       cfg.sampleName = sample
       cfg.output = config_file
@@ -67,14 +64,4 @@ class Pindel(val root: Configurable) extends SvCaller {
     }
 
   }
-}
-
-object Pindel extends PipelineCommand {
-  def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = {
-    val pindel = new Pindel(root)
-    // run the following for activating the pipeline steps
-    pindel.init()
-    pindel.biopetScript()
-    pindel
-  }
 }
\ No newline at end of file
-- 
GitLab