diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index 7b7e2ee02b6d978d635c2d8987a4d4dc36d38990..56e42f1ceab428e9f4b31fead30042d21b34d58f 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -67,10 +67,10 @@ object BamUtils { * @param bamFile bamfile to estimate avg insertsize from * @return */ - def sampleBamInsertSize(bamFile: File): Int = { + def sampleBamInsertSize(bamFile: File, samplingSize: Int = 100000): Int = { val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile) val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({ - contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength) + contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength, samplingSize) }).toList val counts = baminsertsizes.flatMap(x => x) val sum = counts.reduceLeft(_ + _) @@ -84,8 +84,8 @@ object BamUtils { * @param bamFiles input bam files * @return */ - def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile => - bamFile -> sampleBamInsertSize(bamFile) + def sampleBamsInsertSize(bamFiles: List[File], samplingSize: Int = 100000): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile => + bamFile -> sampleBamInsertSize(bamFile, samplingSize) }.toMap } diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala index a452c0feee4b14b9f3f57fec403b4e006f05727b..44393b3fb54a8803fe9f3f6773a32a3a1c42ba5b 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala @@ -15,11 +15,9 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.svcallers -import java.io.File import java.text.SimpleDateFormat import java.util.Calendar -import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.extensions.pindel._ import nl.lumc.sasc.biopet.utils.BamUtils import nl.lumc.sasc.biopet.utils.config.Configurable @@ -31,9 +29,10 @@ class Pindel(val root: Configurable) extends SvCaller { def this() = this(null) /** Default pipeline config */ - override def defaults = Map("pindelvcf" -> Map( - "rdate" -> new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime()) - )) + override def defaults = Map( + "pindelvcf" -> Map( + "rdate" -> new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime()) + )) def biopetScript() { for ((sample, bamFile) <- inputBams) { @@ -44,8 +43,6 @@ class Pindel(val root: Configurable) extends SvCaller { cfg.input = bamFile val insertSize: Int = BamUtils.sampleBamInsertSize(bamFile) - - // FIXME: get the real insert size of the bam (from bammetrics?) cfg.insertsize = insertSize cfg.sampleName = sample cfg.output = config_file @@ -67,14 +64,4 @@ class Pindel(val root: Configurable) extends SvCaller { } } -} - -object Pindel extends PipelineCommand { - def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = { - val pindel = new Pindel(root) - // run the following for activating the pipeline steps - pindel.init() - pindel.biopetScript() - pindel - } } \ No newline at end of file