Commit 5164aa32 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Adding samplingSize to BamUtils - estimateInsertSize

parent 75b7230c
......@@ -67,10 +67,10 @@ object BamUtils {
* @param bamFile bamfile to estimate avg insertsize from
* @return
*/
def sampleBamInsertSize(bamFile: File): Int = {
def sampleBamInsertSize(bamFile: File, samplingSize: Int = 100000): Int = {
val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile)
val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({
contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength)
contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength, samplingSize)
}).toList
val counts = baminsertsizes.flatMap(x => x)
val sum = counts.reduceLeft(_ + _)
......@@ -84,8 +84,8 @@ object BamUtils {
* @param bamFiles input bam files
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
bamFile -> sampleBamInsertSize(bamFile)
// Samples the insert size for every file in `bamFiles`, iterating over them as a parallel collection (`.par`).
// `samplingSize` (default 100000) is passed through unchanged to the single-file `sampleBamInsertSize` call.
// NOTE(review): the unit of `samplingSize` is not visible here — confirm against `contigInsertSize`.
def sampleBamsInsertSize(bamFiles: List[File], samplingSize: Int = 100000): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
// Pair each input file with the Int returned for it, so the result can be looked up by file.
bamFile -> sampleBamInsertSize(bamFile, samplingSize)
}.toMap
}
......@@ -15,11 +15,9 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
import java.io.File
import java.text.SimpleDateFormat
import java.util.Calendar
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.extensions.pindel._
import nl.lumc.sasc.biopet.utils.BamUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -31,9 +29,10 @@ class Pindel(val root: Configurable) extends SvCaller {
def this() = this(null)
/** Default pipeline config */
override def defaults = Map("pindelvcf" -> Map(
"rdate" -> new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime())
))
// Supplies a default value for the "rdate" entry nested under the "pindelvcf" key.
override def defaults = Map(
"pindelvcf" -> Map(
// Current date formatted as yyyyMMdd; because `defaults` is a def, this is recomputed on every call.
"rdate" -> new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime())
))
def biopetScript() {
for ((sample, bamFile) <- inputBams) {
......@@ -44,8 +43,6 @@ class Pindel(val root: Configurable) extends SvCaller {
cfg.input = bamFile
val insertSize: Int = BamUtils.sampleBamInsertSize(bamFile)
// FIXME: get the real insert size of the bam (from bammetrics?)
cfg.insertsize = insertSize
cfg.sampleName = sample
cfg.output = config_file
......@@ -67,14 +64,4 @@ class Pindel(val root: Configurable) extends SvCaller {
}
}
}
/** Companion object offering a factory that constructs a Pindel and runs its setup calls. */
object Pindel extends PipelineCommand {
/**
 * Builds a Pindel for `root`, calls `init()` and then `biopetScript()` on it, and returns the instance.
 *
 * NOTE(review): `input`, `reference` and `runDir` are accepted but never read in this body —
 * confirm whether they were meant to be applied to the new instance.
 *
 * @param root configurable handed to the Pindel constructor
 * @param input not used by this method
 * @param reference not used by this method
 * @param runDir not used by this method
 * @return the Pindel instance after `init()` and `biopetScript()` have been called on it
 */
def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = {
val pindel = new Pindel(root)
// run the following for activating the pipeline steps
pindel.init()
pindel.biopetScript()
pindel
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment