Skip to content
Snippets Groups Projects
Commit 5827c070 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Adding sampleBamInsertSize (with some error, too be fixed later)

parent d4901714
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.utils
import java.io.File
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.{SamReader, SamReaderFactory}
import scala.collection.JavaConversions._
......@@ -30,4 +30,33 @@ object BamUtils {
if (temp.map(_._1).distinct.size != temp.size) throw new IllegalArgumentException("Samples has been found twice")
temp.toMap
}
/**
* Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>]
*
* @param bamFiles input bam files
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): Map[File, Float] = bamFiles.map { file =>
val inputSam: SamReader = SamReaderFactory.makeDefault.open(file)
val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.map {
contig =>
val insertsizes: Iterator[Int] = for {
read <- inputSam.query( contig.getSequenceName, 1, contig.getSequenceLength, true) //.toStream.slice(0, 100).toList
insertsize = read.getInferredInsertSize
paired = read.getReadPairedFlag
bothMapped = (read.getReadUnmappedFlag == false) && (read.getMateUnmappedFlag == false)
if paired && bothMapped
} yield {
insertsize
}
val contigInsertSize = insertsizes.foldLeft((0.0,0))((t, r) => (t._1 + r, t._2 +1))
contigInsertSize._1 / contigInsertSize._2
}.foldLeft((0.0,0))((t, r) => (t._1 + r, t._2 +1))
file -> baminsertsizes._1 / baminsertsizes._2
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment