Skip to content
Snippets Groups Projects
Commit 6a1866c0 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Insertsize estimator

parent eebab126
No related branches found
No related tags found
No related merge requests found
...@@ -2,9 +2,10 @@ package nl.lumc.sasc.biopet.utils ...@@ -2,9 +2,10 @@ package nl.lumc.sasc.biopet.utils
import java.io.File import java.io.File
import htsjdk.samtools.{ SAMSequenceRecord, SamReader, SamReaderFactory } import htsjdk.samtools.{ SamReader, SamReaderFactory }
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.collection.parallel.immutable
/** /**
* Created by pjvan_thof on 11/19/15. * Created by pjvan_thof on 11/19/15.
...@@ -31,19 +32,31 @@ object BamUtils { ...@@ -31,19 +32,31 @@ object BamUtils {
temp.toMap temp.toMap
} }
def contigInsertSize(inputSam: SamReader, contig: SAMSequenceRecord): Int = { /**
* Estimate the insertsize of fragments within the given contig.
val insertsizes: Iterator[Int] = for { * Uses the properly paired reads according to flags set by the aligner
read <- inputSam.query(contig.getSequenceName, 1, contig.getSequenceLength, true) //.toStream.slice(0, 100).toList *
insertsize = read.getInferredInsertSize * @param inputBam input bam file
paired = read.getReadPairedFlag * @param contig contig to scan for
bothMapped = (read.getReadUnmappedFlag == false) && (read.getMateUnmappedFlag == false) * @param end postion to stop scanning
if paired && bothMapped * @return Int with insertsize for this contig
*/
def contigInsertSize(inputBam: File, contig: String, end: Int): Option[Int] = {
val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam)
val samIterator = inputSam.query(contig, 1, end, true)
val insertsizes: List[Int] = (for {
read <- samIterator.toStream.takeWhile(rec => {
rec.getReadPairedFlag && ((rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false) && rec.getProperPairFlag)
}).take(100000)
} yield { } yield {
insertsize read.getInferredInsertSize.asInstanceOf[Int].abs
} })(collection.breakOut)
val contigInsertSize = insertsizes.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1)) val cti = insertsizes.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1))
(contigInsertSize._1 / contigInsertSize._2).toInt
samIterator.close()
inputSam.close()
val ret = if (cti._2 == 0) None else Some((cti._1 / cti._2).toInt)
ret
} }
/** /**
...@@ -52,16 +65,14 @@ object BamUtils { ...@@ -52,16 +65,14 @@ object BamUtils {
* @param bamFiles input bam files * @param bamFiles input bam files
* @return * @return
*/ */
def sampleBamInsertSize(bamFiles: List[File]): Map[File, Int] = bamFiles.map { file => def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
val inputSam: SamReader = SamReaderFactory.makeDefault.open(file) val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile)
val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.map { val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({
contig => contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, contig.getSequenceLength)
val insertSize = BamUtils.contigInsertSize(inputSam, contig) }).toList
val sum = baminsertsizes.flatMap(x => x).reduceLeft(_ + _)
Logging.logger.debug(s"Insertsize ${contig}: ${insertSize}") val n = baminsertsizes.flatMap(x => x).size
insertSize bamFile -> (sum / n).toInt
}
file -> (baminsertsizes.sum / baminsertsizes.size)
}.toMap }.toMap
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment