Commit 162d021a authored by Peter van 't Hof's avatar Peter van 't Hof

Make common methods for fraction

parent 1b13fd65
......@@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.utils.intervals
import java.io.{ File, PrintWriter }
import htsjdk.samtools.SAMSequenceDictionary
import htsjdk.samtools.reference.FastaSequenceFile
import scala.collection.JavaConversions._
import scala.collection.mutable
......@@ -112,6 +111,15 @@ case class BedRecordList(val chrRecords: Map[String, List[BedRecord]], val heade
allRecords.foreach(writer.println)
writer.close()
}
/** This return the fraction of the regions comparing to a length */
def fractionOf(length: Long): Double = length.toDouble / length
/** This return the fraction of the regions comparing to a reference */
def fractionOfReference(dict: SAMSequenceDictionary): Double = fractionOf(dict.getReferenceLength)
/** This return the fraction of the regions comparing to a reference */
def fractionOfReference(file: File): Double = fractionOfReference(FastaUtils.getCachedDict(file))
}
object BedRecordList {
......@@ -131,6 +139,42 @@ object BedRecordList {
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil)
/**
* This creates a [[BedRecordList]] based on multiple files
*
* @param bedFiles Input bed files
* @return
*/
def fromFiles(bedFiles: File*) = {
fromFiles(bedFiles, combine = false)
}
/**
* This creates a [[BedRecordList]] based on multiple files. This method combines overlapping regions
*
* @param bedFiles Input bed files
* @return
*/
def fromFilesCombine(bedFiles: File*) = {
fromFiles(bedFiles, combine = true)
}
/**
* This creates a [[BedRecordList]] based on multiple files
*
* @param bedFiles Input bed files
* @param combine When true overlaping regions are merged
* @return
*/
def fromFiles(bedFiles: Seq[File], combine: Boolean = false) = {
val list = bedFiles.foldLeft(empty)((a,b) => fromList(fromFile(b).allRecords ++ a.allRecords))
if (combine) list.combineOverlap
else list
}
/** This created a empty [[BedRecordList]] */
def empty = fromList(Nil)
def fromFile(bedFile: File) = {
val reader = Source.fromFile(bedFile)
val all = reader.getLines().toList
......
......@@ -43,28 +43,9 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
val haploidRegionsMale: Option[File] = config("haploid_regions_male")
val haploidRegionsFemale: Option[File] = config("haploid_regions_female")
lazy val referenceSize = referenceDict.getReferenceLength
lazy val fractionMale: Double = calculateFraction(haploidRegions, haploidRegionsMale)
lazy val fractionFemale: Double = calculateFraction(haploidRegions, haploidRegionsFemale)
lazy val fractionUnknown: Double = calculateFraction(haploidRegions, None)
/**
* This method will calculate the fraction of the given bed files of the used reference
*
* @param file1 Bed file
* @param file2 Bed file
* @return Fraction
*/
def calculateFraction(file1: Option[File], file2: Option[File]): Double = {
(file1.map(BedRecordList.fromFile(_)), file2.map(BedRecordList.fromFile(_))) match {
case (Some(l), None) => l.length.toDouble / referenceSize
case (None, Some(l)) => l.length.toDouble / referenceSize
case (Some(l1), Some(l2)) =>
BedRecordList.fromList(l1.allRecords ++ l2.allRecords).combineOverlap.length.toDouble / referenceSize
case (None, None) => 0.0
}
}
lazy val fractionMale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsMale).flatten, true).fractionOfReference(referenceDict)
lazy val fractionFemale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsFemale).flatten, true).fractionOfReference(referenceDict)
lazy val fractionUnknown: Double = BedRecordList.fromFiles(Seq(haploidRegions).flatten, true).fractionOfReference(referenceDict)
override def fixedValues = Map("haplotypecaller" -> Map("emitRefConfidence" -> "GVCF"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment