Commit 75b7230c authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Added insert size estimation. Fix pindel2vcf dependency (depend on pindel, use...

Added insert size estimation. Fix pindel2vcf dependency (depend on pindel, use beforegraph to set outputpath)
parent 6a1866c0
......@@ -59,19 +59,19 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit
@Output(doc = "Output file of pindel, pointing to the DEL file")
var outputFile: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputINV: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputTD: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputLI: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputBP: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputSI: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputRP: File = _
@Output(doc="", required=false)
@Output(doc = "", required = false)
var outputCloseEndMapped: File = _
var RP: Option[Int] = config("RP")
......@@ -148,7 +148,7 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit
if (reportLongInsertions) {
outputLI = new File(outputPrefix + File.separator, "sample_LI")
}
if (reportBreakpoints){
if (reportBreakpoints) {
outputBP = new File(outputPrefix + File.separator, "sample_BP")
}
outputSI = new File(outputPrefix + File.separator, "sample_SI")
......@@ -215,6 +215,7 @@ object PindelCaller {
val caller = new PindelCaller(root)
caller.configFile = Some(configFile)
caller.outputPrefix = outputDir
caller.beforeGraph
caller
}
}
......@@ -32,9 +32,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
override def beforeGraph: Unit = {
if (reference == null) reference = referenceFasta()
println(pindelOutputInputHolder)
}
@Input
@Input(doc = "Make this file a dependency before pindel2vcf can run. Usually a file generated by Pindel such as a _D file")
var pindelOutputInputHolder: File = _
var pindelOutput: Option[File] = config("pindel_output")
......@@ -66,6 +67,7 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
required("--reference", reference) +
required("--reference_name", referenceSpecies) +
required("--reference_date", rDate) +
required("--fake_biopet_input_holder", pindelOutputInputHolder) +
optional("--pindel_output", pindelOutput) +
optional("--pindel_output_root", pindelOutputRoot) +
required("--vcf", outputVCF) +
......
......@@ -41,13 +41,15 @@ object BamUtils {
* @param end postion to stop scanning
* @return Int with insertsize for this contig
*/
def contigInsertSize(inputBam: File, contig: String, end: Int): Option[Int] = {
def contigInsertSize(inputBam: File, contig: String, start: Int, end: Int, samplingSize: Int = 100000): Option[Int] = {
val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam)
val samIterator = inputSam.query(contig, 1, end, true)
val samIterator = inputSam.query(contig, start, end, true)
val insertsizes: List[Int] = (for {
read <- samIterator.toStream.takeWhile(rec => {
rec.getReadPairedFlag && ((rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false) && rec.getProperPairFlag)
}).take(100000)
val paired = rec.getReadPairedFlag && rec.getProperPairFlag
val bothMapped = (rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false)
paired && bothMapped
}).take(samplingSize)
} yield {
read.getInferredInsertSize.asInstanceOf[Int].abs
})(collection.breakOut)
......@@ -60,19 +62,30 @@ object BamUtils {
}
/**
* Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>]
* Estimate the insertsize for one single bamfile and return the insertsize
*
* @param bamFiles input bam files
* @param bamFile bamfile to estimate avg insertsize from
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
def sampleBamInsertSize(bamFile: File): Int = {
val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile)
val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({
contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, contig.getSequenceLength)
contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength)
}).toList
val sum = baminsertsizes.flatMap(x => x).reduceLeft(_ + _)
val n = baminsertsizes.flatMap(x => x).size
bamFile -> (sum / n).toInt
val counts = baminsertsizes.flatMap(x => x)
val sum = counts.reduceLeft(_ + _)
val n = counts.size
sum / n
}
/**
* Estimate the insertsize for each bam file and return Map[<sampleBamFile>, <insertSize>]
*
* @param bamFiles input bam files
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
bamFile -> sampleBamInsertSize(bamFile)
}.toMap
}
......@@ -21,6 +21,7 @@ import java.util.Calendar
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.extensions.pindel._
import nl.lumc.sasc.biopet.utils.BamUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
/// Pindel is actually a mini pipeline executing binaries from the pindel package
......@@ -42,8 +43,10 @@ class Pindel(val root: Configurable) extends SvCaller {
val cfg = new PindelConfig(this)
cfg.input = bamFile
val insertSize: Int = BamUtils.sampleBamInsertSize(bamFile)
// FIXME: get the real insert size of the bam (from bammetrics?)
cfg.insertsize = 500
cfg.insertsize = insertSize
cfg.sampleName = sample
cfg.output = config_file
add(cfg)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment