Skip to content
Snippets Groups Projects
Commit 75b7230c authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Added insert size estimation. Fix pindel2vcf dependency (depend on pindel, use...

Added insert size estimation. Fix pindel2vcf dependency (depend on pindel, use beforegraph to set outputpath)
parent 6a1866c0
No related branches found
No related tags found
No related merge requests found
...@@ -59,19 +59,19 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit ...@@ -59,19 +59,19 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit
@Output(doc = "Output file of pindel, pointing to the DEL file") @Output(doc = "Output file of pindel, pointing to the DEL file")
var outputFile: File = _ var outputFile: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputINV: File = _ var outputINV: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputTD: File = _ var outputTD: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputLI: File = _ var outputLI: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputBP: File = _ var outputBP: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputSI: File = _ var outputSI: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputRP: File = _ var outputRP: File = _
@Output(doc="", required=false) @Output(doc = "", required = false)
var outputCloseEndMapped: File = _ var outputCloseEndMapped: File = _
var RP: Option[Int] = config("RP") var RP: Option[Int] = config("RP")
...@@ -148,7 +148,7 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit ...@@ -148,7 +148,7 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit
if (reportLongInsertions) { if (reportLongInsertions) {
outputLI = new File(outputPrefix + File.separator, "sample_LI") outputLI = new File(outputPrefix + File.separator, "sample_LI")
} }
if (reportBreakpoints){ if (reportBreakpoints) {
outputBP = new File(outputPrefix + File.separator, "sample_BP") outputBP = new File(outputPrefix + File.separator, "sample_BP")
} }
outputSI = new File(outputPrefix + File.separator, "sample_SI") outputSI = new File(outputPrefix + File.separator, "sample_SI")
...@@ -215,6 +215,7 @@ object PindelCaller { ...@@ -215,6 +215,7 @@ object PindelCaller {
val caller = new PindelCaller(root) val caller = new PindelCaller(root)
caller.configFile = Some(configFile) caller.configFile = Some(configFile)
caller.outputPrefix = outputDir caller.outputPrefix = outputDir
caller.beforeGraph
caller caller
} }
} }
...@@ -32,9 +32,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R ...@@ -32,9 +32,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
override def beforeGraph: Unit = { override def beforeGraph: Unit = {
if (reference == null) reference = referenceFasta() if (reference == null) reference = referenceFasta()
println(pindelOutputInputHolder)
} }
@Input @Input(doc = "Make this file a dependency before pindel2vcf can run. Usually a file generated by Pindel such as a _D file")
var pindelOutputInputHolder: File = _ var pindelOutputInputHolder: File = _
var pindelOutput: Option[File] = config("pindel_output") var pindelOutput: Option[File] = config("pindel_output")
...@@ -66,6 +67,7 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R ...@@ -66,6 +67,7 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
required("--reference", reference) + required("--reference", reference) +
required("--reference_name", referenceSpecies) + required("--reference_name", referenceSpecies) +
required("--reference_date", rDate) + required("--reference_date", rDate) +
required("--fake_biopet_input_holder", pindelOutputInputHolder) +
optional("--pindel_output", pindelOutput) + optional("--pindel_output", pindelOutput) +
optional("--pindel_output_root", pindelOutputRoot) + optional("--pindel_output_root", pindelOutputRoot) +
required("--vcf", outputVCF) + required("--vcf", outputVCF) +
......
...@@ -41,13 +41,15 @@ object BamUtils { ...@@ -41,13 +41,15 @@ object BamUtils {
* @param end postion to stop scanning * @param end postion to stop scanning
* @return Int with insertsize for this contig * @return Int with insertsize for this contig
*/ */
def contigInsertSize(inputBam: File, contig: String, end: Int): Option[Int] = { def contigInsertSize(inputBam: File, contig: String, start: Int, end: Int, samplingSize: Int = 100000): Option[Int] = {
val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam) val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam)
val samIterator = inputSam.query(contig, 1, end, true) val samIterator = inputSam.query(contig, start, end, true)
val insertsizes: List[Int] = (for { val insertsizes: List[Int] = (for {
read <- samIterator.toStream.takeWhile(rec => { read <- samIterator.toStream.takeWhile(rec => {
rec.getReadPairedFlag && ((rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false) && rec.getProperPairFlag) val paired = rec.getReadPairedFlag && rec.getProperPairFlag
}).take(100000) val bothMapped = (rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false)
paired && bothMapped
}).take(samplingSize)
} yield { } yield {
read.getInferredInsertSize.asInstanceOf[Int].abs read.getInferredInsertSize.asInstanceOf[Int].abs
})(collection.breakOut) })(collection.breakOut)
...@@ -60,19 +62,30 @@ object BamUtils { ...@@ -60,19 +62,30 @@ object BamUtils {
} }
/** /**
* Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>] * Estimate the insertsize for one single bamfile and return the insertsize
* *
* @param bamFiles input bam files * @param bamFile bamfile to estimate avg insertsize from
* @return * @return
*/ */
def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile => def sampleBamInsertSize(bamFile: File): Int = {
val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile) val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile)
val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({ val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({
contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, contig.getSequenceLength) contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength)
}).toList }).toList
val sum = baminsertsizes.flatMap(x => x).reduceLeft(_ + _) val counts = baminsertsizes.flatMap(x => x)
val n = baminsertsizes.flatMap(x => x).size val sum = counts.reduceLeft(_ + _)
bamFile -> (sum / n).toInt val n = counts.size
sum / n
}
/**
* Estimate the insertsize for each bam file and return Map[<sampleBamFile>, <insertSize>]
*
* @param bamFiles input bam files
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile =>
bamFile -> sampleBamInsertSize(bamFile)
}.toMap }.toMap
} }
...@@ -21,6 +21,7 @@ import java.util.Calendar ...@@ -21,6 +21,7 @@ import java.util.Calendar
import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.extensions.pindel._ import nl.lumc.sasc.biopet.extensions.pindel._
import nl.lumc.sasc.biopet.utils.BamUtils
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
/// Pindel is actually a mini pipeline executing binaries from the pindel package /// Pindel is actually a mini pipeline executing binaries from the pindel package
...@@ -42,8 +43,10 @@ class Pindel(val root: Configurable) extends SvCaller { ...@@ -42,8 +43,10 @@ class Pindel(val root: Configurable) extends SvCaller {
val cfg = new PindelConfig(this) val cfg = new PindelConfig(this)
cfg.input = bamFile cfg.input = bamFile
val insertSize: Int = BamUtils.sampleBamInsertSize(bamFile)
// FIXME: get the real insert size of the bam (from bammetrics?) // FIXME: get the real insert size of the bam (from bammetrics?)
cfg.insertsize = 500 cfg.insertsize = insertSize
cfg.sampleName = sample cfg.sampleName = sample
cfg.output = config_file cfg.output = config_file
add(cfg) add(cfg)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment