diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala index 426dd52dd4646482fb813f3dc892401f6b0874f3..2e1c3798500af977ecea1da2438fd031a735f548 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala @@ -59,19 +59,19 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit @Output(doc = "Output file of pindel, pointing to the DEL file") var outputFile: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputINV: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputTD: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputLI: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputBP: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputSI: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputRP: File = _ - @Output(doc="", required=false) + @Output(doc = "", required = false) var outputCloseEndMapped: File = _ var RP: Option[Int] = config("RP") @@ -148,7 +148,7 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction wit if (reportLongInsertions) { outputLI = new File(outputPrefix + File.separator, "sample_LI") } - if (reportBreakpoints){ + if (reportBreakpoints) { outputBP = new File(outputPrefix + File.separator, "sample_BP") } outputSI = new File(outputPrefix + File.separator, "sample_SI") @@ -215,6 +215,7 @@ object PindelCaller { val caller = new PindelCaller(root) caller.configFile = Some(configFile) caller.outputPrefix = outputDir + caller.beforeGraph caller } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala index 6d69531e8efeeb0e2e132d9e84874bfb32f16948..7dc6ef1545254ae0015ad802c311508052157c32 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala @@ -32,9 +32,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R override def beforeGraph: Unit = { if (reference == null) reference = referenceFasta() + println(pindelOutputInputHolder) } - @Input + @Input(doc = "Make this file a dependency before pindel2vcf can run. Usually a file generated by Pindel such as a _D file") var pindelOutputInputHolder: File = _ var pindelOutput: Option[File] = config("pindel_output") @@ -66,6 +67,7 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R required("--reference", reference) + required("--reference_name", referenceSpecies) + required("--reference_date", rDate) + + required("--fake_biopet_input_holder", pindelOutputInputHolder) + optional("--pindel_output", pindelOutput) + optional("--pindel_output_root", pindelOutputRoot) + required("--vcf", outputVCF) + diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index 18ba1bdbd6cfffecbc2d6dca13f2230db006b364..7b7e2ee02b6d978d635c2d8987a4d4dc36d38990 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -41,13 +41,15 @@ object BamUtils { * @param end postion to stop scanning * @return Int with insertsize for this contig */ - def contigInsertSize(inputBam: File, contig: String, end: Int): Option[Int] = { + def contigInsertSize(inputBam: File, contig: String, start: Int, end: Int, samplingSize: Int = 100000): Option[Int] = { val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam) - val samIterator = inputSam.query(contig, 1, end, true) + val samIterator = inputSam.query(contig, start, end, true) val insertsizes: List[Int] = (for { read <- samIterator.toStream.takeWhile(rec => { - rec.getReadPairedFlag && ((rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false) && rec.getProperPairFlag) - }).take(100000) + val paired = rec.getReadPairedFlag && rec.getProperPairFlag + val bothMapped = (rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false) + paired && bothMapped + }).take(samplingSize) } yield { read.getInferredInsertSize.asInstanceOf[Int].abs })(collection.breakOut) @@ -60,19 +62,30 @@ object BamUtils { } /** - * Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>] + * Estimate the insertsize for one single bamfile and return the insertsize * - * @param bamFiles input bam files + * @param bamFile bamfile to estimate avg insertsize from * @return */ - def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile => + def sampleBamInsertSize(bamFile: File): Int = { val inputSam: SamReader = SamReaderFactory.makeDefault.open(bamFile) val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.par.map({ - contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, contig.getSequenceLength) + contig => BamUtils.contigInsertSize(bamFile, contig.getSequenceName, 1, contig.getSequenceLength) }).toList - val sum = baminsertsizes.flatMap(x => x).reduceLeft(_ + _) - val n = baminsertsizes.flatMap(x => x).size - bamFile -> (sum / n).toInt + val counts = baminsertsizes.flatMap(x => x) + val sum = counts.reduceLeft(_ + _) + val n = counts.size + sum / n + } + + /** + * Estimate the insertsize for each bam file and return Map[<sampleBamFile>, <insertSize>] + * + * @param bamFiles input bam files + * @return + */ + def sampleBamInsertSize(bamFiles: List[File]): immutable.ParMap[File, Int] = bamFiles.par.map { bamFile => + bamFile -> sampleBamInsertSize(bamFile) }.toMap } diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala index a318c5eb9d0f44b4cc54f4722e5c8e398f0aa0f5..a452c0feee4b14b9f3f57fec403b4e006f05727b 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala @@ -21,6 +21,7 @@ import java.util.Calendar import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.extensions.pindel._ +import nl.lumc.sasc.biopet.utils.BamUtils import nl.lumc.sasc.biopet.utils.config.Configurable /// Pindel is actually a mini pipeline executing binaries from the pindel package @@ -42,8 +43,10 @@ class Pindel(val root: Configurable) extends SvCaller { val cfg = new PindelConfig(this) cfg.input = bamFile + val insertSize: Int = BamUtils.sampleBamInsertSize(bamFile) + // FIXME: get the real insert size of the bam (from bammetrics?) - cfg.insertsize = 500 + cfg.insertsize = insertSize cfg.sampleName = sample cfg.output = config_file add(cfg)