diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala index bcf16c8f19de1f19873620b3bd4e162758bf8ffc..8b257bd4e21369fd86130ff97444fa0ba27d35fe 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelVCF.scala @@ -7,8 +7,8 @@ import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } /** - * Created by wyleung on 20-1-16. - */ + * Created by wyleung on 20-1-16. + */ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { executable = config("exe", default = "pindel2vcf") @@ -20,15 +20,15 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R def versionCommand = executable + " -h" /** - * Required parameters - */ + * Required parameters + */ @Input var reference: File = referenceFasta @Output var outputVCF: File = _ - var referenceDate: String = config("reference_date") + var referenceDate: String = config("reference_date", freeVar = false) override def beforeGraph: Unit = { if (reference == null) reference = referenceFasta() @@ -54,10 +54,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R var maxInternalRepeatLength: Option[Int] = config("max_internal_repeatlength") var maxPostindelRepeats: Option[Int] = config("max_postindel_repeat") var maxPostindelRepeatLength: Option[Int] = config("max_postindel_repeatlength") - var onlyBalancedSamples: Boolean = config("only_balanced_samples") - var somaticP: Boolean = config("somatic_p") + var onlyBalancedSamples: Boolean = config("only_balanced_samples", default = false) + var somaticP: Boolean = config("somatic_p", default = false) var minimumStrandSupport: Option[Int] = config("minimum_strand_support") - var gatkCompatible: Boolean = config("gatk_compatible") + var gatkCompatible: Boolean = config("gatk_compatible", default = false) def cmdLine = required(executable) + required("--reference_name", referenceSpecies) + diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index a37b3d54647a3f8175e1935d0c059cb8c34a5384..1928d84f8999b493c089e6d67f8a2e0f392123f7 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.utils import java.io.File -import htsjdk.samtools.{SamReader, SamReaderFactory} +import htsjdk.samtools.{ SAMSequenceRecord, SamReader, SamReaderFactory } import scala.collection.JavaConversions._ @@ -31,32 +31,37 @@ object BamUtils { temp.toMap } - /** - * Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>] - * - * @param bamFiles input bam files - * @return - */ - def sampleBamInsertSize(bamFiles: List[File]): Map[File, Float] = bamFiles.map { file => + def contigInsertSize(inputSam: SamReader, contig: SAMSequenceRecord): Int = { - val inputSam: SamReader = SamReaderFactory.makeDefault.open(file) + val insertsizes: Iterator[Int] = for { + read <- inputSam.query(contig.getSequenceName, 1, contig.getSequenceLength, true) //.toStream.slice(0, 100).toList + insertsize = read.getInferredInsertSize + paired = read.getReadPairedFlag + bothMapped = (read.getReadUnmappedFlag == false) && (read.getMateUnmappedFlag == false) + if paired && bothMapped + } yield { + insertsize + } + val contigInsertSize = insertsizes.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1)) + (contigInsertSize._1 / contigInsertSize._2).toInt + } + /** + * Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>] + * + * @param bamFiles input bam files + * @return + */ + def sampleBamInsertSize(bamFiles: List[File]): Map[File, Int] = bamFiles.map { file => + val inputSam: SamReader = SamReaderFactory.makeDefault.open(file) val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.map { contig => - val insertsizes: Iterator[Int] = for { - read <- inputSam.query( contig.getSequenceName, 1, contig.getSequenceLength, true) //.toStream.slice(0, 100).toList - insertsize = read.getInferredInsertSize - paired = read.getReadPairedFlag - bothMapped = (read.getReadUnmappedFlag == false) && (read.getMateUnmappedFlag == false) - if paired && bothMapped - } yield { - insertsize - } - val contigInsertSize = insertsizes.foldLeft((0.0,0))((t, r) => (t._1 + r, t._2 +1)) - contigInsertSize._1 / contigInsertSize._2 - }.foldLeft((0.0,0))((t, r) => (t._1 + r, t._2 +1)) - - file -> baminsertsizes._1 / baminsertsizes._2 - } + val insertSize = BamUtils.contigInsertSize(inputSam, contig) + + Logging.logger.debug(s"Insertsize ${contig}: ${insertSize}") + insertSize + } + file -> (baminsertsizes.sum / baminsertsizes.size) + }.toMap }