Skip to content
Snippets Groups Projects
Commit 314d4942 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Some fixes with boolean in the pindelvcf wrapper

parent 5827c070
No related branches found
No related tags found
No related merge requests found
...@@ -7,8 +7,8 @@ import nl.lumc.sasc.biopet.utils.config.Configurable ...@@ -7,8 +7,8 @@ import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input } import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
/** /**
* Created by wyleung on 20-1-16. * Created by wyleung on 20-1-16.
*/ */
class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
executable = config("exe", default = "pindel2vcf") executable = config("exe", default = "pindel2vcf")
...@@ -20,15 +20,15 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R ...@@ -20,15 +20,15 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
def versionCommand = executable + " -h" def versionCommand = executable + " -h"
/** /**
* Required parameters * Required parameters
*/ */
@Input @Input
var reference: File = referenceFasta var reference: File = referenceFasta
@Output @Output
var outputVCF: File = _ var outputVCF: File = _
var referenceDate: String = config("reference_date") var referenceDate: String = config("reference_date", freeVar = false)
override def beforeGraph: Unit = { override def beforeGraph: Unit = {
if (reference == null) reference = referenceFasta() if (reference == null) reference = referenceFasta()
...@@ -54,10 +54,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R ...@@ -54,10 +54,10 @@ class PindelVCF(val root: Configurable) extends BiopetCommandLineFunction with R
var maxInternalRepeatLength: Option[Int] = config("max_internal_repeatlength") var maxInternalRepeatLength: Option[Int] = config("max_internal_repeatlength")
var maxPostindelRepeats: Option[Int] = config("max_postindel_repeat") var maxPostindelRepeats: Option[Int] = config("max_postindel_repeat")
var maxPostindelRepeatLength: Option[Int] = config("max_postindel_repeatlength") var maxPostindelRepeatLength: Option[Int] = config("max_postindel_repeatlength")
var onlyBalancedSamples: Boolean = config("only_balanced_samples") var onlyBalancedSamples: Boolean = config("only_balanced_samples", default = false)
var somaticP: Boolean = config("somatic_p") var somaticP: Boolean = config("somatic_p", default = false)
var minimumStrandSupport: Option[Int] = config("minimum_strand_support") var minimumStrandSupport: Option[Int] = config("minimum_strand_support")
var gatkCompatible: Boolean = config("gatk_compatible") var gatkCompatible: Boolean = config("gatk_compatible", default = false)
def cmdLine = required(executable) + def cmdLine = required(executable) +
required("--reference_name", referenceSpecies) + required("--reference_name", referenceSpecies) +
......
...@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.utils ...@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.utils
import java.io.File import java.io.File
import htsjdk.samtools.{SamReader, SamReaderFactory} import htsjdk.samtools.{ SAMSequenceRecord, SamReader, SamReaderFactory }
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
...@@ -31,32 +31,37 @@ object BamUtils { ...@@ -31,32 +31,37 @@ object BamUtils {
temp.toMap temp.toMap
} }
/** def contigInsertSize(inputSam: SamReader, contig: SAMSequenceRecord): Int = {
* Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>]
*
* @param bamFiles input bam files
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): Map[File, Float] = bamFiles.map { file =>
val inputSam: SamReader = SamReaderFactory.makeDefault.open(file) val insertsizes: Iterator[Int] = for {
read <- inputSam.query(contig.getSequenceName, 1, contig.getSequenceLength, true) //.toStream.slice(0, 100).toList
insertsize = read.getInferredInsertSize
paired = read.getReadPairedFlag
bothMapped = (read.getReadUnmappedFlag == false) && (read.getMateUnmappedFlag == false)
if paired && bothMapped
} yield {
insertsize
}
val contigInsertSize = insertsizes.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1))
(contigInsertSize._1 / contigInsertSize._2).toInt
}
/**
* Estimate the insertsize for each bam file and return Map[<sampleName>, <insertSize>]
*
* @param bamFiles input bam files
* @return
*/
def sampleBamInsertSize(bamFiles: List[File]): Map[File, Int] = bamFiles.map { file =>
val inputSam: SamReader = SamReaderFactory.makeDefault.open(file)
val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.map { val baminsertsizes = inputSam.getFileHeader.getSequenceDictionary.getSequences.map {
contig => contig =>
val insertsizes: Iterator[Int] = for { val insertSize = BamUtils.contigInsertSize(inputSam, contig)
read <- inputSam.query( contig.getSequenceName, 1, contig.getSequenceLength, true) //.toStream.slice(0, 100).toList
insertsize = read.getInferredInsertSize Logging.logger.debug(s"Insertsize ${contig}: ${insertSize}")
paired = read.getReadPairedFlag insertSize
bothMapped = (read.getReadUnmappedFlag == false) && (read.getMateUnmappedFlag == false) }
if paired && bothMapped file -> (baminsertsizes.sum / baminsertsizes.size)
} yield { }.toMap
insertsize
}
val contigInsertSize = insertsizes.foldLeft((0.0,0))((t, r) => (t._1 + r, t._2 +1))
contigInsertSize._1 / contigInsertSize._2
}.foldLeft((0.0,0))((t, r) => (t._1 + r, t._2 +1))
file -> baminsertsizes._1 / baminsertsizes._2
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment