diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala index d9ee8e68a5e9237341a3838718523741b9c90465..e7b87696ba1b3556113ad0a97a34321b8fef08a7 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala @@ -33,7 +33,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction { @Argument(doc = "Set flanking amount") var flanking: Option[Int] = config("flanking") - var exclusionRegions: List[File] = config("exclusion_regions") + var exclusionRegions: List[File] = config("exclusion_regions", default = Nil) var translocationsOnly: Boolean = config("translocations_only", default = false) @Output(doc = "Unzipped file", required = true) diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index c93eaed3993830d9e1860b827d772b2c925b6983..91761c1965510f240af328bb54f4408d6388a75b 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.tools import java.io.{ File, PrintWriter } +import cern.jet.random.Binomial +import cern.jet.random.engine.RandomEngine import nl.lumc.sasc.biopet.utils.ToolCommand import scala.collection.mutable @@ -26,7 +28,7 @@ import scala.math.{ floor, round } object MpileupToVcf extends ToolCommand { case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2, - homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs + homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "input") valueName "<file>" action { (x, c) => @@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand { opt[Int]("ploidy") action { (x, c) => c.copy(ploidy = x) } + opt[Double]("seqError") action { (x, c) => + c.copy(seqError = x) + } } /** @@ -69,8 +74,10 @@ object MpileupToVcf extends ToolCommand { writer.println("##FORMAT=<ID=FREQ,Number=A,Type=Float,Description=\"Allele Frequency\">") writer.println("##FORMAT=<ID=RFC,Number=1,Type=Integer,Description=\"Reference Forward Reads\">") writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">") - writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">") - writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">") + writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternative Forward Reads\">") + writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternative Reverse Reads\">") + writer.println("##FORMAT=<ID=SEQ-ERR,Number=.,Type=Float,Description=\"Probability to not be a sequence error with error rate " + commandArgs.seqError + "\">") + writer.println("##FORMAT=<ID=MA-SEQ-ERR,Number=1,Type=Float,Description=\"Minimal probability for all alternative alleles to not be a sequence error with error rate " + commandArgs.seqError + "\">") writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) val inputStream = if (commandArgs.input != null) { @@ -94,7 +101,7 @@ object MpileupToVcf extends ToolCommand { } val reads = values(3).toInt val mpileup = values(4) - val qual = values(5) + //val qual = values(5) val counts: mutable.Map[String, Counts] = mutable.Map(ref.toUpperCase -> new Counts(0, 0)) @@ -137,23 +144,37 @@ object MpileupToVcf extends ToolCommand { } } + val binomial = new Binomial(reads, commandArgs.seqError, RandomEngine.makeDefault()) val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads) - val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString) + val format: mutable.Map[String, Any] = mutable.Map("DP" -> reads.toString) val alt: ArrayBuffer[String] = new ArrayBuffer + var maSeqErr: Option[Double] = None format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) + format += ("SEQ-ERR" -> (1.0 - binomial.cdf(counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse)).toString) if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) { alt += key format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) + val seqErr = 1.0 - binomial.cdf(value.forward + value.reverse) + maSeqErr match { + case Some(x) if x < seqErr => + case _ => maSeqErr = Some(seqErr) + } + format += ("SEQ-ERR" -> (format("SEQ-ERR") + "," + seqErr.toString)) format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse)) format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") + round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2)) } + maSeqErr match { + case Some(x) => format += ("MA-SEQ-ERR" -> x) + case _ => + } + if (alt.nonEmpty) { - val ad = for (ad <- format("AD").split(",")) yield ad.toInt + val ad = for (ad <- format("AD").toString.split(",")) yield ad.toInt var left = reads - dels val gt = ArrayBuffer[Int]() diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index bc91cfc044cb0982d14e00b7a15a1b79a2045919..0c3728d8af976217f67ced8aa06a220cd8e74cd4 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -21,7 +21,6 @@ import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.vcf.VCFFileReader import nl.lumc.sasc.biopet.utils.ToolCommand -import nl.lumc.sasc.biopet.utils.config.Configurable import scala.collection.JavaConversions._ import scala.io.Source diff --git a/public/biopet-utils/pom.xml b/public/biopet-utils/pom.xml index 722d1e6c629a51fd7f962d14dd5019e8e9201e06..cb80b9429d757389ab3936a99355606f035bb635 100644 --- a/public/biopet-utils/pom.xml +++ b/public/biopet-utils/pom.xml @@ -31,6 +31,11 @@ <packaging>jar</packaging> <dependencies> + <dependency> + <groupId>colt</groupId> + <artifactId>colt</artifactId> + <version>1.2.0</version> + </dependency> <dependency> <groupId>org.testng</groupId> <artifactId>testng</artifactId> diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala index d978be672b518ff3246162cab3f5b8d0371be33e..847e671166191da3153cc2df818828c66de37aa1 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala @@ -29,6 +29,8 @@ class RawVcf(val root: Configurable) extends Variantcaller { // This caller is designed as fallback when other variantcallers fails to report protected def defaultPrio = Int.MaxValue + val keepRefCalls: Boolean = config("keep_ref_calls", default = false) + def biopetScript { val rawFiles = inputBams.map { case (sample, bamFile) => @@ -48,7 +50,7 @@ class RawVcf(val root: Configurable) extends Variantcaller { override def defaults = Map("min_sample_depth" -> 8, "min_alternate_depth" -> 2, "min_samples_pass" -> 1, - "filter_ref_calls" -> true + "filter_ref_calls" -> !keepRefCalls ) } vcfFilter.inputVcf = m2v.output @@ -61,7 +63,7 @@ class RawVcf(val root: Configurable) extends Variantcaller { cv.inputFiles = rawFiles.toList cv.outputFile = outputFile cv.setKey = "null" - cv.excludeNonVariants = true + cv.excludeNonVariants = !keepRefCalls add(cv) } }