From 05bf28b2db7dc4991b386fef31ef78ddecaf2899 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 30 Mar 2016 19:20:10 +0200 Subject: [PATCH] Added binominal test --- .../lumc/sasc/biopet/tools/MpileupToVcf.scala | 17 +++++++++++++---- public/biopet-utils/pom.xml | 5 +++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index c93eaed39..48d658358 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -15,18 +15,20 @@ */ package nl.lumc.sasc.biopet.tools -import java.io.{ File, PrintWriter } +import java.io.{File, PrintWriter} +import cern.jet.random.Binomial +import cern.jet.random.engine.RandomEngine import nl.lumc.sasc.biopet.utils.ToolCommand import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.io.Source -import scala.math.{ floor, round } +import scala.math.{floor, round} object MpileupToVcf extends ToolCommand { case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2, - homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs + homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "input") valueName "<file>" action { (x, c) => @@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand { opt[Int]("ploidy") action { (x, c) => c.copy(ploidy = x) } + opt[Double]("seqError") action { (x, c) => + c.copy(seqError = x) + } } /** @@ -61,7 +66,7 @@ object MpileupToVcf extends ToolCommand { if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist") val writer = new PrintWriter(commandArgs.output) - writer.println("##fileformat=VCFv4.1") + writer.println("##fileformat=VCFv4.2") writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">") writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">") writer.println("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">") @@ -71,6 +76,7 @@ object MpileupToVcf extends ToolCommand { writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">") writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">") writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">") + writer.println(s"##FORMAT=<ID=SEQ-ERR,Number=R,Type=Float,Description=\"Probilty to not be a sequence error with error rate ${commandArgs.seqError}\">") writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) val inputStream = if (commandArgs.input != null) { @@ -137,15 +143,18 @@ object MpileupToVcf extends ToolCommand { } } + val binomial = new Binomial(reads, commandArgs.seqError, RandomEngine.makeDefault()) val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads) val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString) val alt: ArrayBuffer[String] = new ArrayBuffer format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) + format += ("SEQ-ERR" -> binomial.cdf(counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) { alt += key format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) + format += ("SEQ-ERR" -> (format("SEQ-ERR") + "," + binomial.cdf(value.forward + value.reverse).toString)) format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse)) format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") + diff --git a/public/biopet-utils/pom.xml b/public/biopet-utils/pom.xml index 722d1e6c6..cb80b9429 100644 --- a/public/biopet-utils/pom.xml +++ b/public/biopet-utils/pom.xml @@ -31,6 +31,11 @@ <packaging>jar</packaging> <dependencies> + <dependency> + <groupId>colt</groupId> + <artifactId>colt</artifactId> + <version>1.2.0</version> + </dependency> <dependency> <groupId>org.testng</groupId> <artifactId>testng</artifactId> -- GitLab