From 05bf28b2db7dc4991b386fef31ef78ddecaf2899 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 30 Mar 2016 19:20:10 +0200
Subject: [PATCH] Added binominal test

---
 .../lumc/sasc/biopet/tools/MpileupToVcf.scala   | 17 +++++++++++++----
 public/biopet-utils/pom.xml                     |  5 +++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala
index c93eaed39..48d658358 100644
--- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala
@@ -15,18 +15,20 @@
  */
 package nl.lumc.sasc.biopet.tools
 
-import java.io.{ File, PrintWriter }
+import java.io.{File, PrintWriter}
 
+import cern.jet.random.Binomial
+import cern.jet.random.engine.RandomEngine
 import nl.lumc.sasc.biopet.utils.ToolCommand
 
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 import scala.io.Source
-import scala.math.{ floor, round }
+import scala.math.{floor, round}
 
 object MpileupToVcf extends ToolCommand {
   case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2,
-                  homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs
+                  homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs
 
   class OptParser extends AbstractOptParser {
     opt[File]('I', "input") valueName "<file>" action { (x, c) =>
@@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand {
     opt[Int]("ploidy") action { (x, c) =>
       c.copy(ploidy = x)
     }
+    opt[Double]("seqError") action { (x, c) =>
+      c.copy(seqError = x)
+    }
   }
 
   /**
@@ -61,7 +66,7 @@ object MpileupToVcf extends ToolCommand {
     if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist")
 
     val writer = new PrintWriter(commandArgs.output)
-    writer.println("##fileformat=VCFv4.1")
+    writer.println("##fileformat=VCFv4.2")
     writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
     writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">")
     writer.println("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
@@ -71,6 +76,7 @@ object MpileupToVcf extends ToolCommand {
     writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">")
     writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">")
     writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">")
+    writer.println(s"##FORMAT=<ID=SEQ-ERR,Number=R,Type=Float,Description=\"Probilty to not be a sequence error with error rate ${commandArgs.seqError}\">")
     writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
     writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
     val inputStream = if (commandArgs.input != null) {
@@ -137,15 +143,18 @@ object MpileupToVcf extends ToolCommand {
         }
       }
 
+      val binomial = new Binomial(reads, commandArgs.seqError, RandomEngine.makeDefault())
       val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads)
       val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString)
       val alt: ArrayBuffer[String] = new ArrayBuffer
       format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
       format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
       format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
+      format += ("SEQ-ERR" -> binomial.cdf(counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
       if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) {
         alt += key
         format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
+        format += ("SEQ-ERR" -> (format("SEQ-ERR") + "," + binomial.cdf(value.forward + value.reverse).toString))
         format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
         format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse))
         format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") +
diff --git a/public/biopet-utils/pom.xml b/public/biopet-utils/pom.xml
index 722d1e6c6..cb80b9429 100644
--- a/public/biopet-utils/pom.xml
+++ b/public/biopet-utils/pom.xml
@@ -31,6 +31,11 @@
     <packaging>jar</packaging>
 
     <dependencies>
+        <dependency>
+            <groupId>colt</groupId>
+            <artifactId>colt</artifactId>
+            <version>1.2.0</version>
+        </dependency>
         <dependency>
             <groupId>org.testng</groupId>
             <artifactId>testng</artifactId>
-- 
GitLab