Commit 180bb97e authored by bow
Browse files

Merge branch 'feature-seq_error_test' into 'develop'

Added binomial test

Added this for project 168

See merge request !367
parents 3637e383 99a7f2e0
......@@ -33,7 +33,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Set flanking amount")
var flanking: Option[Int] = config("flanking")
var exclusionRegions: List[File] = config("exclusion_regions")
var exclusionRegions: List[File] = config("exclusion_regions", default = Nil)
var translocationsOnly: Boolean = config("translocations_only", default = false)
@Output(doc = "Unzipped file", required = true)
......
......@@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.tools
import java.io.{ File, PrintWriter }
import cern.jet.random.Binomial
import cern.jet.random.engine.RandomEngine
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable
......@@ -26,7 +28,7 @@ import scala.math.{ floor, round }
object MpileupToVcf extends ToolCommand {
case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2,
homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs
homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "input") valueName "<file>" action { (x, c) =>
......@@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand {
opt[Int]("ploidy") action { (x, c) =>
c.copy(ploidy = x)
}
opt[Double]("seqError") action { (x, c) =>
c.copy(seqError = x)
}
}
/**
......@@ -69,8 +74,10 @@ object MpileupToVcf extends ToolCommand {
writer.println("##FORMAT=<ID=FREQ,Number=A,Type=Float,Description=\"Allele Frequency\">")
writer.println("##FORMAT=<ID=RFC,Number=1,Type=Integer,Description=\"Reference Forward Reads\">")
writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">")
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">")
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternative Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternative Reverse Reads\">")
writer.println("##FORMAT=<ID=SEQ-ERR,Number=.,Type=Float,Description=\"Probability to not be a sequence error with error rate " + commandArgs.seqError + "\">")
writer.println("##FORMAT=<ID=MA-SEQ-ERR,Number=1,Type=Float,Description=\"Minimal probability for all alternative alleles to not be a sequence error with error rate " + commandArgs.seqError + "\">")
writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
val inputStream = if (commandArgs.input != null) {
......@@ -94,7 +101,7 @@ object MpileupToVcf extends ToolCommand {
}
val reads = values(3).toInt
val mpileup = values(4)
val qual = values(5)
//val qual = values(5)
val counts: mutable.Map[String, Counts] = mutable.Map(ref.toUpperCase -> new Counts(0, 0))
......@@ -137,23 +144,37 @@ object MpileupToVcf extends ToolCommand {
}
}
val binomial = new Binomial(reads, commandArgs.seqError, RandomEngine.makeDefault())
val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads)
val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString)
val format: mutable.Map[String, Any] = mutable.Map("DP" -> reads.toString)
val alt: ArrayBuffer[String] = new ArrayBuffer
var maSeqErr: Option[Double] = None
format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
format += ("SEQ-ERR" -> (1.0 - binomial.cdf(counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse)).toString)
if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) {
alt += key
format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
val seqErr = 1.0 - binomial.cdf(value.forward + value.reverse)
maSeqErr match {
case Some(x) if x < seqErr =>
case _ => maSeqErr = Some(seqErr)
}
format += ("SEQ-ERR" -> (format("SEQ-ERR") + "," + seqErr.toString))
format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse))
format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") +
round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2))
}
maSeqErr match {
case Some(x) => format += ("MA-SEQ-ERR" -> x)
case _ =>
}
if (alt.nonEmpty) {
val ad = for (ad <- format("AD").split(",")) yield ad.toInt
val ad = for (ad <- format("AD").toString.split(",")) yield ad.toInt
var left = reads - dels
val gt = ArrayBuffer[Int]()
......
......@@ -21,7 +21,6 @@ import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import scala.collection.JavaConversions._
import scala.io.Source
......
......@@ -31,6 +31,11 @@
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>colt</groupId>
<artifactId>colt</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
......
......@@ -29,6 +29,8 @@ class RawVcf(val root: Configurable) extends Variantcaller {
// This caller is designed as a fallback when other variant callers fail to report
protected def defaultPrio = Int.MaxValue
val keepRefCalls: Boolean = config("keep_ref_calls", default = false)
def biopetScript {
val rawFiles = inputBams.map {
case (sample, bamFile) =>
......@@ -48,7 +50,7 @@ class RawVcf(val root: Configurable) extends Variantcaller {
override def defaults = Map("min_sample_depth" -> 8,
"min_alternate_depth" -> 2,
"min_samples_pass" -> 1,
"filter_ref_calls" -> true
"filter_ref_calls" -> !keepRefCalls
)
}
vcfFilter.inputVcf = m2v.output
......@@ -61,7 +63,7 @@ class RawVcf(val root: Configurable) extends Variantcaller {
cv.inputFiles = rawFiles.toList
cv.outputFile = outputFile
cv.setKey = "null"
cv.excludeNonVariants = true
cv.excludeNonVariants = !keepRefCalls
add(cv)
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment