Skip to content
Snippets Groups Projects
Commit 180bb97e authored by bow's avatar bow
Browse files

Merge branch 'feature-seq_error_test' into 'develop'

Added binominal test

Added this for project 168

See merge request !367
parents 3637e383 99a7f2e0
No related branches found
No related tags found
No related merge requests found
......@@ -33,7 +33,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Set flanking amount")
var flanking: Option[Int] = config("flanking")
var exclusionRegions: List[File] = config("exclusion_regions")
var exclusionRegions: List[File] = config("exclusion_regions", default = Nil)
var translocationsOnly: Boolean = config("translocations_only", default = false)
@Output(doc = "Unzipped file", required = true)
......
......@@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.tools
import java.io.{ File, PrintWriter }
import cern.jet.random.Binomial
import cern.jet.random.engine.RandomEngine
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable
......@@ -26,7 +28,7 @@ import scala.math.{ floor, round }
object MpileupToVcf extends ToolCommand {
case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2,
homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs
homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "input") valueName "<file>" action { (x, c) =>
......@@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand {
opt[Int]("ploidy") action { (x, c) =>
c.copy(ploidy = x)
}
opt[Double]("seqError") action { (x, c) =>
c.copy(seqError = x)
}
}
/**
......@@ -69,8 +74,10 @@ object MpileupToVcf extends ToolCommand {
writer.println("##FORMAT=<ID=FREQ,Number=A,Type=Float,Description=\"Allele Frequency\">")
writer.println("##FORMAT=<ID=RFC,Number=1,Type=Integer,Description=\"Reference Forward Reads\">")
writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">")
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">")
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternative Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternative Reverse Reads\">")
writer.println("##FORMAT=<ID=SEQ-ERR,Number=.,Type=Float,Description=\"Probability to not be a sequence error with error rate " + commandArgs.seqError + "\">")
writer.println("##FORMAT=<ID=MA-SEQ-ERR,Number=1,Type=Float,Description=\"Minimal probability for all alternative alleles to not be a sequence error with error rate " + commandArgs.seqError + "\">")
writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
val inputStream = if (commandArgs.input != null) {
......@@ -94,7 +101,7 @@ object MpileupToVcf extends ToolCommand {
}
val reads = values(3).toInt
val mpileup = values(4)
val qual = values(5)
//val qual = values(5)
val counts: mutable.Map[String, Counts] = mutable.Map(ref.toUpperCase -> new Counts(0, 0))
......@@ -137,23 +144,37 @@ object MpileupToVcf extends ToolCommand {
}
}
val binomial = new Binomial(reads, commandArgs.seqError, RandomEngine.makeDefault())
val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads)
val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString)
val format: mutable.Map[String, Any] = mutable.Map("DP" -> reads.toString)
val alt: ArrayBuffer[String] = new ArrayBuffer
var maSeqErr: Option[Double] = None
format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
format += ("SEQ-ERR" -> (1.0 - binomial.cdf(counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse)).toString)
if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) {
alt += key
format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
val seqErr = 1.0 - binomial.cdf(value.forward + value.reverse)
maSeqErr match {
case Some(x) if x < seqErr =>
case _ => maSeqErr = Some(seqErr)
}
format += ("SEQ-ERR" -> (format("SEQ-ERR") + "," + seqErr.toString))
format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse))
format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") +
round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2))
}
maSeqErr match {
case Some(x) => format += ("MA-SEQ-ERR" -> x)
case _ =>
}
if (alt.nonEmpty) {
val ad = for (ad <- format("AD").split(",")) yield ad.toInt
val ad = for (ad <- format("AD").toString.split(",")) yield ad.toInt
var left = reads - dels
val gt = ArrayBuffer[Int]()
......
......@@ -21,7 +21,6 @@ import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import scala.collection.JavaConversions._
import scala.io.Source
......
......@@ -31,6 +31,11 @@
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>colt</groupId>
<artifactId>colt</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
......
......@@ -29,6 +29,8 @@ class RawVcf(val root: Configurable) extends Variantcaller {
// This caller is designed as fallback when other variantcallers fails to report
protected def defaultPrio = Int.MaxValue
val keepRefCalls: Boolean = config("keep_ref_calls", default = false)
def biopetScript {
val rawFiles = inputBams.map {
case (sample, bamFile) =>
......@@ -48,7 +50,7 @@ class RawVcf(val root: Configurable) extends Variantcaller {
override def defaults = Map("min_sample_depth" -> 8,
"min_alternate_depth" -> 2,
"min_samples_pass" -> 1,
"filter_ref_calls" -> true
"filter_ref_calls" -> !keepRefCalls
)
}
vcfFilter.inputVcf = m2v.output
......@@ -61,7 +63,7 @@ class RawVcf(val root: Configurable) extends Variantcaller {
cv.inputFiles = rawFiles.toList
cv.outputFile = outputFile
cv.setKey = "null"
cv.excludeNonVariants = true
cv.excludeNonVariants = !keepRefCalls
add(cv)
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment