Skip to content
Snippets Groups Projects
Commit 180bb97e authored by bow's avatar bow
Browse files

Merge branch 'feature-seq_error_test' into 'develop'

Added binominal test

Added this for project 168

See merge request !367
parents 3637e383 99a7f2e0
No related branches found
No related tags found
No related merge requests found
...@@ -33,7 +33,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction { ...@@ -33,7 +33,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Set flanking amount") @Argument(doc = "Set flanking amount")
var flanking: Option[Int] = config("flanking") var flanking: Option[Int] = config("flanking")
var exclusionRegions: List[File] = config("exclusion_regions") var exclusionRegions: List[File] = config("exclusion_regions", default = Nil)
var translocationsOnly: Boolean = config("translocations_only", default = false) var translocationsOnly: Boolean = config("translocations_only", default = false)
@Output(doc = "Unzipped file", required = true) @Output(doc = "Unzipped file", required = true)
......
...@@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.tools ...@@ -17,6 +17,8 @@ package nl.lumc.sasc.biopet.tools
import java.io.{ File, PrintWriter } import java.io.{ File, PrintWriter }
import cern.jet.random.Binomial
import cern.jet.random.engine.RandomEngine
import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable import scala.collection.mutable
...@@ -26,7 +28,7 @@ import scala.math.{ floor, round } ...@@ -26,7 +28,7 @@ import scala.math.{ floor, round }
object MpileupToVcf extends ToolCommand { object MpileupToVcf extends ToolCommand {
case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2, case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2,
homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs
class OptParser extends AbstractOptParser { class OptParser extends AbstractOptParser {
opt[File]('I', "input") valueName "<file>" action { (x, c) => opt[File]('I', "input") valueName "<file>" action { (x, c) =>
...@@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand { ...@@ -50,6 +52,9 @@ object MpileupToVcf extends ToolCommand {
opt[Int]("ploidy") action { (x, c) => opt[Int]("ploidy") action { (x, c) =>
c.copy(ploidy = x) c.copy(ploidy = x)
} }
opt[Double]("seqError") action { (x, c) =>
c.copy(seqError = x)
}
} }
/** /**
...@@ -69,8 +74,10 @@ object MpileupToVcf extends ToolCommand { ...@@ -69,8 +74,10 @@ object MpileupToVcf extends ToolCommand {
writer.println("##FORMAT=<ID=FREQ,Number=A,Type=Float,Description=\"Allele Frequency\">") writer.println("##FORMAT=<ID=FREQ,Number=A,Type=Float,Description=\"Allele Frequency\">")
writer.println("##FORMAT=<ID=RFC,Number=1,Type=Integer,Description=\"Reference Forward Reads\">") writer.println("##FORMAT=<ID=RFC,Number=1,Type=Integer,Description=\"Reference Forward Reads\">")
writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">") writer.println("##FORMAT=<ID=RRC,Number=1,Type=Integer,Description=\"Reference Reverse Reads\">")
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">") writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternative Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">") writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternative Reverse Reads\">")
writer.println("##FORMAT=<ID=SEQ-ERR,Number=.,Type=Float,Description=\"Probability to not be a sequence error with error rate " + commandArgs.seqError + "\">")
writer.println("##FORMAT=<ID=MA-SEQ-ERR,Number=1,Type=Float,Description=\"Minimal probability for all alternative alleles to not be a sequence error with error rate " + commandArgs.seqError + "\">")
writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
val inputStream = if (commandArgs.input != null) { val inputStream = if (commandArgs.input != null) {
...@@ -94,7 +101,7 @@ object MpileupToVcf extends ToolCommand { ...@@ -94,7 +101,7 @@ object MpileupToVcf extends ToolCommand {
} }
val reads = values(3).toInt val reads = values(3).toInt
val mpileup = values(4) val mpileup = values(4)
val qual = values(5) //val qual = values(5)
val counts: mutable.Map[String, Counts] = mutable.Map(ref.toUpperCase -> new Counts(0, 0)) val counts: mutable.Map[String, Counts] = mutable.Map(ref.toUpperCase -> new Counts(0, 0))
...@@ -137,23 +144,37 @@ object MpileupToVcf extends ToolCommand { ...@@ -137,23 +144,37 @@ object MpileupToVcf extends ToolCommand {
} }
} }
val binomial = new Binomial(reads, commandArgs.seqError, RandomEngine.makeDefault())
val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads) val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads)
val format: mutable.Map[String, String] = mutable.Map("DP" -> reads.toString) val format: mutable.Map[String, Any] = mutable.Map("DP" -> reads.toString)
val alt: ArrayBuffer[String] = new ArrayBuffer val alt: ArrayBuffer[String] = new ArrayBuffer
var maSeqErr: Option[Double] = None
format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
format += ("SEQ-ERR" -> (1.0 - binomial.cdf(counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse)).toString)
if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) { if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) {
alt += key alt += key
format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
val seqErr = 1.0 - binomial.cdf(value.forward + value.reverse)
maSeqErr match {
case Some(x) if x < seqErr =>
case _ => maSeqErr = Some(seqErr)
}
format += ("SEQ-ERR" -> (format("SEQ-ERR") + "," + seqErr.toString))
format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse)) format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse))
format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") + format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") +
round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2)) round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2))
} }
maSeqErr match {
case Some(x) => format += ("MA-SEQ-ERR" -> x)
case _ =>
}
if (alt.nonEmpty) { if (alt.nonEmpty) {
val ad = for (ad <- format("AD").split(",")) yield ad.toInt val ad = for (ad <- format("AD").toString.split(",")) yield ad.toInt
var left = reads - dels var left = reads - dels
val gt = ArrayBuffer[Int]() val gt = ArrayBuffer[Int]()
......
...@@ -21,7 +21,6 @@ import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext } ...@@ -21,7 +21,6 @@ import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf.VCFFileReader import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.io.Source import scala.io.Source
......
...@@ -31,6 +31,11 @@ ...@@ -31,6 +31,11 @@
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>
<dependency>
<groupId>colt</groupId>
<artifactId>colt</artifactId>
<version>1.2.0</version>
</dependency>
<dependency> <dependency>
<groupId>org.testng</groupId> <groupId>org.testng</groupId>
<artifactId>testng</artifactId> <artifactId>testng</artifactId>
......
...@@ -29,6 +29,8 @@ class RawVcf(val root: Configurable) extends Variantcaller { ...@@ -29,6 +29,8 @@ class RawVcf(val root: Configurable) extends Variantcaller {
// This caller is designed as fallback when other variantcallers fails to report // This caller is designed as fallback when other variantcallers fails to report
protected def defaultPrio = Int.MaxValue protected def defaultPrio = Int.MaxValue
val keepRefCalls: Boolean = config("keep_ref_calls", default = false)
def biopetScript { def biopetScript {
val rawFiles = inputBams.map { val rawFiles = inputBams.map {
case (sample, bamFile) => case (sample, bamFile) =>
...@@ -48,7 +50,7 @@ class RawVcf(val root: Configurable) extends Variantcaller { ...@@ -48,7 +50,7 @@ class RawVcf(val root: Configurable) extends Variantcaller {
override def defaults = Map("min_sample_depth" -> 8, override def defaults = Map("min_sample_depth" -> 8,
"min_alternate_depth" -> 2, "min_alternate_depth" -> 2,
"min_samples_pass" -> 1, "min_samples_pass" -> 1,
"filter_ref_calls" -> true "filter_ref_calls" -> !keepRefCalls
) )
} }
vcfFilter.inputVcf = m2v.output vcfFilter.inputVcf = m2v.output
...@@ -61,7 +63,7 @@ class RawVcf(val root: Configurable) extends Variantcaller { ...@@ -61,7 +63,7 @@ class RawVcf(val root: Configurable) extends Variantcaller {
cv.inputFiles = rawFiles.toList cv.inputFiles = rawFiles.toList
cv.outputFile = outputFile cv.outputFile = outputFile
cv.setKey = "null" cv.setKey = "null"
cv.excludeNonVariants = true cv.excludeNonVariants = !keepRefCalls
add(cv) add(cv)
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment