Skip to content
Snippets Groups Projects
Commit 410da462 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by bow
Browse files

Added MpileupToVcf to exe

parent bd3cc3d3
No related branches found
No related tags found
No related merge requests found
...@@ -27,7 +27,8 @@ object BiopetExecutable { ...@@ -27,7 +27,8 @@ object BiopetExecutable {
nl.lumc.sasc.biopet.tools.VcfToTsv, nl.lumc.sasc.biopet.tools.VcfToTsv,
nl.lumc.sasc.biopet.tools.VcfFilter, nl.lumc.sasc.biopet.tools.VcfFilter,
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio, nl.lumc.sasc.biopet.tools.FindRepeatsPacBio,
nl.lumc.sasc.biopet.tools.BedToInterval) nl.lumc.sasc.biopet.tools.BedToInterval,
nl.lumc.sasc.biopet.tools.MpileupToVcf)
) )
/** /**
......
...@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.tools ...@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File import java.io.File
import java.io.PrintWriter import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
...@@ -50,46 +51,47 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction ...@@ -50,46 +51,47 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction
} else "") + } else "") +
super.commandLine + super.commandLine +
required("-o", output) + required("-o", output) +
optional("-minDP", minDP) + optional("--minDP", minDP) +
optional("-minAP", minAP) + optional("--minAP", minAP) +
optional("-homoFraction", homoFraction) + optional("--homoFraction", homoFraction) +
optional("-ploidy", ploidy) + optional("--ploidy", ploidy) +
required("-sample", sample) + required("--sample", sample) +
(if (inputBam == null) required("-I", inputMpileup) else "") (if (inputBam == null) required("-I", inputMpileup) else "")
} }
} }
object MpileupToVcf { object MpileupToVcf extends ToolCommand {
var input: File = _ case class Args (input:File = null, output:File = null, sample:String = null, minDP:Int = 8, minAP:Int = 2,
var output: File = _ homoFraction:Double = 0.8, ploidy:Int = 2) extends AbstractArgs
var sample: String = _
var minDP = 8 class OptParser extends AbstractOptParser {
var minAP = 2 opt[File]('I', "input") valueName("<file>") action { (x, c) =>
var homoFraction = 0.8 c.copy(input = x) } text("input, default is stdin")
var ploidy = 2 opt[File]('o', "output") required() valueName("<file>") action { (x, c) =>
c.copy(output = x) } text("out is a required file property")
opt[String]('s', "sample") required() action { (x, c) =>
c.copy(sample = x) }
opt[Int]("minDP") required() action { (x, c) =>
c.copy(minDP = x) }
opt[Int]("minAP") required() action { (x, c) =>
c.copy(minAP = x) }
opt[Double]("homoFraction") required() action { (x, c) =>
c.copy(homoFraction = x) }
opt[Int]("ploidy") required() action { (x, c) =>
c.copy(ploidy = x) }
}
/** /**
* @param args the command line arguments * @param args the command line arguments
*/ */
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
import scala.collection.mutable.Map import scala.collection.mutable.Map
for (t <- 0 until args.size) { if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist")
args(t) match {
case "-I" => input = new File(args(t+1))
case "-o" => output = new File(args(t+1))
case "-minDP" => minDP = args(t+1).toInt
case "-minAP" => minAP = args(t+1).toInt
case "-sample" => sample = args(t+1)
case "-homoFraction" => homoFraction = args(t+1).toDouble
case "-ploidy" => ploidy = args(t+1).toInt
case _ =>
}
}
if (input != null && !input.exists) throw new IllegalStateException("Input file does not exist")
if (output == null) throw new IllegalStateException("Output file not found, use -o")
if (sample == null) throw new IllegalStateException("sample not given, pls use -sample")
val writer = new PrintWriter(output) val writer = new PrintWriter(commandArgs.output)
writer.println("##fileformat=VCFv4.1") writer.println("##fileformat=VCFv4.1")
writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">") writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">") writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">")
...@@ -101,8 +103,8 @@ object MpileupToVcf { ...@@ -101,8 +103,8 @@ object MpileupToVcf {
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">") writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">") writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">")
writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sample) writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
val inputStream = if (input != null) Source.fromFile(input).getLines else Source.stdin.getLines val inputStream = if (commandArgs.input != null) Source.fromFile(commandArgs.input).getLines else Source.stdin.getLines
class Counts(var forward:Int, var reverse:Int) class Counts(var forward:Int, var reverse:Int)
for (line <- inputStream; for (line <- inputStream;
val values = line.split("\t"); val values = line.split("\t");
...@@ -166,7 +168,7 @@ object MpileupToVcf { ...@@ -166,7 +168,7 @@ object MpileupToVcf {
format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
if (reads >= minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= minAP) { if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= commandArgs.minAP) {
alt += key alt += key
format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
format += ("AFC" -> ( (if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) format += ("AFC" -> ( (if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
...@@ -180,13 +182,13 @@ object MpileupToVcf { ...@@ -180,13 +182,13 @@ object MpileupToVcf {
var left = reads - dels var left = reads - dels
val gt = ArrayBuffer[Int]() val gt = ArrayBuffer[Int]()
for (p <- 0 to alt.size if gt.size < ploidy) { for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) {
var max = -1 var max = -1
for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a
val f = ad(max).toDouble / left val f = ad(max).toDouble / left
for (a <- 0 to floor(f).toInt if gt.size < ploidy) gt.append(max) for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max)
if (f - floor(f) >= homoFraction) { if (f - floor(f) >= commandArgs.homoFraction) {
for (b <- p to ploidy if gt.size < ploidy) gt.append(max) for (b <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max)
} }
left -= ad(max) left -= ad(max)
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment