From 410da4622a9a3e95cbc3f9644bf391f515d4bf87 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Fri, 24 Oct 2014 15:32:07 +0200 Subject: [PATCH] Added MpileupToVcf to exe --- .../sasc/biopet/core/BiopetExecutable.scala | 3 +- .../lumc/sasc/biopet/tools/MpileupToVcf.scala | 74 ++++++++++--------- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala index 92c2880b8..d0134fd84 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala @@ -27,7 +27,8 @@ object BiopetExecutable { nl.lumc.sasc.biopet.tools.VcfToTsv, nl.lumc.sasc.biopet.tools.VcfFilter, nl.lumc.sasc.biopet.tools.FindRepeatsPacBio, - nl.lumc.sasc.biopet.tools.BedToInterval) + nl.lumc.sasc.biopet.tools.BedToInterval, + nl.lumc.sasc.biopet.tools.MpileupToVcf) ) /** diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index 8cc95f673..a407a64de 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.tools import java.io.File import java.io.PrintWriter import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -50,46 +51,47 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction } else "") + super.commandLine + required("-o", output) + - optional("-minDP", minDP) + - optional("-minAP", minAP) + - optional("-homoFraction", homoFraction) + - optional("-ploidy", ploidy) + - required("-sample", sample) + + optional("--minDP", minDP) + + optional("--minAP", minAP) + + optional("--homoFraction", homoFraction) + + optional("--ploidy", ploidy) + + required("--sample", sample) + (if (inputBam == null) required("-I", inputMpileup) else "") } } -object MpileupToVcf { - var input: File = _ - var output: File = _ - var sample: String = _ - var minDP = 8 - var minAP = 2 - var homoFraction = 0.8 - var ploidy = 2 +object MpileupToVcf extends ToolCommand { + case class Args (input:File = null, output:File = null, sample:String = null, minDP:Int = 8, minAP:Int = 2, + homoFraction:Double = 0.8, ploidy:Int = 2) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('I', "input") valueName("<file>") action { (x, c) => + c.copy(input = x) } text("input, default is stdin") + opt[File]('o', "output") required() valueName("<file>") action { (x, c) => + c.copy(output = x) } text("out is a required file property") + opt[String]('s', "sample") required() action { (x, c) => + c.copy(sample = x) } + opt[Int]("minDP") required() action { (x, c) => + c.copy(minDP = x) } + opt[Int]("minAP") required() action { (x, c) => + c.copy(minAP = x) } + opt[Double]("homoFraction") required() action { (x, c) => + c.copy(homoFraction = x) } + opt[Int]("ploidy") required() action { (x, c) => + c.copy(ploidy = x) } + } /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + import scala.collection.mutable.Map - for (t <- 0 until args.size) { - args(t) match { - case "-I" => input = new File(args(t+1)) - case "-o" => output = new File(args(t+1)) - case "-minDP" => minDP = args(t+1).toInt - case "-minAP" => minAP = args(t+1).toInt - case "-sample" => sample = args(t+1) - case "-homoFraction" => homoFraction = args(t+1).toDouble - case "-ploidy" => ploidy = args(t+1).toInt - case _ => - } - } - if (input != null && !input.exists) throw new IllegalStateException("Input file does not exist") - if (output == null) throw new IllegalStateException("Output file not found, use -o") - if (sample == null) throw new IllegalStateException("sample not given, pls use -sample") + if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist") - val writer = new PrintWriter(output) + val writer = new PrintWriter(commandArgs.output) writer.println("##fileformat=VCFv4.1") writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">") writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">") @@ -101,8 +103,8 @@ object MpileupToVcf { writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">") writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">") writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") - writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sample) - val inputStream = if (input != null) Source.fromFile(input).getLines else Source.stdin.getLines + writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) + val inputStream = if (commandArgs.input != null) Source.fromFile(commandArgs.input).getLines else Source.stdin.getLines class Counts(var forward:Int, var reverse:Int) for (line <- inputStream; val values = line.split("\t"); @@ -166,7 +168,7 @@ object MpileupToVcf { format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) - if (reads >= minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= minAP) { + if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= commandArgs.minAP) { alt += key format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) format += ("AFC" -> ( (if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) @@ -180,13 +182,13 @@ object MpileupToVcf { var left = reads - dels val gt = ArrayBuffer[Int]() - for (p <- 0 to alt.size if gt.size < ploidy) { + for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) { var max = -1 for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a val f = ad(max).toDouble / left - for (a <- 0 to floor(f).toInt if gt.size < ploidy) gt.append(max) - if (f - floor(f) >= homoFraction) { - for (b <- p to ploidy if gt.size < ploidy) gt.append(max) + for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max) + if (f - floor(f) >= commandArgs.homoFraction) { + for (b <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max) } left -= ad(max) } -- GitLab