diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala index af0d91dff9b5434d0f17c22193655a09007e215e..0b377b8800ae297bc5bcc3d9a14a7f4b5679e281 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala @@ -25,7 +25,10 @@ object BiopetExecutable { nl.lumc.sasc.biopet.tools.CheckAllelesVcfInBam, nl.lumc.sasc.biopet.tools.VcfToTsv, nl.lumc.sasc.biopet.tools.VcfFilter, - nl.lumc.sasc.biopet.tools.FindRepeatsPacBio) + nl.lumc.sasc.biopet.tools.FindRepeatsPacBio, + nl.lumc.sasc.biopet.tools.BedToInterval, + nl.lumc.sasc.biopet.tools.MpileupToVcf, + nl.lumc.sasc.biopet.tools.FastqSplitter) ) /** diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala index 7b8e5003762f8c6d408b8a6f6fd31ec765958268..0a8cbdb3accf11ef641708b5a2dd4ac44f910467 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala @@ -4,6 +4,7 @@ import htsjdk.samtools.SAMFileReader import htsjdk.samtools.SAMSequenceRecord import java.io.File import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.PrintWriter @@ -27,7 +28,7 @@ class BedToInterval(val root: Configurable) extends BiopetJavaCommandLineFunctio override def commandLine = super.commandLine + required(input) + required(bamFile) + required(output) } -object BedToInterval { +object BedToInterval extends ToolCommand { def apply(root: Configurable, inputBed: File, inputBam: File, outputDir: String): BedToInterval = { val bedToInterval = new BedToInterval(root) bedToInterval.input = inputBed @@ -44,13 +45,25 @@ object BedToInterval { return bedToInterval } + case class Args (inputFile:File = null, outputFile:File = null) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) => + c.copy(inputFile = x) } text("out is a required file property") + opt[File]('o', "output") required() valueName("<file>") action { (x, c) => + c.copy(outputFile = x) } text("out is a required file property") + } + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { - val writer = new PrintWriter(args(2)) + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + + val writer = new PrintWriter(commandArgs.outputFile) - val inputSam = new SAMFileReader(new File(args(1))) + val inputSam = new SAMFileReader(commandArgs.inputFile) val refs = for (SQ <- inputSam.getFileHeader.getSequenceDictionary.getSequences.toArray) yield { val record = SQ.asInstanceOf[SAMSequenceRecord] writer.write("@SQ\tSN:" + record.getSequenceName + "\tLN:" + record.getSequenceLength + "\n") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala index 66cb83cc5d32c3f597bcf9166a6714d96bbece93..c59d8a2464797f03eca4e0f7b2d6c3e0965ab624 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala @@ -4,6 +4,7 @@ import java.io.{ BufferedInputStream, File, FileInputStream, PrintWriter } import java.util.zip.GZIPInputStream import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import scala.io.Source +import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -19,24 +20,34 @@ class FastqSplitter(val root: Configurable) extends BiopetJavaCommandLineFunctio override val defaultVmem = "8G" memoryLimit = Option(4.0) - override def commandLine = super.commandLine + required(input) + repeat(output) + override def commandLine = super.commandLine + required("-I", input) + repeat("-o", output) } -object FastqSplitter { +object FastqSplitter extends ToolCommand { + case class Args (inputFile:File = null, outputFile:List[File] = Nil) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) => + c.copy(inputFile = x) } text("out is a required file property") + opt[File]('o', "output") required() unbounded() valueName("<file>") action { (x, c) => + c.copy(outputFile = x :: c.outputFile) } text("out is a required file property") + } + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val groupsize = 100 - val input = new File(args.head) - val output: Array[PrintWriter] = new Array[PrintWriter](args.tail.size) - for (t <- 1 to args.tail.size) output(t - 1) = new PrintWriter(args(t)) + val output = for (file <- commandArgs.outputFile) yield new PrintWriter(file) val inputStream = { - if (input.getName.endsWith(".gz") || input.getName.endsWith(".gzip")) Source.fromInputStream( + if (commandArgs.inputFile.getName.endsWith(".gz") || commandArgs.inputFile.getName.endsWith(".gzip")) Source.fromInputStream( new GZIPInputStream( new BufferedInputStream( - new FileInputStream(input)))).bufferedReader - else Source.fromFile(input).bufferedReader + new FileInputStream(commandArgs.inputFile)))).bufferedReader + else Source.fromFile(commandArgs.inputFile).bufferedReader } while (inputStream.ready) { for (writter <- output) { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index 8cc95f6733d351a25751dba309075a36248cb04a..a407a64de28c47de0a2c53033793a4b3c4f15a5e 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.tools import java.io.File import java.io.PrintWriter import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -50,46 +51,47 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction } else "") + super.commandLine + required("-o", output) + - optional("-minDP", minDP) + - optional("-minAP", minAP) + - optional("-homoFraction", homoFraction) + - optional("-ploidy", ploidy) + - required("-sample", sample) + + optional("--minDP", minDP) + + optional("--minAP", minAP) + + optional("--homoFraction", homoFraction) + + optional("--ploidy", ploidy) + + required("--sample", sample) + (if (inputBam == null) required("-I", inputMpileup) else "") } } -object MpileupToVcf { - var input: File = _ - var output: File = _ - var sample: String = _ - var minDP = 8 - var minAP = 2 - var homoFraction = 0.8 - var ploidy = 2 +object MpileupToVcf extends ToolCommand { + case class Args (input:File = null, output:File = null, sample:String = null, minDP:Int = 8, minAP:Int = 2, + homoFraction:Double = 0.8, ploidy:Int = 2) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('I', "input") valueName("<file>") action { (x, c) => + c.copy(input = x) } text("input, default is stdin") + opt[File]('o', "output") required() valueName("<file>") action { (x, c) => + c.copy(output = x) } text("out is a required file property") + opt[String]('s', "sample") required() action { (x, c) => + c.copy(sample = x) } + opt[Int]("minDP") required() action { (x, c) => + c.copy(minDP = x) } + opt[Int]("minAP") required() action { (x, c) => + c.copy(minAP = x) } + opt[Double]("homoFraction") required() action { (x, c) => + c.copy(homoFraction = x) } + opt[Int]("ploidy") required() action { (x, c) => + c.copy(ploidy = x) } + } /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + import scala.collection.mutable.Map - for (t <- 0 until args.size) { - args(t) match { - case "-I" => input = new File(args(t+1)) - case "-o" => output = new File(args(t+1)) - case "-minDP" => minDP = args(t+1).toInt - case "-minAP" => minAP = args(t+1).toInt - case "-sample" => sample = args(t+1) - case "-homoFraction" => homoFraction = args(t+1).toDouble - case "-ploidy" => ploidy = args(t+1).toInt - case _ => - } - } - if (input != null && !input.exists) throw new IllegalStateException("Input file does not exist") - if (output == null) throw new IllegalStateException("Output file not found, use -o") - if (sample == null) throw new IllegalStateException("sample not given, pls use -sample") + if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist") - val writer = new PrintWriter(output) + val writer = new PrintWriter(commandArgs.output) writer.println("##fileformat=VCFv4.1") writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">") writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">") @@ -101,8 +103,8 @@ object MpileupToVcf { writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">") writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">") writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") - writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sample) - val inputStream = if (input != null) Source.fromFile(input).getLines else Source.stdin.getLines + writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) + val inputStream = if (commandArgs.input != null) Source.fromFile(commandArgs.input).getLines else Source.stdin.getLines class Counts(var forward:Int, var reverse:Int) for (line <- inputStream; val values = line.split("\t"); @@ -166,7 +168,7 @@ object MpileupToVcf { format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) - if (reads >= minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= minAP) { + if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= commandArgs.minAP) { alt += key format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) format += ("AFC" -> ( (if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) @@ -180,13 +182,13 @@ object MpileupToVcf { var left = reads - dels val gt = ArrayBuffer[Int]() - for (p <- 0 to alt.size if gt.size < ploidy) { + for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) { var max = -1 for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a val f = ad(max).toDouble / left - for (a <- 0 to floor(f).toInt if gt.size < ploidy) gt.append(max) - if (f - floor(f) >= homoFraction) { - for (b <- p to ploidy if gt.size < ploidy) gt.append(max) + for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max) + if (f - floor(f) >= commandArgs.homoFraction) { + for (b <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max) } left -= ad(max) }