From 48edf17ba0299d5efd66609497692ccb0cf54a60 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Fri, 8 Apr 2016 07:38:38 +0200
Subject: [PATCH] Added framework for SnptestToVcf

---
 .../lumc/sasc/biopet/tools/SnptestToVcf.scala | 87 +++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala

diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala
new file mode 100644
index 000000000..1ae91e1be
--- /dev/null
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SnptestToVcf.scala
@@ -0,0 +1,87 @@
+package nl.lumc.sasc.biopet.tools
+
+import java.io.File
+import java.util
+
+import htsjdk.samtools.reference.{FastaSequenceFile, ReferenceSequenceFileFactory}
+import htsjdk.variant.variantcontext.writer.{AsyncVariantContextWriter, VariantContextWriterBuilder}
+import htsjdk.variant.variantcontext.{Allele, GenotypeBuilder, VariantContextBuilder}
+import htsjdk.variant.vcf._
+import nl.lumc.sasc.biopet.utils.ToolCommand
+
+import scala.collection.JavaConversions._
+import scala.io.Source
+
+/**
+ * Created by pjvanthof on 15/03/16.
+ */
+object SnptestToVcf extends ToolCommand {
+
+  case class Args(inputInfo: File = null,
+                  outputVcf: File = null,
+                  referenceFasta: File = null,
+                  contig: String = null) extends AbstractArgs
+
+  class OptParser extends AbstractOptParser {
+    opt[File]('i', "inputInfo") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(inputInfo = x)
+    } text "Input info fields"
+    opt[File]('o', "outputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(outputVcf = x)
+    } text "Output vcf file"
+    opt[File]('R', "referenceFasta") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(referenceFasta = x)
+    } text "reference fasta file"
+    opt[String]('c', "contig") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(contig = x)
+    } text "contig of impute file"
+  }
+
+  def main(args: Array[String]): Unit = {
+    logger.info("Start")
+
+    val argsParser = new OptParser
+    val cmdArgs = argsParser.parse(args, Args()).getOrElse(throw new IllegalArgumentException)
+
+    val infoIt = Source.fromFile(cmdArgs.inputInfo).getLines()
+    val infoHeader = infoIt.find(!_.startsWith("#"))
+
+    infoHeader match {
+      case Some(header) => parseLines(header, infoIt, cmdArgs)
+      case _ => logger.info("No header and records found in file")
+    }
+
+    logger.info("Done")
+  }
+
+  def parseLines(header: String, lineIt: Iterator[String], cmdArgs: Args): Unit = {
+    val headerKeys = header.split(" ")
+    val headerMap = headerKeys.zipWithIndex.toMap
+    require(headerKeys.size == headerMap.size, "Duplicates header keys found")
+    val metaLines = new util.HashSet[VCFHeaderLine]()
+    for (key <- headerKeys if key != "rsid" if key != "chromosome" if key != "position"
+         if key != "alleleA" if key != "alleleB" if key != "alleleA")
+      metaLines.add(new VCFInfoHeaderLine(s"ST_$key", 1, VCFHeaderLineType.String, ""))
+
+    val reference = new FastaSequenceFile(cmdArgs.referenceFasta, true)
+    require(reference.getSequenceDictionary.getSequence(cmdArgs.contig) != null,
+      s"contig '${cmdArgs.contig}' not found on reference")
+
+    val vcfHeader = new VCFHeader(metaLines)
+    vcfHeader.setSequenceDictionary(reference.getSequenceDictionary)
+    val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder()
+      .setOutputFile(cmdArgs.outputVcf)
+      .setReferenceDictionary(vcfHeader.getSequenceDictionary)
+      .build)
+    writer.writeHeader(vcfHeader)
+
+    for (line <- lineIt if !line.startsWith("#")) {
+      val values = line.split(" ")
+      require(values.size == headerKeys.size, "Number of values are not the same as number of header keys")
+
+    }
+
+    writer.close()
+
+  }
+}
-- 
GitLab