From 34fc7da96707b8ed25fb6d561b1c6ac08b7b5357 Mon Sep 17 00:00:00 2001
From: Sander Bollen <a.h.b.bollen@lumc.nl>
Date: Tue, 10 Feb 2015 13:52:55 +0100
Subject: [PATCH] removed code duplication

---
 .../sasc/biopet/tools/VEPNormalizer.scala     | 90 ++++++-------------
 1 file changed, 29 insertions(+), 61 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
index dabc14556..53b58fa9c 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VEPNormalizer.scala
@@ -1,13 +1,15 @@
 package nl.lumc.sasc.biopet.tools
 
 import java.io.{ File, IOException }
+import htsjdk.tribble.TribbleException
+
 import scala.collection.JavaConversions._
 import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, ToolCommand }
 import collection.mutable.{ Map => MMap }
 import collection.JavaConverters._
 import htsjdk.variant.vcf._
 import htsjdk.variant.variantcontext.{ VariantContextBuilder, VariantContext }
-import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
+import htsjdk.variant.variantcontext.writer.{ VariantContextWriter, VariantContextWriterBuilder }
 import nl.lumc.sasc.biopet.core.config.Configurable
 import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
 
@@ -48,35 +50,16 @@ object VEPNormalizer extends ToolCommand {
     val input = commandArgs.inputVCF
     val output = commandArgs.outputVCF
 
-    if (commandArgs.mode == "explode") {
-      logger.info("You have selected explode mode")
-      logger.info(s"""Input VCF is $input""")
-      logger.info(s"""Output VCF is $output""")
-      explode(commandArgs.inputVCF, commandArgs.outputVCF)
-    } else if (commandArgs.mode == "standard") {
-      logger.info("You have selected standard mode")
-      logger.info(s"""Input VCF is $input""")
-      logger.info(s"""Output VCF is $output""")
-      standard(commandArgs.inputVCF, commandArgs.outputVCF)
-    } else {
-      // this should be impossible, but should nevertheless be checked
-      logger.error("impossibru!", new IllegalArgumentException)
-    }
-  }
+    logger.info(s"""Input VCF is $input""")
+    logger.info(s"""Output VCF is $output""")
 
-  /**
-   * Wrapper for mode explode
-   * @param input input VCF file
-   * @param output output VCF file
-   */
-  def explode(input: File, output: File) = {
     var reader: VCFFileReader = null
     // this can give a codec error if malformed VCF
     //
     try {
       reader = new VCFFileReader(input, false)
     } catch {
-      case e: Exception =>
+      case e: TribbleException.MalformedFeatureFile =>
         logger.error("Malformed VCF file! VCFv3 not supported!")
         throw e
     }
@@ -108,6 +91,25 @@ object VEPNormalizer extends ToolCommand {
     writer.writeHeader(header)
     logger.debug("Wrote header to file")
 
+    if (commandArgs.mode == "explode") {
+      logger.info("You have selected explode mode")
+      explode(reader, writer, new_infos)
+    } else if (commandArgs.mode == "standard") {
+      logger.info("You have selected standard mode")
+      standard(reader, writer, new_infos)
+    } else {
+      // this should be impossible, but should nevertheless be checked
+      logger.error("impossibru!", new IllegalArgumentException)
+    }
+  }
+
+  /**
+   * Wrapper for mode explode
+   * @param reader input VCF VCFFileReader
+   * @param writer output VCF VariantContextWriter
+   * @param new_infos array of string containing names of new info fields
+   */
+  def explode(reader: VCFFileReader, writer: VariantContextWriter, new_infos: Array[String]) = {
     logger.info("Start processing records")
     var nprocessed_records: Int = 0
     var nwritten_records: Int = 0
@@ -132,45 +134,11 @@ object VEPNormalizer extends ToolCommand {
 
   /**
    * Wrapper for mode standard
-   * @param input input VCF file
-   * @param output output VCF file
+   * @param reader input VCF VCFFileReader
+   * @param writer output VCF VariantContextWriter
+   * @param new_infos array of string containing names of new info fields
    */
-  def standard(input: File, output: File) = {
-    val reader: VCFFileReader = try {
-      new VCFFileReader(input, false)
-    } catch {
-      case e: Exception =>
-        logger.error("Malformed VCF file! VCFv3 not supported!")
-        throw e
-    }
-
-    val header = reader.getFileHeader
-    logger.debug("Checking for CSQ tag")
-    csqCheck(header)
-    logger.debug("CSQ tag OK")
-    logger.debug("Checkion VCF version")
-    versionCheck(header)
-    logger.debug("VCF version OK")
-    val seqDict = header.getSequenceDictionary
-    logger.debug("Parsing header")
-    val new_infos = parseCsq(header)
-    header.setWriteCommandLine(true)
-    for (info <- new_infos) {
-      val tmpheaderline = new VCFInfoHeaderLine(info, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "A VEP annotation")
-      header.addMetaDataLine(tmpheaderline)
-    }
-    logger.debug("Header parsing done")
-
-    logger.debug("Writing header to file")
-    val writerBuilder = new VariantContextWriterBuilder()
-    writerBuilder.
-      setOutputFile(output).
-      setOutputFileType(VariantContextWriterBuilder.OutputType.VCF).
-      setReferenceDictionary(seqDict)
-    val writer = writerBuilder.build()
-    writer.writeHeader(header)
-    logger.debug("Wrote header to file")
-
+  def standard(reader: VCFFileReader, writer: VariantContextWriter, new_infos: Array[String]) = {
     logger.info("Start processing records")
     var nprocessed_records: Int = 0
     var nwritten_records: Int = 0
-- 
GitLab