From 09870eec2a9d0e27b14117e8802cbb2c786b6306 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 20 Jan 2011 15:19:38 +0000
Subject: [PATCH] Added batch SNP converter.

src/wsgi.py:
- Updated batch routing to include SNP converter.

mutalyzer.conf:
src/Modules/Config.py:
- Added result file header line for batch SNP converter.

src/Modules/Scheduler.py:
- Added batch SNP converter.

src/Modules/File.py:
- Updated batch CSV input file parsing. The default dialect is now the
  standard Excel format; another dialect is used only if the CSV sniffer
  detects one, without error, using a predefined set of delimiter characters.

templates/menu.html:
- Added link for batch SNP converter.


git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@163 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 mutalyzer.conf           |  7 +++--
 src/Modules/Config.py    |  1 +
 src/Modules/File.py      | 18 +++++++++----
 src/Modules/Scheduler.py | 58 ++++++++++++++++++++++++++++++++++++++++
 src/wsgi.py              |  8 +++---
 templates/menu.html      | 12 +++++++++
 6 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/mutalyzer.conf b/mutalyzer.conf
index 12ff4d0e..2a35880d 100644
--- a/mutalyzer.conf
+++ b/mutalyzer.conf
@@ -120,12 +120,15 @@ PIDfile = "./var/batch.pid"
 # The output header for NameChecking
 nameCheckOutHeader = "Input", "Errors | Messages", "AccNo", "Genesymbol", "Variant", "Reference Sequence Start Descr.", "Coding DNA Descr.", "Protein Descr.", "GeneSymbol Coding DNA Descr.", "GeneSymbol Protein Descr.", "Genomic Reference", "Coding Reference", "Protein Reference", "Affected Transcripts", "Affected Proteins"
 
-# The output header for NameChecking
+# The output header for SyntaxChecking
 syntaxCheckOutHeader = "Input", "Status"
 
-# The output header for NameChecking
+# The output header for PositionConverter
 positionConverterOutHeader = "Input Variant", "Errors", "Chromosomal Variant", "Coding Variant(s)"
 
+# The output header for SnpConverter
+snpConverterOutHeader = "Input Variant", "HGVS description(s)", "Errors | Messages"
+
 
 #
 # These settings are used by the File module.
diff --git a/src/Modules/Config.py b/src/Modules/Config.py
index 94b9c66f..f0662721 100644
--- a/src/Modules/Config.py
+++ b/src/Modules/Config.py
@@ -204,6 +204,7 @@ class Config() :
         self.Scheduler.nameCheckOutHeader = config["nameCheckOutHeader"]
         self.Scheduler.syntaxCheckOutHeader= config["syntaxCheckOutHeader"]
         self.Scheduler.positionConverterOutHeader= config["positionConverterOutHeader"]
+        self.Scheduler.snpConverterOutHeader= config["snpConverterOutHeader"]
 
         # Set thte variables neede for the Batch module.
         self.Batch.PIDfile = config["PIDfile"]
diff --git a/src/Modules/File.py b/src/Modules/File.py
index d1e13a9f..8206da6b 100644
--- a/src/Modules/File.py
+++ b/src/Modules/File.py
@@ -123,16 +123,24 @@ class File() :
         handle.seek(0)
         buf = handle.read(self.__config.bufSize)
 
+        # Default dialect
+        dialect = 'excel'
+
+        # New-style batch input files have only one column, so the sniffer
+        # may find no delimiter; in that case the default dialect is kept.
+
         try :
-            dialect = csv.Sniffer().sniff(buf)
+            # Todo: delimiters in config file
+            dialect = csv.Sniffer().sniff(buf, delimiters="\t ;|,")
         except csv.Error, e :
-            self.__output.addMessage(__file__, 4, "EBPARSE", e)
-            return None
+            #self.__output.addMessage(__file__, 4, "EBPARSE", e)
+            #return None
+            pass
         #except
 
         #Watch out for : delimiter FIXME and for the . delimiter
-        if dialect.delimiter == ":":
-            dialect.delimiter = "\t"
+#        if dialect.delimiter == ":":
+#            dialect.delimiter = "\t"
 
         handle.seek(0)
         reader = csv.reader(handle, dialect)
diff --git a/src/Modules/Scheduler.py b/src/Modules/Scheduler.py
index 4aabc806..21503a12 100644
--- a/src/Modules/Scheduler.py
+++ b/src/Modules/Scheduler.py
@@ -27,6 +27,7 @@ from Modules import Config              # Config.Config
 from Modules import Output              # Output.Output
 from Modules import Parser              # Parser.Nomenclatureparser
 from Modules import Mapper              # Mapper.Converter
+from Modules import Retriever           # Retriever.Retriever
 
 import Mutalyzer                        # Mutalyzer.process
 
@@ -333,6 +334,8 @@ class Scheduler() :
                         self._processSyntaxCheck(inputl, i, flags)
                     elif jobType == "PositionConverter" :
                         self._processConversion(inputl, i, arg1, flags)
+                    elif jobType == "SnpConverter" :
+                        self._processSNP(inputl, i, flags)
                     else: #unknown jobType
                         pass #TODO: Scream burning water and remove from Queue
                 else :
@@ -566,6 +569,61 @@ class Scheduler() :
     #_processConversion
 
 
+    def _processSNP(self, cmd, i, flags) :
+        """
+        Process an entry from the SNP converter Batch, write the results
+        to the job-file. Exceptions are not caught in this method itself.
+
+        Side-effect:
+            - Output line appended to <resultsDir>/Results_<i>.txt.
+
+        @arg cmd: The SNP converter input (passed to Retriever.snpConvert)
+        @type cmd: string
+        @arg i: The JobID (used in the results filename)
+        @type i: presumably integer; TODO confirm against caller
+        @arg flags: Flags of the current entry (see __processFlags)
+        @type flags: not visible here; TODO confirm against __processFlags
+        """
+
+        C = Config.Config()
+        O = Output.Output(__file__, C.Output)
+        O.addMessage(__file__, -1, "INFO",
+            "Received SNP converter batch rs" + cmd)
+
+        #Read out the flags; the entry is not converted when skip is set
+        # Todo: Do something with the flags?
+        skip = self.__processFlags(O, flags)
+
+        if not skip :
+            R = Retriever.Retriever(C.Retriever, O, None)
+            R.snpConvert(cmd)
+
+        # Tab-separated: input, 'snp' output, messages. Todo: Is output ok?
+        outputline =  "%s\t" % cmd
+        outputline += "%s\t" % "|".join(O.getOutput('snp'))
+        outputline += "%s\t" % "|".join(O.getBatchMessages(3))
+
+        outputline += "\n"
+
+        #Output: append the line to the results file of this job
+        filename = "%s/Results_%s.txt" % (self.__config.resultsDir, i)
+        if not os.path.exists(filename) :
+            # If the file does not yet exist, create it with the correct
+            # header above it. The header is read from the config file as
+            # a list. We need a tab delimited string.
+            header = self.__config.snpConverterOutHeader
+            handle = open(filename, 'a')
+            handle.write("%s\n" % "\t".join(header))
+        #if
+        else :
+            handle = open(filename, 'a')
+
+        handle.write(outputline)
+        handle.close()
+        O.addMessage(__file__, -1, "INFO",
+                     "Finished SNP converter batch rs%s" % cmd)
+    #_processSNP
+
 
     def addJob(self, outputFilter, eMail, queue, fromHost, jobType, Arg1) :
         """
diff --git a/src/wsgi.py b/src/wsgi.py
index 9449f0fe..46c251c7 100644
--- a/src/wsgi.py
+++ b/src/wsgi.py
@@ -768,7 +768,7 @@ class BatchChecker:
         - arg1: Additional argument. Currently only used if batchType is
                 'PositionConverter', denoting the human genome build.
         - batchType: Type of batch job to run. One of 'NameChecker' (default),
-                     'SyntaxChecker', or 'PositionChecker'.
+                     'SyntaxChecker', 'PositionConverter', or 'SnpConverter'.
         """
         i = web.input(batchEmail=None, batchFile={}, arg1='',
                       batchType=None)
@@ -786,7 +786,8 @@ class BatchChecker:
         @kwarg arg1: Additional argument. Currently only used if batchType is
                      'PositionConverter', denoting the human genome build.
         @kwarg batchType: Type of batch job to run. One of 'NameChecker'
-                          (default), 'SyntaxChecker', or 'PositionChecker'.
+                          (default), 'SyntaxChecker', 'PositionConverter', or
+                          'SnpConverter'.
         """
         O = Output.Output(__file__, C.Output)
 
@@ -795,7 +796,8 @@ class BatchChecker:
                 "debug"         : [],
                 "batchTypes"    : ["NameChecker",
                                    "SyntaxChecker",
-                                   "PositionConverter"],
+                                   "PositionConverter",
+                                   "SnpConverter"],
                 "hideTypes"     : batchType and 'none' or '',
                 "selected"      : "0",
                 "batchType"     : batchType or "",
diff --git a/templates/menu.html b/templates/menu.html
index 0e6f7f9e..4b48bc7b 100644
--- a/templates/menu.html
+++ b/templates/menu.html
@@ -274,6 +274,18 @@
         </td>
       </tr>
 
+      <tr>
+        <td></td>
+        <td valign="baseline" width="10">
+          <img src="base/images/bullitmiddel.gif" id="b_batchSnpConverter">
+        </td>
+        <td colspan="2">
+          <a id="page_batchSnpConverter" 
+            href="batchSnpConverter" 
+            class="vertnavsub">SNP Converter</a>
+        </td>
+      </tr>
+
       <tr>
         <td valign="top" width="20">
           <img src="base/images/bullitdonker.gif" id="b_upload">
-- 
GitLab