From 09870eec2a9d0e27b14117e8802cbb2c786b6306 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Thu, 20 Jan 2011 15:19:38 +0000 Subject: [PATCH] Added batch SNP converter. src/wsgi.py: - Updated batch routing to include SNP converter. mutalyzer.conf: src/Modules/Config.py: - Added result file header line for batch SNP converter. src/Modules/Scheduler.py: - Added batch SNP converter. src/Modules/File.py: - Updated batch CSV input file parsing. Default is now standard Excel format and only if the CSV sniffer can find another dialect using a predefined set of delimiter characters without error we use that one. templates/menu.html: - Added link for batch SNP converter. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@163 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- mutalyzer.conf | 7 +++-- src/Modules/Config.py | 1 + src/Modules/File.py | 18 +++++++++---- src/Modules/Scheduler.py | 58 ++++++++++++++++++++++++++++++++++++++++ src/wsgi.py | 8 +++--- templates/menu.html | 12 +++++++++ 6 files changed, 94 insertions(+), 10 deletions(-) diff --git a/mutalyzer.conf b/mutalyzer.conf index 12ff4d0e..2a35880d 100644 --- a/mutalyzer.conf +++ b/mutalyzer.conf @@ -120,12 +120,15 @@ PIDfile = "./var/batch.pid" # The output header for NameChecking nameCheckOutHeader = "Input", "Errors | Messages", "AccNo", "Genesymbol", "Variant", "Reference Sequence Start Descr.", "Coding DNA Descr.", "Protein Descr.", "GeneSymbol Coding DNA Descr.", "GeneSymbol Protein Descr.", "Genomic Reference", "Coding Reference", "Protein Reference", "Affected Transcripts", "Affected Proteins" -# The output header for NameChecking +# The output header for SyntaxChecking syntaxCheckOutHeader = "Input", "Status" -# The output header for NameChecking +# The output header for PositionConverter positionConverterOutHeader = "Input Variant", "Errors", "Chromosomal Variant", "Coding Variant(s)" +# The output header for SnpConverter +snpConverterOutHeader = "Input Variant", "HGVS description(s)", "Errors | Messages" + # # These settings are used by the File module. diff --git a/src/Modules/Config.py b/src/Modules/Config.py index 94b9c66f..f0662721 100644 --- a/src/Modules/Config.py +++ b/src/Modules/Config.py @@ -204,6 +204,7 @@ class Config() : self.Scheduler.nameCheckOutHeader = config["nameCheckOutHeader"] self.Scheduler.syntaxCheckOutHeader= config["syntaxCheckOutHeader"] self.Scheduler.positionConverterOutHeader= config["positionConverterOutHeader"] + self.Scheduler.snpConverterOutHeader= config["snpConverterOutHeader"] # Set thte variables neede for the Batch module. self.Batch.PIDfile = config["PIDfile"] diff --git a/src/Modules/File.py b/src/Modules/File.py index d1e13a9f..8206da6b 100644 --- a/src/Modules/File.py +++ b/src/Modules/File.py @@ -123,16 +123,24 @@ class File() : handle.seek(0) buf = handle.read(self.__config.bufSize) + # Default dialect + dialect = 'excel' + + # The idea is that for new-style batch input files we have only + # one column and the sniffer cannot find a delimiter. + try : - dialect = csv.Sniffer().sniff(buf) + # Todo: delimiters in config file + dialect = csv.Sniffer().sniff(buf, delimiters="\t ;|,") except csv.Error, e : - self.__output.addMessage(__file__, 4, "EBPARSE", e) - return None + #self.__output.addMessage(__file__, 4, "EBPARSE", e) + #return None + pass #except #Watch out for : delimiter FIXME and for the . delimiter - if dialect.delimiter == ":": - dialect.delimiter = "\t" +# if dialect.delimiter == ":": +# dialect.delimiter = "\t" handle.seek(0) reader = csv.reader(handle, dialect) diff --git a/src/Modules/Scheduler.py b/src/Modules/Scheduler.py index 4aabc806..21503a12 100644 --- a/src/Modules/Scheduler.py +++ b/src/Modules/Scheduler.py @@ -27,6 +27,7 @@ from Modules import Config # Config.Config from Modules import Output # Output.Output from Modules import Parser # Parser.Nomenclatureparser from Modules import Mapper # Mapper.Converter +from Modules import Retriever # Retriever.Retriever import Mutalyzer # Mutalyzer.process @@ -333,6 +334,8 @@ class Scheduler() : self._processSyntaxCheck(inputl, i, flags) elif jobType == "PositionConverter" : self._processConversion(inputl, i, arg1, flags) + elif jobType == "SnpConverter" : + self._processSNP(inputl, i, flags) else: #unknown jobType pass #TODO: Scream burning water and remove from Queue else : @@ -566,6 +569,61 @@ class Scheduler() : #_processConversion + def _processSNP(self, cmd, i, flags) : + """ + Process an entry from the SNP converter Batch, write the results + to the job-file. If an Exception is raised, catch and continue. + + Side-effect: + - Output written to outputfile. + + @arg cmd: The SNP converter input + @type cmd: + @arg i: The JobID + @type i: + @arg flags: Flags of the current entry + @type flags: + """ + + C = Config.Config() + O = Output.Output(__file__, C.Output) + O.addMessage(__file__, -1, "INFO", + "Received SNP converter batch rs" + cmd) + + #Read out the flags + # Todo: Do something with the flags? + skip = self.__processFlags(O, flags) + + if not skip : + R = Retriever.Retriever(C.Retriever, O, None) + R.snpConvert(cmd) + + # Todo: Is output ok? + outputline = "%s\t" % cmd + outputline += "%s\t" % "|".join(O.getOutput('snp')) + outputline += "%s\t" % "|".join(O.getBatchMessages(3)) + + outputline += "\n" + + #Output + filename = "%s/Results_%s.txt" % (self.__config.resultsDir, i) + if not os.path.exists(filename) : + # If the file does not yet exist, create it with the correct + # header above it. The header is read from the config file as + # a list. We need a tab delimited string. + header = self.__config.snpConverterOutHeader + handle = open(filename, 'a') + handle.write("%s\n" % "\t".join(header)) + #if + else : + handle = open(filename, 'a') + + handle.write(outputline) + handle.close() + O.addMessage(__file__, -1, "INFO", + "Finished SNP converter batch rs%s" % cmd) + #_processSNP + def addJob(self, outputFilter, eMail, queue, fromHost, jobType, Arg1) : """ diff --git a/src/wsgi.py b/src/wsgi.py index 9449f0fe..46c251c7 100644 --- a/src/wsgi.py +++ b/src/wsgi.py @@ -768,7 +768,7 @@ class BatchChecker: - arg1: Additional argument. Currently only used if batchType is 'PositionConverter', denoting the human genome build. - batchType: Type of batch job to run. One of 'NameChecker' (default), - 'SyntaxChecker', or 'PositionChecker'. + 'SyntaxChecker', 'PositionConverter', or 'SnpConverter'. """ i = web.input(batchEmail=None, batchFile={}, arg1='', batchType=None) @@ -786,7 +786,8 @@ class BatchChecker: @kwarg arg1: Additional argument. Currently only used if batchType is 'PositionConverter', denoting the human genome build. @kwarg batchType: Type of batch job to run. One of 'NameChecker' - (default), 'SyntaxChecker', or 'PositionChecker'. + (default), 'SyntaxChecker', 'PositionConverter', or + 'SnpConverter'. """ O = Output.Output(__file__, C.Output) @@ -795,7 +796,8 @@ class BatchChecker: "debug" : [], "batchTypes" : ["NameChecker", "SyntaxChecker", - "PositionConverter"], + "PositionConverter", + "SnpConverter"], "hideTypes" : batchType and 'none' or '', "selected" : "0", "batchType" : batchType or "", diff --git a/templates/menu.html b/templates/menu.html index 0e6f7f9e..4b48bc7b 100644 --- a/templates/menu.html +++ b/templates/menu.html @@ -274,6 +274,18 @@ </td> </tr> + <tr> + <td></td> + <td valign="baseline" width="10"> + <img src="base/images/bullitmiddel.gif" id="b_batchSnpConverter"> + </td> + <td colspan="2"> + <a id="page_batchSnpConverter" + href="batchSnpConverter" + class="vertnavsub">SNP Converter</a> + </td> + </tr> + <tr> <td valign="top" width="20"> <img src="base/images/bullitdonker.gif" id="b_upload"> -- GitLab