Minor Bug Fixes

src/
  Mutalyzer.py
    - Batch jobs omit additional Mutalyzer results when an error occurs
  LRGparser.py
    - Added the locus tag, transcriptProduct and proteinProduct
  GenRecord.py
    - Removed transLongName & protLongName; substituted by transcriptProduct and proteinProduct
  File.py
    - Empty lines in batch files are preserved
    - A better summary of errors is displayed
    - A threshold of 5% is set which omits erroneous entries
    - Added the skip flag ~! for entries to be skipped entirely; used for empty lines and lines that do not follow the formatting of either the new or old style
  Db.py
    - Added the option to set a flag when inserting an entry
templates/
  batch.html
    - Moved the error below the input form

TODO:
  File.py
    - Add threshold to config file

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@76 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1

Minor Bug Fixes
51318f49 · Gerben Stouten · 0aa697dd · 51318f49 · 51318f49 · 51318f49
Commit 51318f49 authored 14 years ago by Gerben Stouten
--- a/src/Modules/Db.py
+++ b/src/Modules/Db.py
@@ -1284,7 +1284,7 @@ class Batch(Db) :
        return data
    #removeJob

-    def addToQueue(self, jobID, inputl):
+    def addToQueue(self, jobID, inputl, flag):
        """
            Add a request belonging to a certain job to the queue.

@@ -1300,7 +1300,7 @@ class Batch(Db) :
        statement = """
            INSERT INTO BatchQueue
              VALUES (%s, %s, %s, %s);
-        """, (None, jobID, inputl, None)
+        """, (None, jobID, inputl, flag)

        self.query(statement)
    #addToQueue

--- a/src/Modules/File.py
+++ b/src/Modules/File.py
@@ -244,62 +244,96 @@ class File() :
                       lines).
        """
        #remove empty lines (store original line numbers line 1 = job[0])
-        jobl = [(l+1, row) for l, row in enumerate(job) if row and any(row)]
+        #jobl = [(l+1, row) for l, row in enumerate(job) if row and any(row)]
+        jobl = [(l+1, row) for l, row in enumerate(job)]

        #TODO:  Add more new style old style logic
-        #       Should empty lines be stored
-        #       Can we concatonate 3 column entries to 1 column

        if jobl[0][1] == self.__config.header : #Old style NameCheckBatch job
-            #collect all lines where the amount of arguments != 3
-            notthree = filter(lambda i: len(i[1])!=3, jobl)
-
-            [jobl.remove(r) for r in notthree]       # update job
+            ret = []
+            notthree = []
+            emptyfield = []
+            for line, job in jobl[1:]:
+                if not any(job):    #Empty line
+                    ret.append("~!")
+                    continue
+
+                inputl = ""
+                if len(job)!=3:     #Need three columns
+                    notthree.append(line)
+                elif (not(job[0] and job[2])):
+                    emptyfield.append(line)
+                else:
+                    if job[1]:
+                        if job[0].startswith("LRG"):
+                            inputl = "%s%s:%s" % tuple(job)
+                        else:
+                            inputl = "%s(%s):%s" % tuple(job)
+                    else:
+                        inputl = "%s:%s" % (job[0], job[2])

-            #collect all lines where the first or third argument is empty
-            emptyfield = filter(lambda i: not(i[1][0] and i[1][2]), jobl)
+                if not inputl:
+                    #try to make something out of it
+                    inputl = "~!InputFields: " #start with the skip flag
+                    inputl+= "|".join(job)

-            [jobl.remove(r) for r in emptyfield]     # update job
+                ret.append(inputl)
+            #for

            #Create output Message for incompatible fields
            if any(notthree):
-                lines = ", ".join([str(i[0]) for i in notthree])
-                self.__output.addMessage(__file__, 4, "EBPARSE",
-                        "Wrong amount of columns in line(s): %s.\n" % lines)
+                lines = makeList(notthree, 10)
+                self.__output.addMessage(__file__, 3, "EBPARSE",
+                        "Wrong amount of columns in %i line(s): %s.\n" %
+                        (len(notthree), lines))

            if any(emptyfield):
-                lines = ", ".join([str(i[0]) for i in emptyfield])
-                self.__output.addMessage(__file__, 4, "EBPARSE",
-                        "The first and last column can't be left empty on "
-                        "line(s): %s.\n" % lines)
-
-            if notthree or emptyfield:
-                return None
+                lines = makeList(emptyfield, 10)
+                self.__output.addMessage(__file__, 3, "EBPARSE",
+                        "The first and last column can't be left empty in "
+                        "%i line(s): %s.\n" % (len(emptyfield), lines))

-            #Create a Namechecker batch entry
-            ret = []
-            for line, job in jobl[1:]:
-                if job[1]:
-                    if job[0].startswith("LRG"):
-                        inputl = "%s%s:%s" % tuple(job)
-                    else:
-                        inputl = "%s(%s):%s" % tuple(job)
-                else:
-                    inputl = "%s:%s" % (job[0], job[2])
-                ret.append(inputl)
-            return ret
+            errlist = notthree + emptyfield
+        #if

        else:   #No Header, possibly a new BatchType
+            if len(jobl) == 0: return
            #collect all lines with data in fields other than the first
-            lines = ", ".join([str(row[0]) for row in jobl if any(row[1][1:])])
-            if any(lines):
-                self.__output.addMessage(__file__, 4, "EBPARSE",
+            errlist = [line for line, row in jobl if any(row[1:])]
+            if any(errlist):
+                self.__output.addMessage(__file__, 3, "EBPARSE",
                    "New Type Batch jobs (see help) should contain one "
-                    "entry per line, please check line(s): %s" % lines)
-            else:
-                return [job[0] for line, job in jobl]
+                    "entry per line, please check %i line(s): %s" %
+                    (len(errList), makeList(errlist)))
+
+            ret = []
+            for line, job in jobl:
+                if not any(job):    #Empty line
+                    ret.append("~!")
+                    continue
+                if line in lines:
+                    inputl = "~!InputFields: "   #Dirty Escape BatchEntries
+                else:
+                    inputl = ""
+                ret.append(inputl+"|".join(job))
+        #else

+        if not ret: return None     #prevent divide by zero

+        err = float(len(errlist))/len(ret)
+        if err == 0:
+            return ret
+        elif err < 0.05:
+            #allow a 5 percent threshold for errors in batchfiles
+            self.__output.addMessage(__file__, 3, "EBPARSE",
+                    "There were errors in your batch entry file, they are "
+                    "omitted and your batch is started.")
+            self.__output.addMessage(__file__, 3, "EBPARSE",
+                    "Please check the batch input file help at the top of "
+                    "this page for additional information.")
+            return ret
+        else:
+            return None
    #__checkBatchFormat

    def parseFileRaw(self, handle) :
@@ -345,3 +379,10 @@ class File() :
        return None
    #parseBatchFile
 #File
+
+def makeList(l, maxlen=10):
+    ret = ", ".join(str(i) for i in l[:maxlen])
+    if len(l)>maxlen:
+        return ret+", ..."
+    else:
+        return ret
--- a/src/Modules/GenRecord.py
+++ b/src/Modules/GenRecord.py
@@ -94,8 +94,6 @@ class Locus(object) :
        self.proteinRange = []
        self.locusTag = None
        self.link = None
-        self.transLongName = ""
-        self.protLongName = ""
        self.transcribe = False
        self.translate = False
        self.linkMethod = None

--- a/src/Modules/LRGparser.py
+++ b/src/Modules/LRGparser.py
@@ -74,6 +74,8 @@ def createLrgRecord(data):
        transcriptName = tData.getAttribute("name").encode("utf8")[1:]
        transcription = [t for t in gene.transcriptList if t.name ==
                transcriptName][0]
+        transcription.locusTag = transcriptName and "t"+transcriptName
+        transcription.linkMethod = "Locus Tag"

        transcription.location = \
          [int(tData.getAttribute("start")), int(tData.getAttribute("end"))]
@@ -171,14 +173,14 @@ def _emptyTranscripts(symbol, data):

 def _transcriptPopulator(trName, trData):
    transcript = GenRecord.Locus(trName)
-    transcript.longName = trData.get("transLongName")
+    transcript.transcriptProduct = trData.get("transLongName")
    if trData.has_key("transAttr"):
        tA = trData["transAttr"]
        transcript.transcriptID = tA.get("transcript_id")
        transcript.location = [tA.get("start"), tA.get("end")]

    if trData.has_key("proteinAttr"):
-        transcript.protLongName = trData.get("proteinLongName")
+        transcript.proteinProduct = trData.get("proteinLongName")

        pA = trData["proteinAttr"]
        transcript.proteinID = pA.get("accession")

--- a/src/Modules/Scheduler.py
+++ b/src/Modules/Scheduler.py
@@ -117,7 +117,14 @@ class Scheduler() :
        if not flags: return
        if 'S' in flags: #This entry is going to be skipped
            #Add a usefull message to the Output object
-            O.addMessage(__file__, 4, "EBSKIP", "Skipping entry")
+            if "S0" in flags:
+                message = "Entry could not be formatted correctly, check "\
+                        "batch input file help for details"
+            elif "S9" in flags:
+                message = "Empty Line"
+            else:
+                message = "Skipping entry"
+            O.addMessage(__file__, 4, "EBSKIP", message)
            return True #skip
        #if
        if 'A' in flags: #This entry is altered before execution
@@ -182,13 +189,13 @@ class Scheduler() :
        while jobList :
            for i, jobType, arg1 in jobList :
                inputl, flags = self.__database.getFromQueue(i)
-                if inputl:
+                if not(inputl is None):
                    if jobType == "NameChecker":
                        self._processNameBatch(inputl, i, flags)
                    elif jobType == "SyntaxChecker":
-                        self._processSyntaxCheck(inputl, i)
+                        self._processSyntaxCheck(inputl, i, flags)
                    elif jobType == "ConversionChecker":
-                        self._processConversion(inputl, i, arg1)
+                        self._processConversion(inputl, i, arg1, flags)
                    else: #unknown jobType
                        pass #TODO: Scream burning water
                else :
@@ -257,7 +264,7 @@ class Scheduler() :
        handle.close()
    #_processNameBatch

-    def _processSyntaxCheck(self, cmd, i):
+    def _processSyntaxCheck(self, cmd, i, flags):
        #TODO documentation
        """
            _processSyntaxCheck docstring
@@ -267,8 +274,13 @@ class Scheduler() :
        O = Output.Output(__file__, C.Output)
        P = Parser.Nomenclatureparser(O)

+        skip = self.__processFlags(O, flags)
        #Process
-        parsetree = P.parse(cmd)
+        if not skip:
+            parsetree = P.parse(cmd)
+        else:
+            parsetree = None
+
        if parsetree:
            result = "OK"
        else:
@@ -288,7 +300,7 @@ class Scheduler() :
        handle.close()
    #_processSyntaxCheck

-    def _processConversion(self, cmd, i, build):
+    def _processConversion(self, cmd, i, build, flags):
        #TODO documentation
        """
            _processConversion docstring
@@ -301,39 +313,38 @@ class Scheduler() :
        gName = ""
        cNames = [""]

-        try:
-            #process
-            converter = Mapper.Converter(build, C, O)
+        skip = self.__processFlags(O, flags)
+        if not skip:
+            try:
+                #process
+                converter = Mapper.Converter(build, C, O)

-            #Also accept chr accNo
-            variant = converter.correctChrVariant(variant)
+                #Also accept chr accNo
+                variant = converter.correctChrVariant(variant)

-            if not (":c." in variant or ":g." in variant):
-                #Bad name
-                P = Parser.Nomenclatureparser(O)
-                parsetree = P.parse(variant)
+                if not(":c." in variant or ":g." in variant):
+                    #Bad name
+                    P = Parser.Nomenclatureparser(O)
+                    parsetree = P.parse(variant)
            #if

-            if ":c." in variant:
-                # Do the c2chrom dance
-                variant = converter.c2chrom(variant)
-            if variant and ":g." in variant:
-                # Do the g2c dance
-                variants = converter.chrom2c(variant)
-                if variants:
-                    gName = variant #TODO clarify
-                    cNames = [cName for cName2 in variants.values() for cName in
-                            cName2]
-                #if
-            #if
-        #try
-        except Exception, e:
-            #Catch all exceptions related to the processing of cmd
-            O.addMessage(__file__, -1, "EBATCH",
-                    "Error during ConversionBatch. Input: %s" % `cmd`)
-            O.addMessage(__file__, 4, "EBATCHU",
-                    "Unexpected error occurred, dev-team notified")
-        #except
+                if ":c." in variant:
+                    # Do the c2chrom dance
+                    variant = converter.c2chrom(variant)
+                if variant and ":g." in variant:
+                    # Do the g2c dance
+                    variants = converter.chrom2c(variant)
+                    if variants:
+                        gName = variant #TODO clarify
+                        cNames = [cName for cName2 in variants.values() \
+                                for cName in cName2]
+            except Exception, e:
+                #Catch all exceptions related to the processing of cmd
+                O.addMessage(__file__, -1, "EBATCH",
+                        "Error during ConversionBatch. Input: %s" % `cmd`)
+                O.addMessage(__file__, 4, "EBATCHU",
+                        "Unexpected error occurred, dev-team notified")
+	    #except

        error = "%s" % "|".join(O.getBatchMessages(3))

@@ -368,7 +379,16 @@ class Scheduler() :
        jobID = self.__database.addJob(outputFilter, eMail,
                fromHost, jobType, Arg1)
        for inputl in queue :
-            self.__database.addToQueue(jobID, inputl)
+            if inputl.startswith("~!"): #Dirty Escape
+                inputl = inputl[2:]
+                if inputl:
+                    flag = "S0"
+                else:
+                    flag = "S9"
+                    inputl = " " #Empty Line
+            else:
+                flag = None
+            self.__database.addToQueue(jobID, inputl, flag)

        # Spawn child
        p = subprocess.Popen(["MutalyzerBatch",

--- a/src/Mutalyzer.py
+++ b/src/Mutalyzer.py
@@ -473,6 +473,9 @@ def _createBatchOutput(O):
    if len(descriptions) == 0:
        #No descriptions generated [unlikely]
        return
+    if O.Summary()[0]:
+        #There were errors during the run, return.
+        return
    for descr in descriptions:
        if goi in descr[0] and toi in descr[1]: # Gene and Transcript
            if tDescr:

--- a/templates/batch.html
+++ b/templates/batch.html
@@ -51,12 +51,7 @@
  </div> <!-- help -->
  <br /><br />

-      <div tal:condition = "errors" id="errors">
-          <b>Errors:</b><br />
-        <div tal:repeat = "i errors"
-                  tal:replace = "structure string:${i}<br>">
-         </div>
-      </div>
+

      <table id="inputform">
          <form action = "" method = "post" enctype = "multipart/form-data">
@@ -117,6 +112,13 @@
        tal:content="structure string:document.getElementById('batchType').selectedIndex = ${selected}; document.getElementById('batchRow').style.display = '${hideTypes}'; window.onload = initpage;">
        </script>

+      <div tal:condition = "errors" id="errors">
+          <b>Errors:</b><br />
+        <div tal:repeat = "i errors"
+                  tal:replace = "structure string:${i}<br>">
+              </div><br />
+     </div>
+
  <div tal:condition = "messages">
    <b>Messages</b><br>
     <div tal:repeat = "i messages"