Commit 2308bc1e authored by Laros's avatar Laros
Browse files

Added a hamming distance selection option and a version option.

parent 1fa761af
......@@ -22,10 +22,10 @@ Licensed under the MIT license, see the LICENSE file.
RELEASE = False
__version_info__ = ('0', '3', 'dev')
__version_info__ = ('0', '4', 'dev')
__version__ = '.'.join(__version_info__)
__author__ = 'LUMC, Jeroen F.J. Laros'
__contact__ = 'j.f.j.laros@@lumc.nl'
__contact__ = 'j.f.j.laros@lumc.nl'
__homepage__ = 'https://git.lumc.nl/j.f.j.laros/barcode'
......@@ -9,119 +9,151 @@ Use any of the positional arguments with the -h option for more information.
import argparse
import Levenshtein
__nucleotides = ['A', 'C', 'G', 'T']
from . import __version__, __author__, __contact__, __homepage__
def docSplit(func):
return func.__doc__.split("\n\n")[0]
def __allWords(bucket, word, length, result):
"""
Generate all possible words of a certain length over a specified alphabet.
@arg bucket: An alphabet.
@type bucket: list[str]
@arg word: A word over the alphabet {bucket}.
@type word: str
@arg length: Lenth of the barcodes.
@type length: int
@arg result: Constructed words.
@type result: list[str]
"""
if length:
for i in bucket:
__allWords(bucket, word + i, length - 1, result)
else:
result.append(word)
#__allWords
def allBarcodes(length):
"""
Generate all possible barcodes of a certain length.
@arg length: Lenth of the barcodes.
@type length: int
"""
result = []
__allWords(__nucleotides, "", length, result)
return result
#allBarcodes
def __filterStretch(barcode, stretches):
"""
Test whether {barcode} contains none of the stretches in {stretches}.
@arg barcode: A barcode.
@type barcode: str
@arg stretches:
@type stretches: list[str]
"""
for i in stretches:
if i in barcode:
return False
return True
#__filterStretch
def filterStretches(barcodes, max_stretch):
"""
Filter a list of barcodes for mononucleotide stretches.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg max_stretch: Maximum mononucleotide stretch length.
@type max_stretch: int
"""
stretches = map(lambda x: max_stretch * x, __nucleotides)
result = []
for i in barcodes:
if __filterStretch(i, stretches):
result.append(i)
return result
#filterStretches
def __filterDistance(barcodes, candidate, min_dist):
def version(name):
"""
Test whether {candidate} can be added to {barcodes} based on the minimum
distance between {candidate} and all barcodes in {barcodes}.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg candidate: Candidate barcode.
@type candidate: str
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
"""
for i in barcodes:
if Levenshtein.distance(i, candidate) < min_dist:
return False
return True
#__filterDistance
def filterDistance(barcodes, min_dist):
"""
Filter a list of barcodes for distances with other barcodes.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
"""
result = []
for i in barcodes:
if __filterDistance(result, i, min_dist):
result.append(i)
return result
#filterDistance
def barcode(length, max_stretch, min_dist):
Return version information.
@arg name: Name of the program.
@type name: str
@returns: Version information.
@rtype: str
"""
return "%s version %s\n%s\n%s\n%s" % (name, __version__, __author__,
__contact__, __homepage__)
#version
class BarCode(object):
"""
Design and test NGS barcodes.
"""
__nucleotides = ['A', 'C', 'G', 'T']
def __init__(self, distance=Levenshtein.distance):
"""
Initialise the class.
@arg distance: Distance function.
@type distance: func
"""
self.distance = distance
#__init__
def __allWords(self, bucket, word, length, result):
"""
Generate all possible words of a certain length over a specified
alphabet.
@arg bucket: An alphabet.
@type bucket: list[str]
@arg word: A word over the alphabet {bucket}.
@type word: str
@arg length: Lenth of the barcodes.
@type length: int
@arg result: Constructed words.
@type result: list[str]
"""
if length:
for i in bucket:
self.__allWords(bucket, word + i, length - 1, result)
else:
result.append(word)
#__allWords
def allBarcodes(self, length):
"""
Generate all possible barcodes of a certain length.
@arg length: Lenth of the barcodes.
@type length: int
"""
result = []
self.__allWords(self.__nucleotides, "", length, result)
return result
#allBarcodes
def __filterStretch(self, barcode, stretches):
"""
Test whether {barcode} contains none of the stretches in {stretches}.
@arg barcode: A barcode.
@type barcode: str
@arg stretches:
@type stretches: list[str]
"""
for i in stretches:
if i in barcode:
return False
return True
#__filterStretch
def filterStretches(self, barcodes, max_stretch):
"""
Filter a list of barcodes for mononucleotide stretches.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg max_stretch: Maximum mononucleotide stretch length.
@type max_stretch: int
"""
stretches = map(lambda x: max_stretch * x, self.__nucleotides)
result = []
for i in barcodes:
if self.__filterStretch(i, stretches):
result.append(i)
return result
#filterStretches
def __filterDistance(self, barcodes, candidate, min_dist):
"""
Test whether {candidate} can be added to {barcodes} based on the
minimum distance between {candidate} and all barcodes in {barcodes}.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg candidate: Candidate barcode.
@type candidate: str
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
"""
for i in barcodes:
if self.distance(i, candidate) < min_dist:
return False
return True
#__filterDistance
def filterDistance(self, barcodes, min_dist):
"""
Filter a list of barcodes for distances with other barcodes.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
"""
result = []
for i in barcodes:
if self.__filterDistance(result, i, min_dist):
result.append(i)
return result
#filterDistance
#BarCode
def barcode(length, max_stretch, min_dist, distance):
"""
Make a set of barcodes, filter them for mononucleotide stretches and for
distances with other barcodes.
......@@ -132,12 +164,16 @@ def barcode(length, max_stretch, min_dist):
@type max_stretch: int
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
"""
return filterDistance(filterStretches(allBarcodes(length), max_stretch),
min_dist)
B = BarCode(distance)
return B.filterDistance(B.filterStretches(B.allBarcodes(length),
max_stretch), min_dist)
#barcode
def testBarcodes(barcodes, min_dist):
def testBarcodes(barcodes, min_dist, distance):
"""
Test a set of barcodes.
......@@ -145,11 +181,15 @@ def testBarcodes(barcodes, min_dist):
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
@returns: The number of barcodes that violate the distance constraint.
@rtype: int
"""
return len(barcodes) - len(filterDistance(barcodes, min_dist))
B = BarCode(distance)
return len(barcodes) - len(B.filterDistance(barcodes, min_dist))
#testBarcodes
def main():
......@@ -165,11 +205,14 @@ def main():
distance_parser = argparse.ArgumentParser(add_help=False)
distance_parser.add_argument("-d", dest="distance", type=int, default=3,
help="minimum distance between the barcodes")
distance_parser.add_argument("-H", dest="hamming", default=False,
action="store_true", help="use Hamming distance")
usage = __doc__.split("\n\n\n")
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=usage[0], epilog=usage[1])
parser.add_argument("-v", action="version", version=version(parser.prog))
subparsers = parser.add_subparsers(dest="subcommand")
parser_make = subparsers.add_parser("make", parents=[output_parser,
......@@ -184,13 +227,18 @@ def main():
args = parser.parse_args()
dfunc = Levenshtein.distance
if args.hamming:
dfunc = Levenshtein.hamming
if args.subcommand == "make":
args.OUTPUT.write("\n".join(barcode(args.length, args.stretch,
args.distance)))
args.distance, dfunc)))
if args.subcommand == "test":
print "%s barcodes violate the distance contraint." % testBarcodes(
map(lambda x: x.strip(), args.INPUT.readlines()), args.distance)
map(lambda x: x.strip(), args.INPUT.readlines()), args.distance,
dfunc)
#main
if __name__ == "__main__":
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment