Commit 2308bc1e authored by Laros's avatar Laros
Browse files

Added a hamming distance selection option and a version option.

parent 1fa761af
...@@ -22,10 +22,10 @@ Licensed under the MIT license, see the LICENSE file. ...@@ -22,10 +22,10 @@ Licensed under the MIT license, see the LICENSE file.
RELEASE = False RELEASE = False
__version_info__ = ('0', '3', 'dev') __version_info__ = ('0', '4', 'dev')
__version__ = '.'.join(__version_info__) __version__ = '.'.join(__version_info__)
__author__ = 'LUMC, Jeroen F.J. Laros' __author__ = 'LUMC, Jeroen F.J. Laros'
__contact__ = 'j.f.j.laros@@lumc.nl' __contact__ = 'j.f.j.laros@lumc.nl'
__homepage__ = 'https://git.lumc.nl/j.f.j.laros/barcode' __homepage__ = 'https://git.lumc.nl/j.f.j.laros/barcode'
...@@ -9,119 +9,151 @@ Use any of the positional arguments with the -h option for more information. ...@@ -9,119 +9,151 @@ Use any of the positional arguments with the -h option for more information.
import argparse import argparse
import Levenshtein import Levenshtein
from . import __version__, __author__, __contact__, __homepage__
__nucleotides = ['A', 'C', 'G', 'T']
def docSplit(func): def docSplit(func):
return func.__doc__.split("\n\n")[0] return func.__doc__.split("\n\n")[0]
def __allWords(bucket, word, length, result): def version(name):
"""
Generate all possible words of a certain length over a specified alphabet.
@arg bucket: An alphabet.
@type bucket: list[str]
@arg word: A word over the alphabet {bucket}.
@type word: str
@arg length: Length of the barcodes.
@type length: int
@arg result: Constructed words.
@type result: list[str]
"""
if length:
for i in bucket:
__allWords(bucket, word + i, length - 1, result)
else:
result.append(word)
#__allWords
def allBarcodes(length):
    """
    Enumerate every barcode of the requested length.

    The words are built over the module-level nucleotide alphabet by the
    recursive helper, which appends each finished word to the list that is
    returned here.

    @arg length: Length of the barcodes.
    @type length: int

    @returns: All words of {length} nucleotides.
    @rtype: list[str]
    """
    words = []
    __allWords(__nucleotides, "", length, words)

    return words
#allBarcodes
def __filterStretch(barcode, stretches):
    """
    Check that a barcode is free of every forbidden stretch.

    @arg barcode: A barcode.
    @type barcode: str
    @arg stretches: Substrings that must not occur in {barcode}.
    @type stretches: list[str]

    @returns: False as soon as one stretch occurs in {barcode}, True
        otherwise (also when {stretches} is empty).
    @rtype: bool
    """
    return not any(forbidden in barcode for forbidden in stretches)
#__filterStretch
def filterStretches(barcodes, max_stretch):
    """
    Filter a list of barcodes for mononucleotide stretches.

    A barcode is dropped when it contains a run of {max_stretch} (or more)
    identical nucleotides; the order of the remaining barcodes is kept.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg max_stretch: Length of the mononucleotide run that disqualifies a
        barcode.
    @type max_stretch: int

    @returns: The barcodes that contain none of the forbidden runs.
    @rtype: list[str]
    """
    # Build a real list rather than using map(): on Python 3 map() returns a
    # one-shot iterator that would be exhausted after the first barcode, so
    # every later barcode would pass the filter unchecked.
    stretches = [max_stretch * nucleotide for nucleotide in __nucleotides]

    return [code for code in barcodes if __filterStretch(code, stretches)]
#filterStretches
def __filterDistance(barcodes, candidate, min_dist):
""" """
Test whether {candidate} can be added to {barcodes} based on the minimum Return version information.
distance between {candidate} and all barcodes in {barcodes}.
@arg name: Name of the program.
@arg barcodes: List of barcodes. @type name: str
@type barcodes: list[str]
@arg candidate: Candidate barcode. @returns: Version information.
@type candidate: str @rtype: str
@arg min_dist: Minimum distance between the barcodes. """
@type min_dist: int return "%s version %s\n%s\n%s\n%s" % (name, __version__, __author__,
""" __contact__, __homepage__)
for i in barcodes: #version
if Levenshtein.distance(i, candidate) < min_dist:
return False class BarCode(object):
"""
return True Design and test NGS barcodes.
#__filterDistance """
__nucleotides = ['A', 'C', 'G', 'T']
def filterDistance(barcodes, min_dist):
"""
Filter a list of barcodes for distances with other barcodes. def __init__(self, distance=Levenshtein.distance):
"""
@arg barcodes: List of barcodes. Initialise the class.
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes. @arg distance: Distance function.
@type min_dist: int @type distance: func
""" """
result = [] self.distance = distance
#__init__
for i in barcodes:
if __filterDistance(result, i, min_dist): def __allWords(self, bucket, word, length, result):
result.append(i) """
Generate all possible words of a certain length over a specified
return result alphabet.
#filterDistance
@arg bucket: An alphabet.
def barcode(length, max_stretch, min_dist): @type bucket: list[str]
@arg word: A word over the alphabet {bucket}.
@type word: str
@arg length: Length of the barcodes.
@type length: int
@arg result: Constructed words.
@type result: list[str]
"""
if length:
for i in bucket:
self.__allWords(bucket, word + i, length - 1, result)
else:
result.append(word)
#__allWords
def allBarcodes(self, length):
    """
    Enumerate every barcode of the requested length.

    The words are built over the class nucleotide alphabet by the private
    recursive helper, which appends each finished word to the list that is
    returned here.

    @arg length: Length of the barcodes.
    @type length: int

    @returns: All words of {length} nucleotides.
    @rtype: list[str]
    """
    words = []
    self.__allWords(self.__nucleotides, "", length, words)

    return words
#allBarcodes
def __filterStretch(self, barcode, stretches):
    """
    Check that a barcode is free of every forbidden stretch.

    @arg barcode: A barcode.
    @type barcode: str
    @arg stretches: Substrings that must not occur in {barcode}.
    @type stretches: list[str]

    @returns: False as soon as one stretch occurs in {barcode}, True
        otherwise (also when {stretches} is empty).
    @rtype: bool
    """
    return not any(forbidden in barcode for forbidden in stretches)
#__filterStretch
def filterStretches(self, barcodes, max_stretch):
    """
    Filter a list of barcodes for mononucleotide stretches.

    A barcode is dropped when it contains a run of {max_stretch} (or more)
    identical nucleotides; the order of the remaining barcodes is kept.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg max_stretch: Length of the mononucleotide run that disqualifies a
        barcode.
    @type max_stretch: int

    @returns: The barcodes that contain none of the forbidden runs.
    @rtype: list[str]
    """
    # Build a real list rather than using map(): on Python 3 map() returns a
    # one-shot iterator that would be exhausted after the first barcode, so
    # every later barcode would pass the filter unchecked.
    stretches = [max_stretch * nucleotide
        for nucleotide in self.__nucleotides]

    return [code for code in barcodes
        if self.__filterStretch(code, stretches)]
#filterStretches
def __filterDistance(self, barcodes, candidate, min_dist):
    """
    Decide whether {candidate} is far enough from every accepted barcode.

    The distance is measured with the function stored in {self.distance}.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg candidate: Candidate barcode.
    @type candidate: str
    @arg min_dist: Minimum distance between the barcodes.
    @type min_dist: int

    @returns: True if no barcode in {barcodes} is closer to {candidate}
        than {min_dist} (also when {barcodes} is empty), False otherwise.
    @rtype: bool
    """
    return all(self.distance(accepted, candidate) >= min_dist
        for accepted in barcodes)
#__filterDistance
def filterDistance(self, barcodes, min_dist):
    """
    Greedily select barcodes that keep a minimum mutual distance.

    The barcodes are visited in the given order; one is accepted only if
    its distance (measured with {self.distance}) to every barcode accepted
    so far is at least {min_dist}.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg min_dist: Minimum distance between the barcodes.
    @type min_dist: int

    @returns: The accepted barcodes, in their original order.
    @rtype: list[str]
    """
    accepted = []

    for candidate in barcodes:
        # Same check as the private distance helper, written in place.
        far_enough = all(self.distance(kept, candidate) >= min_dist
            for kept in accepted)
        if far_enough:
            accepted.append(candidate)

    return accepted
#filterDistance
#BarCode
def barcode(length, max_stretch, min_dist, distance):
""" """
Make a set of barcodes, filter them for mononucleotide stretches and for Make a set of barcodes, filter them for mononucleotide stretches and for
distances with other barcodes. distances with other barcodes.
...@@ -132,12 +164,16 @@ def barcode(length, max_stretch, min_dist): ...@@ -132,12 +164,16 @@ def barcode(length, max_stretch, min_dist):
@type max_stretch: int @type max_stretch: int
@arg min_dist: Minimum distance between the barcodes. @arg min_dist: Minimum distance between the barcodes.
@type min_dist: int @type min_dist: int
@arg distance: Distance function.
@type distance: func
""" """
return filterDistance(filterStretches(allBarcodes(length), max_stretch), B = BarCode(distance)
min_dist)
return B.filterDistance(B.filterStretches(B.allBarcodes(length),
max_stretch), min_dist)
#barcode #barcode
def testBarcodes(barcodes, min_dist): def testBarcodes(barcodes, min_dist, distance):
""" """
Test a set of barcodes. Test a set of barcodes.
...@@ -145,11 +181,15 @@ def testBarcodes(barcodes, min_dist): ...@@ -145,11 +181,15 @@ def testBarcodes(barcodes, min_dist):
@type barcodes: list[str] @type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes. @arg min_dist: Minimum distance between the barcodes.
@type min_dist: int @type min_dist: int
@arg distance: Distance function.
@type distance: func
@returns: The number of barcodes that violate the distance constraint. @returns: The number of barcodes that violate the distance constraint.
@rtype: int @rtype: int
""" """
return len(barcodes) - len(filterDistance(barcodes, min_dist)) B = BarCode(distance)
return len(barcodes) - len(B.filterDistance(barcodes, min_dist))
#testBarcodes #testBarcodes
def main(): def main():
...@@ -165,11 +205,14 @@ def main(): ...@@ -165,11 +205,14 @@ def main():
distance_parser = argparse.ArgumentParser(add_help=False) distance_parser = argparse.ArgumentParser(add_help=False)
distance_parser.add_argument("-d", dest="distance", type=int, default=3, distance_parser.add_argument("-d", dest="distance", type=int, default=3,
help="minimum distance between the barcodes") help="minimum distance between the barcodes")
distance_parser.add_argument("-H", dest="hamming", default=False,
action="store_true", help="use Hamming distance")
usage = __doc__.split("\n\n\n") usage = __doc__.split("\n\n\n")
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
description=usage[0], epilog=usage[1]) description=usage[0], epilog=usage[1])
parser.add_argument("-v", action="version", version=version(parser.prog))
subparsers = parser.add_subparsers(dest="subcommand") subparsers = parser.add_subparsers(dest="subcommand")
parser_make = subparsers.add_parser("make", parents=[output_parser, parser_make = subparsers.add_parser("make", parents=[output_parser,
...@@ -184,13 +227,18 @@ def main(): ...@@ -184,13 +227,18 @@ def main():
args = parser.parse_args() args = parser.parse_args()
dfunc = Levenshtein.distance
if args.hamming:
dfunc = Levenshtein.hamming
if args.subcommand == "make": if args.subcommand == "make":
args.OUTPUT.write("\n".join(barcode(args.length, args.stretch, args.OUTPUT.write("\n".join(barcode(args.length, args.stretch,
args.distance))) args.distance, dfunc)))
if args.subcommand == "test": if args.subcommand == "test":
print "%s barcodes violate the distance contraint." % testBarcodes( print "%s barcodes violate the distance contraint." % testBarcodes(
map(lambda x: x.strip(), args.INPUT.readlines()), args.distance) map(lambda x: x.strip(), args.INPUT.readlines()), args.distance,
dfunc)
#main #main
if __name__ == "__main__": if __name__ == "__main__":
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment