Commit 35e0fc3d authored by Laros's avatar Laros
Browse files

PEP 8.

parent a5146bbf
*.pyc
.cache/
.tox/
build/
dist/
barcode.egg-info/
""" """
barcode: Design NGS barcodes. Barcode: Design NGS barcodes.
Copyright (c) 2013 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2013 Jeroen F.J. Laros <j.f.j.laros@lumc.nl> Copyright (c) 2013-2016 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2013-2016 Jeroen F.J. Laros <J.F.J.Laros@lumc.nl>
Licensed under the MIT license, see the LICENSE file. Licensed under the MIT license, see the LICENSE file.
""" """
from .barcode import BarCode
# On the event of a new release, we update the __version_info__ package
# global and set RELEASE to True.
# Before a release, a development version is denoted by a __version_info__
# ending with a 'dev' item and RELEASE is set to False.
#
# We follow a versioning scheme compatible with setuptools [1] where the
# __version_info__ variable always contains the version of the upcoming
# release (and not that of the previous release), post-fixed with a 'dev'
# item. Only in a release commit, this 'dev' item is removed (and added
# again in the next commit).
#
# [1] http://peak.telecommunity.com/DevCenter/setuptools#specifying-your-project-s-version
RELEASE = False
__version_info__ = ('0', '5', '1')
__version_info__ = ('0', '6', '0')
__version__ = '.'.join(__version_info__) __version__ = '.'.join(__version_info__)
__author__ = 'LUMC, Jeroen F.J. Laros' __author__ = 'LUMC, Jeroen F.J. Laros'
__contact__ = 'j.f.j.laros@lumc.nl' __contact__ = 'J.F.J.Laros@lumc.nl'
__homepage__ = 'https://git.lumc.nl/j.f.j.laros/barcode' __homepage__ = 'https://git.lumc.nl/j.f.j.laros/barcode'
usage = __doc__.split('\n\n\n')
def doc_split(func):
    """
    Return the first paragraph of the docstring of {func}.

    The docstring is cut at the first blank line. An empty string is returned
    when {func} has no docstring (`__doc__` is None, e.g., for an undocumented
    function or when docstrings are stripped with `python -OO`).

    :arg function func: Any object with a `__doc__` attribute.

    :returns str: The docstring text up to the first blank line.
    """
    # `__doc__` may be None; fall back to '' instead of failing on `.split`.
    return (func.__doc__ or '').split('\n\n')[0]
def version(name):
    """
    Build the version banner of the program.

    :arg str name: Name of the program.

    :returns str: The program name and version, followed by an empty line,
        the author with contact address and the homepage.
    """
    template = '{} version {}\n\nAuthor : {} <{}>\nHomepage : {}'
    fields = (name, __version__, __author__, __contact__, __homepage__)

    return template.format(*fields)
#!/usr/bin/env python
"""
Design NGS barcodes.
Use any of the positional arguments with the -h option for more information.
"""
import argparse
import Levenshtein import Levenshtein
from . import __version__, __author__, __contact__, __homepage__
def docSplit(func):
    """
    Return the first paragraph of the docstring of {func}.

    The docstring is cut at the first blank line. An empty string is returned
    when {func} has no docstring (`__doc__` is None).

    @arg func: Any object with a `__doc__` attribute.
    @type func: function

    @returns: The docstring text up to the first blank line.
    @rtype: str
    """
    # `__doc__` may be None; fall back to "" instead of failing on `.split`.
    return (func.__doc__ or "").split("\n\n")[0]
def version(name):
    """
    Build the version banner of the program.

    @arg name: Name of the program.
    @type name: str

    @returns: The program name and version on the first line, followed by
      the author, the contact address and the homepage, one per line.
    @rtype: str
    """
    template = "%s version %s\n%s\n%s\n%s"
    fields = (name, __version__, __author__, __contact__, __homepage__)

    return template % fields
#version
class BarCode(object): class BarCode(object):
""" """
Design and test NGS barcodes. Design and test NGS barcodes.
""" """
__nucleotides = ['A', 'C', 'G', 'T'] _nucleotides = ['A', 'C', 'G', 'T']
def __init__(self, distance=Levenshtein.distance): def __init__(self, distance=Levenshtein.distance):
""" """
Initialise the class. Initialise the class.
@arg distance: Distance function. :arg function distance: Distance function.
@type distance: func
""" """
self.distance = distance self.distance = distance
#__init__
def __allWords(self, bucket, word, length, result): def _all_words(self, bucket, word, length, result):
""" """
Generate all possible words of a certain length over a specified Generate all possible words of a certain length over a specified
alphabet. alphabet.
@arg bucket: An alphabet. :arg list bucket: An alphabet.
@type bucket: list[str] :arg str word: A word over the alphabet {bucket}.
@arg word: A word over the alphabet {bucket}. :arg int length: Length of the barcodes.
@type word: str :arg list result: Constructed words.
@arg length: Lenth of the barcodes.
@type length: int
@arg result: Constructed words.
@type result: list[str]
""" """
if length: if length:
for i in bucket: for i in bucket:
self.__allWords(bucket, word + i, length - 1, result) self._all_words(bucket, word + i, length - 1, result)
else: else:
result.append(word) result.append(word)
#__allWords
def allBarcodes(self, length): def _filter_stretch(self, barcode, stretches):
"""
Generate all possible barcodes of a certain length.
@arg length: Lenth of the barcodes.
@type length: int
"""
result = []
self.__allWords(self.__nucleotides, "", length, result)
return result
#allBarcodes
def __filterStretch(self, barcode, stretches):
""" """
Test whether {barcode} contains none of the stretches in {stretches}. Test whether {barcode} contains none of the stretches in {stretches}.
@arg barcode: A barcode. :arg str barcode: A barcode.
@type barcode: str :arg list stretches:
@arg stretches:
@type stretches: list[str] :returns bool: True if the barcode is clean, False otherwise.
""" """
for i in stretches: for i in stretches:
if i in barcode: if i in barcode:
return False return False
return True return True
#__filterStretch
def filterStretches(self, barcodes, max_stretch):
"""
Filter a list of barcodes for mononucleotide stretches.
@arg barcodes: List of barcodes. def _filter_distance(self, barcodes, candidate, min_dist):
@type barcodes: list[str]
@arg max_stretch: Maximum mononucleotide stretch length.
@type max_stretch: int
"""
stretches = map(lambda x: (max_stretch + 1) * x, self.__nucleotides)
result = []
for i in barcodes:
if self.__filterStretch(i, stretches):
result.append(i)
return result
#filterStretches
def __filterDistance(self, barcodes, candidate, min_dist):
""" """
Test whether {candidate} can be added to {barcodes} based on the Test whether {candidate} can be added to {barcodes} based on the
minimum distance between {candidate} and all barcodes in {barcodes}. minimum distance between {candidate} and all barcodes in {barcodes}.
@arg barcodes: List of barcodes. :arg list barcodes: List of barcodes.
@type barcodes: list[str] :arg str candidate: Candidate barcode.
@arg candidate: Candidate barcode. :arg int min_dist: Minimum distance between the barcodes.
@type candidate: str
@arg min_dist: Minimum distance between the barcodes. :returns bool: True if the barcode is clean, False otherwise.
@type min_dist: int
""" """
for i in barcodes: for i in barcodes:
if self.distance(i, candidate) < min_dist: if self.distance(i, candidate) < min_dist:
return False return False
return True return True
#__filterDistance
def filterDistance(self, barcodes, min_dist): def all_barcodes(self, length):
""" """
Filter a list of barcodes for distances with other barcodes. Generate all possible barcodes of a certain length.
@arg barcodes: List of barcodes. :arg int length: Length of the barcodes.
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes. :returns list: List of barcodes.
@type min_dist: int
""" """
result = [] result = []
for i in barcodes: self._all_words(self._nucleotides, '', length, result)
if self.__filterDistance(result, i, min_dist):
result.append(i)
return result return result
#filterDistance
#BarCode
def barcode(length, max_stretch, min_dist, distance): def filter_stretches(self, barcodes, max_stretch):
""" """
Make a set of barcodes, filter them for mononucleotide stretches and for Filter a list of barcodes for mononucleotide stretches.
distances with other barcodes.
@arg length: Lenth of the barcodes.
@type length: int
@arg max_stretch: Maximum mononucleotide stretch length.
@type max_stretch: int
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
"""
B = BarCode(distance)
return B.filterDistance(B.filterStretches(B.allBarcodes(length), :arg list barcodes: List of barcodes.
max_stretch), min_dist) :arg int max_stretch: Maximum mononucleotide stretch length.
#barcode
def testBarcodes(barcodes, min_dist, distance, handle): :returns list: List of barcodes filtered for mononucleotide stretches.
""" """
Test a set of barcodes. stretches = map(lambda x: (max_stretch + 1) * x, self._nucleotides)
result = []
@arg barcodes: List of barcodes. for i in barcodes:
@type barcodes: list[str] if self._filter_stretch(i, stretches):
@arg min_dist: Minimum distance between the barcodes. result.append(i)
@type min_dist: int
@arg distance: Distance function.
@type distance: func
@returns: The number of barcodes that violate the distance constraint. return result
@rtype: int
"""
B = BarCode(distance)
good_subset = B.filterDistance(barcodes, min_dist) def filter_distance(self, barcodes, min_dist):
if handle: """
handle.write("\n".join(good_subset)) Filter a list of barcodes for distance to other barcodes.
return len(barcodes) - len(good_subset) :arg list barcodes: List of barcodes.
#testBarcodes :arg int min_dist: Minimum distance between the barcodes.
def main(): :returns list: List of barcodes filtered for distance to other
""" barcodes.
Main entry point. """
""" result = []
output_parser = argparse.ArgumentParser(add_help=False)
output_parser.add_argument("OUTPUT", type=argparse.FileType('w'), for i in barcodes:
help="output file") if self._filter_distance(result, i, min_dist):
input_parser = argparse.ArgumentParser(add_help=False) result.append(i)
input_parser.add_argument("INPUT", type=argparse.FileType('r'),
help="input file") return result
distance_parser = argparse.ArgumentParser(add_help=False)
distance_parser.add_argument("-d", dest="distance", type=int, default=3,
help="minimum distance between the barcodes (int default=%(default)s)")
distance_parser.add_argument("-H", dest="hamming", default=False,
action="store_true", help="use Hamming distance")
usage = __doc__.split("\n\n\n")
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=usage[0], epilog=usage[1])
parser.add_argument("-v", action="version", version=version(parser.prog))
subparsers = parser.add_subparsers(dest="subcommand")
parser_make = subparsers.add_parser("make", parents=[output_parser,
distance_parser], description=docSplit(barcode))
parser_make.add_argument("-l", dest="length", type=int, default=8,
help="lenght of the barcodes (int default=%(default)s)")
parser_make.add_argument("-s", dest="stretch", type=int, default=2,
help="maximum mononucleotide stretch length (int default=%(default)s)")
parser_test = subparsers.add_parser("test", parents=[input_parser,
distance_parser], description=docSplit(testBarcodes))
parser_test.add_argument("-o", dest="output", type=argparse.FileType('w'),
help="list of good barcodes")
args = parser.parse_args()
dfunc = Levenshtein.distance
if args.hamming:
dfunc = Levenshtein.hamming
if args.subcommand == "make":
args.OUTPUT.write("\n".join(barcode(args.length, args.stretch,
args.distance, dfunc)))
if args.subcommand == "test":
print "%s barcodes violate the distance contraint." % testBarcodes(
map(lambda x: x.strip(), args.INPUT.readlines()), args.distance,
dfunc, args.output)
#main
if __name__ == "__main__":
main()
#!/usr/bin/env python
import argparse
import sys
import Levenshtein
from . import doc_split, version, usage
from .barcode import BarCode
def make_barcodes(length, max_stretch, min_dist, distance):
    """
    Make a set of barcodes, filter them for mononucleotide stretches and for
    distances with other barcodes.

    The work is delegated to a `BarCode` instance in three steps: generate
    all barcodes, filter on stretches, filter on distance.

    :arg int length: Length of the barcodes.
    :arg int max_stretch: Maximum mononucleotide stretch length.
    :arg int min_dist: Minimum distance between the barcodes.
    :arg function distance: Distance function.

    :returns: The result of `BarCode.filter_distance` on the remaining
        barcodes.
    """
    designer = BarCode(distance)

    candidates = designer.all_barcodes(length)
    without_stretches = designer.filter_stretches(candidates, max_stretch)

    return designer.filter_distance(without_stretches, min_dist)
def test_barcodes(barcodes, min_dist, distance, handle):
    """
    Test a set of barcodes.

    The barcodes are filtered with `BarCode.filter_distance`; the ones that
    survive are optionally written to {handle}, separated by newlines.

    :arg list barcodes: List of barcodes (any iterable is accepted).
    :arg int min_dist: Minimum distance between the barcodes.
    :arg function distance: Distance function.
    :arg stream handle: Open writable handle for the good barcodes, or a
        false value to skip writing.

    :returns int: The number of barcodes that violate the distance constraint.
    """
    # Materialise the input once: it is iterated by the filter and measured
    # with len() below, which fails for iterators such as the result of
    # map() under Python 3.
    barcodes = list(barcodes)

    good_subset = BarCode(distance).filter_distance(barcodes, min_dist)
    if handle:
        handle.write('\n'.join(good_subset))

    return len(barcodes) - len(good_subset)
def main():
    """
    Main entry point.

    Parse the command line and run the selected subcommand:

    - `make` writes a newly designed set of barcodes to OUTPUT, one per line.
    - `test` reads barcodes from INPUT, reports on standard output how many
      of them violate the distance constraint and, with `-o`, writes the
      good barcodes to a file.

    The Levenshtein distance is used unless `-H` selects the Hamming distance.
    """
    # Parent parsers holding the arguments shared between the subcommands.
    output_parser = argparse.ArgumentParser(add_help=False)
    output_parser.add_argument(
        'OUTPUT', type=argparse.FileType('w'), help='output file')

    input_parser = argparse.ArgumentParser(add_help=False)
    input_parser.add_argument(
        'INPUT', type=argparse.FileType('r'), help='input file')

    distance_parser = argparse.ArgumentParser(add_help=False)
    distance_parser.add_argument(
        '-d', dest='distance', type=int, default=3,
        help='minimum distance between the barcodes (int default=%(default)s)')
    distance_parser.add_argument(
        '-H', dest='hamming', default=False,
        action='store_true', help='use Hamming distance')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage[0], epilog=usage[1])
    parser.add_argument('-v', action='version', version=version(parser.prog))
    subparsers = parser.add_subparsers(dest='subcommand')

    parser_make = subparsers.add_parser(
        'make', parents=[output_parser, distance_parser],
        description=doc_split(make_barcodes))
    parser_make.add_argument(
        '-l', dest='length', type=int, default=8,
        help='length of the barcodes (int default=%(default)s)')
    parser_make.add_argument(
        '-s', dest='stretch', type=int, default=2,
        help='maximum mononucleotide stretch length (int default=%(default)s)')

    parser_test = subparsers.add_parser(
        'test', parents=[input_parser, distance_parser],
        description=doc_split(test_barcodes))
    parser_test.add_argument(
        '-o', dest='output', type=argparse.FileType('w'),
        help='list of good barcodes')

    args = parser.parse_args()

    dfunc = Levenshtein.distance
    if args.hamming:
        dfunc = Levenshtein.hamming

    if args.subcommand == 'make':
        args.OUTPUT.write('\n'.join(
            make_barcodes(args.length, args.stretch, args.distance, dfunc)))

    if args.subcommand == 'test':
        # A real list is required: test_barcodes() takes len() of its input,
        # which fails on the iterator that map() returns under Python 3.
        barcodes = [line.strip() for line in args.INPUT.readlines()]

        # End the report with a newline, like a regular printed line.
        sys.stdout.write(
            '{} barcodes violate the distance constraint.\n'.format(
                test_barcodes(barcodes, args.distance, dfunc, args.output)))
if __name__ == '__main__':
main()
...@@ -29,7 +29,7 @@ setup( ...@@ -29,7 +29,7 @@ setup(
install_requires=requires, install_requires=requires,
entry_points = { entry_points = {
'console_scripts': [ 'console_scripts': [
'barcode = barcode.barcode:main' 'barcode = barcode.cli:main'
] ]
}, },
classifiers = [ classifiers = [
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment