Commit 82e8f875 authored by Laros's avatar Laros
Browse files

First version of a barcode design toolkit.

parents
Copyright (c) 2013 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2013 Jeroen F.J. Laros <j.f.j.laros@lumc.nl>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
barcode: Design NGS barcodes.
Copyright (c) 2013 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2013 Jeroen F.J. Laros <j.f.j.laros@lumc.nl>
Licensed under the MIT license, see the LICENSE file.
"""
# On the event of a new release, we update the __version_info__ package
# global and set RELEASE to True.
# Before a release, a development version is denoted by a __version_info__
# ending with a 'dev' item and RELEASE is set to False.
#
# We follow a versioning scheme compatible with setuptools [1] where the
# __version_info__ variable always contains the version of the upcoming
# release (and not that of the previous release), post-fixed with a 'dev'
# item. Only in a release commit, this 'dev' item is removed (and added
# again in the next commit).
#
# [1] http://peak.telecommunity.com/DevCenter/setuptools#specifying-your-project-s-version
# False for development versions, True only in a release commit.
RELEASE = False

# Version of the upcoming release; the trailing 'dev' item marks a
# development version.
__version_info__ = ('0', '1', 'dev')

# Dotted version string derived from __version_info__.
__version__ = '.'.join(__version_info__)
__author__ = 'LUMC, Jeroen F.J. Laros'
# Contact address; must be a well-formed e-mail address (a single '@').
__contact__ = 'j.f.j.laros@lumc.nl'
__homepage__ = 'https://git.lumc.nl/j.f.j.laros/barcode'
#!/usr/bin/env python
"""
Design NGS barcodes.
Use any of the positional arguments with the -h option for more information.
"""
import argparse
import Levenshtein
# Alphabet over which the barcodes are constructed.
__nucleotides = list("ACGT")
def docSplit(func):
    """
    Return the first paragraph of the docstring of {func}.

    The first paragraph is everything up to the first blank line. If {func}
    has no docstring (e.g., when running with -OO), an empty string is
    returned instead of raising an AttributeError.

    @arg func: Any object with a __doc__ attribute.
    @type func: object

    @returns: First paragraph of the docstring, or an empty string.
    @rtype: str
    """
    return (func.__doc__ or "").split("\n\n")[0]
#docSplit
def __allWords(bucket, word, length, result):
    """
    Generate all possible words of a certain length over a specified alphabet.

    Every word consists of the prefix {word} followed by {length} letters
    from {bucket}. The words are appended to {result} in the order induced by
    the order of the letters in {bucket}.

    @arg bucket: An alphabet.
    @type bucket: list[str]
    @arg word: A word over the alphabet {bucket}, used as prefix.
    @type word: str
    @arg length: Number of letters to add to {word}.
    @type length: int
    @arg result: Constructed words, extended in place.
    @type result: list[str]

    @raise ValueError: If {length} is negative.
    """
    # A negative length can never count down to zero; reject it explicitly
    # instead of recursing until the interpreter gives up.
    if length < 0:
        raise ValueError("length must be non-negative, got %r" % (length,))

    # Grow all words one letter at a time; this keeps the same ordering as a
    # depth first enumeration without using recursion.
    words = [word]
    for _ in range(length):
        words = [prefix + letter for prefix in words for letter in bucket]

    result.extend(words)
#__allWords
def allBarcodes(length):
    """
    Enumerate every barcode of the requested length.

    The barcodes are built over the module level nucleotide alphabet,
    starting from the empty word.

    @arg length: Length of the barcodes.
    @type length: int

    @returns: All barcodes of length {length}.
    @rtype: list[str]
    """
    barcodes = []
    # The helper fills the list that is passed as its last argument.
    __allWords(__nucleotides, "", length, barcodes)

    return barcodes
#allBarcodes
def __filterStretch(barcode, stretches):
    """
    Check that {barcode} is free of all stretches in {stretches}.

    @arg barcode: A barcode.
    @type barcode: str
    @arg stretches: Substrings that are not allowed to occur in {barcode}.
    @type stretches: list[str]

    @returns: False if any of the stretches occurs in {barcode}, True
        otherwise.
    @rtype: bool
    """
    return not any(stretch in barcode for stretch in stretches)
#__filterStretch
def filterStretches(barcodes, max_stretch):
    """
    Filter a list of barcodes for mononucleotide stretches.

    A barcode is kept when none of its mononucleotide stretches is longer
    than {max_stretch}.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg max_stretch: Maximum mononucleotide stretch length.
    @type max_stretch: int

    @returns: The barcodes that pass the filter, in their original order.
    @rtype: list[str]
    """
    # A stretch of exactly {max_stretch} is still allowed, so the forbidden
    # substrings are one nucleotide longer than the maximum.
    # The stretches are stored in a list because they are reused for every
    # barcode; a map() iterator would be exhausted after the first use on
    # Python 3.
    stretches = [(max_stretch + 1) * nucleotide
        for nucleotide in __nucleotides]

    return [candidate for candidate in barcodes
        if __filterStretch(candidate, stretches)]
#filterStretches
def __filterDistance(barcodes, candidate, min_dist):
    """
    Decide whether {candidate} is far enough from all barcodes in {barcodes}.

    The distance between two barcodes is calculated with
    Levenshtein.distance.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg candidate: Candidate barcode.
    @type candidate: str
    @arg min_dist: Minimum distance between the barcodes.
    @type min_dist: int

    @returns: True if the distance between {candidate} and every barcode in
        {barcodes} is at least {min_dist}, False otherwise.
    @rtype: bool
    """
    return all(Levenshtein.distance(accepted, candidate) >= min_dist
        for accepted in barcodes)
#__filterDistance
def filterDistance(barcodes, min_dist):
    """
    Select barcodes that are sufficiently distant from each other.

    The barcodes are visited in the given order; a barcode is selected when
    it passes the distance test against all barcodes selected before it.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg min_dist: Minimum distance between the barcodes.
    @type min_dist: int

    @returns: The selected barcodes, in their original order.
    @rtype: list[str]
    """
    selected = []

    for candidate in barcodes:
        # Skip candidates that are too close to an already selected barcode.
        if not __filterDistance(selected, candidate, min_dist):
            continue
        selected.append(candidate)

    return selected
#filterDistance
def barcode(length, max_stretch, min_dist):
    """
    Make a set of barcodes, filter them for mononucleotide stretches and for
    distances with other barcodes.

    @arg length: Length of the barcodes.
    @type length: int
    @arg max_stretch: Maximum mononucleotide stretch length.
    @type max_stretch: int
    @arg min_dist: Minimum distance between the barcodes.
    @type min_dist: int

    @returns: The barcodes that pass both filters.
    @rtype: list[str]
    """
    # Generate, then filter on stretches, then filter on mutual distance.
    candidates = allBarcodes(length)
    without_stretches = filterStretches(candidates, max_stretch)

    return filterDistance(without_stretches, min_dist)
#barcode
def main():
    """
    Main entry point.

    Build the command line interface, parse the command line arguments and
    run the selected subcommand. The "makebc" subcommand writes the selected
    barcodes, separated by newlines, to the output file.
    """
    output_parser = argparse.ArgumentParser(add_help=False)
    output_parser.add_argument("OUTPUT", type=argparse.FileType('w'),
        help="output file")

    # The module docstring contains the description and the epilog, separated
    # by two blank lines. Tolerate a stripped docstring (python -OO) or a
    # missing epilog instead of failing before any argument is parsed.
    usage = (__doc__ or "").split("\n\n\n")
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage[0], epilog=usage[1] if len(usage) > 1 else None)
    subparsers = parser.add_subparsers(dest="subcommand")
    # On Python 3 subcommands are optional by default, which would make the
    # program silently do nothing when no subcommand is given.
    subparsers.required = True

    parser_makebc = subparsers.add_parser("makebc",
        parents=[output_parser], description=docSplit(barcode))
    parser_makebc.add_argument("-l", dest="length", type=int, default=8,
        help="length of the barcodes")
    parser_makebc.add_argument("-s", dest="stretch", type=int, default=2,
        help="maximum mononucleotide stretch length")
    parser_makebc.add_argument("-d", dest="distance", type=int, default=3,
        help="minimum distance between the barcodes")

    args = parser.parse_args()

    if args.subcommand == "makebc":
        args.OUTPUT.write("\n".join(barcode(args.length, args.stretch,
            args.distance)))
#main
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
import sys
from setuptools import setup
# Refuse to continue on interpreters older than the supported minimum.
if sys.version_info < (2, 6):
    raise Exception('barcode requires Python 2.6 or higher.')

# Todo: How does this play with pip freeze requirement files?
requires = ['Levenshtein']

# Python 2.6 does not include the argparse module, so it is only added as a
# requirement when it can not be imported.
try:
    import argparse
except ImportError:
    requires.append('argparse')

# The package metadata (version, author, etc.) is read from the package
# itself.
import barcode as distmeta

# Command line programs that are installed with the package.
console_scripts = [
    'barcode = barcode.barcode:main'
]

# Trove classifiers describing the package.
classifiers = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Science/Research',
    'Intended Audience :: Developers',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Topic :: Scientific/Engineering',
]

setup(
    name='barcode',
    version=distmeta.__version__,
    description='Design NGS barcodes.',
    long_description=distmeta.__doc__,
    author=distmeta.__author__,
    author_email=distmeta.__contact__,
    url=distmeta.__homepage__,
    license='MIT License',
    platforms=['any'],
    packages=['barcode'],
    install_requires=requires,
    entry_points={'console_scripts': console_scripts},
    classifiers=classifiers,
    keywords='bioinformatics'
)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment