Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Laros
barcode
Commits
35e0fc3d
Commit
35e0fc3d
authored
Nov 05, 2016
by
Laros
Browse files
PEP 8.
parent
a5146bbf
Changes
5
Hide whitespace changes
Inline
Side-by-side
.gitignore
0 → 100644
View file @
35e0fc3d
*.pyc
.cache/
.tox/
build/
dist/
barcode.egg-info/
barcode/__init__.py
View file @
35e0fc3d
"""
b
arcode: Design NGS barcodes.
B
arcode: Design NGS barcodes.
Copyright (c) 2013 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2013 Jeroen F.J. Laros <j.f.j.laros@lumc.nl>
Copyright (c) 2013-2016 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2013-2016 Jeroen F.J. Laros <J.F.J.Laros@lumc.nl>
Licensed under the MIT license, see the LICENSE file.
"""
from
.barcode
import
BarCode
# In the event of a new release, we update the __version_info__ package
# global and set RELEASE to True.
# Before a release, a development version is denoted by a __version_info__
# ending with a 'dev' item and RELEASE is set to False.
#
# We follow a versioning scheme compatible with setuptools [1] where the
# __version_info__ variable always contains the version of the upcoming
# release (and not that of the previous release), post-fixed with a 'dev'
# item. Only in a release commit, this 'dev' item is removed (and added
# again in the next commit).
#
# [1] http://peak.telecommunity.com/DevCenter/setuptools#specifying-your-project-s-version
RELEASE
=
False
__version_info__
=
(
'0'
,
'5'
,
'1'
)
__version_info__
=
(
'0'
,
'6'
,
'0'
)
__version__
=
'.'
.
join
(
__version_info__
)
__author__
=
'LUMC, Jeroen F.J. Laros'
__contact__
=
'
j.f.j.l
aros@lumc.nl'
__contact__
=
'
J.F.J.L
aros@lumc.nl'
__homepage__
=
'https://git.lumc.nl/j.f.j.laros/barcode'
# Sections of the package docstring, split wherever three consecutive newline
# characters (i.e. two blank lines) occur.
usage = __doc__.split('\n\n\n')
def doc_split(func):
    """
    Return the first paragraph of the docstring of an object.

    The first paragraph is everything that precedes the first blank line; no
    whitespace is stripped from it.

    :arg function func: Object of which the `__doc__` attribute is used.

    :returns str: First paragraph of the docstring, or an empty string when
        the object has no docstring.
    """
    # `__doc__` is None for undocumented objects and when Python runs with
    # the -OO option; fall back to an empty string instead of failing with an
    # AttributeError.
    return (func.__doc__ or '').split('\n\n')[0]
def version(name):
    """
    Build the version banner of the program.

    :arg str name: Name of the program.

    :returns str: Program name and version, followed by the author with the
        contact address and the homepage.
    """
    template = '{} version {}\n\nAuthor : {} <{}>\nHomepage : {}'
    fields = (name, __version__, __author__, __contact__, __homepage__)

    return template.format(*fields)
barcode/barcode.py
View file @
35e0fc3d
#!/usr/bin/env python
"""
Design NGS barcodes.
Use any of the positional arguments with the -h option for more information.
"""
import
argparse
import
Levenshtein
from
.
import
__version__
,
__author__
,
__contact__
,
__homepage__
def
docSplit
(
func
):
return
func
.
__doc__
.
split
(
"
\n\n
"
)[
0
]
def
version
(
name
):
"""
Return version information.
@arg name: Name of the program.
@type name: str
@returns: Version information.
@rtype: str
"""
return
"%s version %s
\n
%s
\n
%s
\n
%s"
%
(
name
,
__version__
,
__author__
,
__contact__
,
__homepage__
)
#version
class
BarCode
(
object
):
"""
Design and test NGS barcodes.
"""
__nucleotides
=
[
'A'
,
'C'
,
'G'
,
'T'
]
_nucleotides
=
[
'A'
,
'C'
,
'G'
,
'T'
]
def
__init__
(
self
,
distance
=
Levenshtein
.
distance
):
"""
Initialise the class.
@arg distance: Distance function.
@type distance: func
:arg function distance: Distance function.
"""
self
.
distance
=
distance
#__init__
def
_
_all
W
ords
(
self
,
bucket
,
word
,
length
,
result
):
def
_all
_w
ords
(
self
,
bucket
,
word
,
length
,
result
):
"""
Generate all possible words of a certain length over a specified
alphabet.
@arg bucket: An alphabet.
@type bucket: list[str]
@arg word: A word over the alphabet {bucket}.
@type word: str
@arg length: Lenth of the barcodes.
@type length: int
@arg result: Constructed words.
@type result: list[str]
:arg list bucket: An alphabet.
:arg str word: A word over the alphabet {bucket}.
:arg int length: Lenth of the barcodes.
:arg list result: Constructed words.
"""
if
length
:
for
i
in
bucket
:
self
.
_
_
all
W
ords
(
bucket
,
word
+
i
,
length
-
1
,
result
)
self
.
_all
_w
ords
(
bucket
,
word
+
i
,
length
-
1
,
result
)
else
:
result
.
append
(
word
)
#__allWords
def
allBarcodes
(
self
,
length
):
"""
Generate all possible barcodes of a certain length.
@arg length: Lenth of the barcodes.
@type length: int
"""
result
=
[]
self
.
__allWords
(
self
.
__nucleotides
,
""
,
length
,
result
)
return
result
#allBarcodes
def
__filterStretch
(
self
,
barcode
,
stretches
):
def
_filter_stretch
(
self
,
barcode
,
stretches
):
"""
Test whether {barcode} contains none of the stretches in {stretches}.
@
arg barcode: A barcode.
@type barcode: str
@arg stretches:
@type stretches: list[str]
:
arg
str
barcode: A barcode.
:arg list stretches:
:returns bool: True if the barcode is clean, False otherwise.
"""
for
i
in
stretches
:
if
i
in
barcode
:
return
False
return
True
#__filterStretch
def
filterStretches
(
self
,
barcodes
,
max_stretch
):
"""
Filter a list of barcodes for mononucleotide stretches.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg max_stretch: Maximum mononucleotide stretch length.
@type max_stretch: int
"""
stretches
=
map
(
lambda
x
:
(
max_stretch
+
1
)
*
x
,
self
.
__nucleotides
)
result
=
[]
for
i
in
barcodes
:
if
self
.
__filterStretch
(
i
,
stretches
):
result
.
append
(
i
)
return
result
#filterStretches
def
__filterDistance
(
self
,
barcodes
,
candidate
,
min_dist
):
def
_filter_distance
(
self
,
barcodes
,
candidate
,
min_dist
):
"""
Test whether {candidate} can be added to {barcodes} based on the
minimum distance between {candidate} and all barcodes in {barcodes}.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg candidate: Candidate barcode.
@type candidate: str
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
:arg list barcodes: List of barcodes.
:arg str candidate: Candidate barcode.
:arg int min_dist: Minimum distance between the barcodes.
:returns bool: True if the barcode is clean, False otherwise.
"""
for
i
in
barcodes
:
if
self
.
distance
(
i
,
candidate
)
<
min_dist
:
return
False
return
True
#__filterDistance
def
filterDistance
(
self
,
barcodes
,
min_dist
):
def
all_barcodes
(
self
,
length
):
"""
Fil
te
r
a
list of barcodes for distances with other barcodes
.
Genera
te a
ll possible barcodes of a certain length
.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
:arg int length: Lenth of the barcodes.
:returns list: List of barcodes.
"""
result
=
[]
for
i
in
barcodes
:
if
self
.
__filterDistance
(
result
,
i
,
min_dist
):
result
.
append
(
i
)
self
.
_all_words
(
self
.
_nucleotides
,
''
,
length
,
result
)
return
result
#filterDistance
#BarCode
def
barcode
(
length
,
max_stretch
,
min_dist
,
distance
):
"""
Make a set of barcodes, filter them for mononucleotide stretches and for
distances with other barcodes.
@arg length: Lenth of the barcodes.
@type length: int
@arg max_stretch: Maximum mononucleotide stretch length.
@type max_stretch: int
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
"""
B
=
BarCode
(
distance
)
def
filter_stretches
(
self
,
barcodes
,
max_stretch
):
"""
Filter a list of barcodes for mononucleotide stretches.
return
B
.
filterDistance
(
B
.
filterStretches
(
B
.
allBarcodes
(
length
),
max_stretch
),
min_dist
)
#barcode
:arg list barcodes: List of barcodes.
:arg int max_stretch: Maximum mononucleotide stretch length.
def
testBarcodes
(
barcodes
,
min_dist
,
distance
,
handle
):
"""
Test a set of barcodes.
:returns list: List of barcodes filtered for mononucleotide stretches.
"""
stretches
=
map
(
lambda
x
:
(
max_stretch
+
1
)
*
x
,
self
.
_nucleotides
)
result
=
[]
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
for
i
in
barcodes
:
if
self
.
_filter_stretch
(
i
,
stretches
):
result
.
append
(
i
)
@returns: The number of barcodes that violate the distance constraint.
@rtype: int
"""
B
=
BarCode
(
distance
)
return
result
good_subset
=
B
.
filter
D
istance
(
barcodes
,
min_dist
)
if
handle
:
handle
.
write
(
"
\n
"
.
join
(
good_subset
))
def
filter
_d
istance
(
self
,
barcodes
,
min_dist
)
:
"""
Filter a list of barcodes for distance to other barcodes.
return
len
(
barcodes
)
-
len
(
good_subset
)
#testB
arcodes
:arg list barcodes: List of barcodes.
:arg int min_dist: Minimum distance between the b
arcodes
.
def
main
():
"""
Main entry point.
"""
output_parser
=
argparse
.
ArgumentParser
(
add_help
=
False
)
output_parser
.
add_argument
(
"OUTPUT"
,
type
=
argparse
.
FileType
(
'w'
),
help
=
"output file"
)
input_parser
=
argparse
.
ArgumentParser
(
add_help
=
False
)
input_parser
.
add_argument
(
"INPUT"
,
type
=
argparse
.
FileType
(
'r'
),
help
=
"input file"
)
distance_parser
=
argparse
.
ArgumentParser
(
add_help
=
False
)
distance_parser
.
add_argument
(
"-d"
,
dest
=
"distance"
,
type
=
int
,
default
=
3
,
help
=
"minimum distance between the barcodes (int default=%(default)s)"
)
distance_parser
.
add_argument
(
"-H"
,
dest
=
"hamming"
,
default
=
False
,
action
=
"store_true"
,
help
=
"use Hamming distance"
)
usage
=
__doc__
.
split
(
"
\n\n\n
"
)
parser
=
argparse
.
ArgumentParser
(
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
description
=
usage
[
0
],
epilog
=
usage
[
1
])
parser
.
add_argument
(
"-v"
,
action
=
"version"
,
version
=
version
(
parser
.
prog
))
subparsers
=
parser
.
add_subparsers
(
dest
=
"subcommand"
)
parser_make
=
subparsers
.
add_parser
(
"make"
,
parents
=
[
output_parser
,
distance_parser
],
description
=
docSplit
(
barcode
))
parser_make
.
add_argument
(
"-l"
,
dest
=
"length"
,
type
=
int
,
default
=
8
,
help
=
"lenght of the barcodes (int default=%(default)s)"
)
parser_make
.
add_argument
(
"-s"
,
dest
=
"stretch"
,
type
=
int
,
default
=
2
,
help
=
"maximum mononucleotide stretch length (int default=%(default)s)"
)
parser_test
=
subparsers
.
add_parser
(
"test"
,
parents
=
[
input_parser
,
distance_parser
],
description
=
docSplit
(
testBarcodes
))
parser_test
.
add_argument
(
"-o"
,
dest
=
"output"
,
type
=
argparse
.
FileType
(
'w'
),
help
=
"list of good barcodes"
)
args
=
parser
.
parse_args
()
dfunc
=
Levenshtein
.
distance
if
args
.
hamming
:
dfunc
=
Levenshtein
.
hamming
if
args
.
subcommand
==
"make"
:
args
.
OUTPUT
.
write
(
"
\n
"
.
join
(
barcode
(
args
.
length
,
args
.
stretch
,
args
.
distance
,
dfunc
)))
if
args
.
subcommand
==
"test"
:
print
"%s barcodes violate the distance contraint."
%
testBarcodes
(
map
(
lambda
x
:
x
.
strip
(),
args
.
INPUT
.
readlines
()),
args
.
distance
,
dfunc
,
args
.
output
)
#main
if
__name__
==
"__main__"
:
main
()
:returns list: List of barcodes filtered for distance to other
barcodes.
"""
result
=
[]
for
i
in
barcodes
:
if
self
.
_filter_distance
(
result
,
i
,
min_dist
):
result
.
append
(
i
)
return
result
barcode/cli.py
0 → 100644
View file @
35e0fc3d
#!/usr/bin/env python
import
argparse
import
sys
import
Levenshtein
from
.
import
doc_split
,
version
,
usage
from
.barcode
import
BarCode
def make_barcodes(length, max_stretch, min_dist, distance):
    """
    Make a set of barcodes, filter them for mononucleotide stretches and for
    distances with other barcodes.

    :arg int length: Length of the barcodes.
    :arg int max_stretch: Maximum mononucleotide stretch length.
    :arg int min_dist: Minimum distance between the barcodes.
    :arg function distance: Distance function.

    :returns: The result of the final distance filter.
    """
    # NOTE: the first paragraph of the docstring above is shown as the
    # description of the `make` subcommand, so keep it user readable.
    designer = BarCode(distance)

    candidates = designer.all_barcodes(length)
    without_stretches = designer.filter_stretches(candidates, max_stretch)

    return designer.filter_distance(without_stretches, min_dist)
def test_barcodes(barcodes, min_dist, distance, handle):
    """
    Test a set of barcodes.

    :arg list barcodes: List of barcodes, any iterable is accepted.
    :arg int min_dist: Minimum distance between the barcodes.
    :arg function distance: Distance function.
    :arg stream handle: Writable handle that receives the accepted barcodes
        separated by newlines, nothing is written when it is false (e.g.,
        None).

    :returns int: The number of barcodes that violate the distance constraint.
    """
    # The input is iterated by the filter and measured with len() afterwards.
    # Materialise it first, otherwise a one-shot iterator (like the result of
    # map() on Python 3) raises a TypeError on len().
    barcodes = list(barcodes)

    good_subset = BarCode(distance).filter_distance(barcodes, min_dist)
    if handle:
        handle.write('\n'.join(good_subset))

    return len(barcodes) - len(good_subset)
def main():
    """
    Main entry point.

    Parse the command line and run one of the subcommands:

    - `make`: write newly designed barcodes to the OUTPUT file.
    - `test`: read barcodes from the INPUT file (one per line), optionally
      write the accepted ones to the file given with -o and report the number
      of barcodes that violate the distance constraint on standard output.
    """
    # Parent parsers holding the arguments shared between subcommands.
    output_parser = argparse.ArgumentParser(add_help=False)
    output_parser.add_argument(
        'OUTPUT', type=argparse.FileType('w'), help='output file')

    input_parser = argparse.ArgumentParser(add_help=False)
    input_parser.add_argument(
        'INPUT', type=argparse.FileType('r'), help='input file')

    distance_parser = argparse.ArgumentParser(add_help=False)
    distance_parser.add_argument(
        '-d', dest='distance', type=int, default=3,
        help='minimum distance between the barcodes (int default=%(default)s)')
    distance_parser.add_argument(
        '-H', dest='hamming', default=False, action='store_true',
        help='use Hamming distance')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage[0], epilog=usage[1])
    parser.add_argument('-v', action='version', version=version(parser.prog))

    subparsers = parser.add_subparsers(dest='subcommand')
    # On Python 3 a subcommand is optional by default; without one the
    # options defined on the subparsers (e.g. `hamming`) are missing from the
    # namespace and the code below fails with an AttributeError. Requiring a
    # subcommand makes argparse print a usage error instead.
    subparsers.required = True

    parser_make = subparsers.add_parser(
        'make', parents=[output_parser, distance_parser],
        description=doc_split(make_barcodes))
    parser_make.add_argument(
        '-l', dest='length', type=int, default=8,
        help='lenght of the barcodes (int default=%(default)s)')
    parser_make.add_argument(
        '-s', dest='stretch', type=int, default=2,
        help='maximum mononucleotide stretch length (int default=%(default)s)')

    parser_test = subparsers.add_parser(
        'test', parents=[input_parser, distance_parser],
        description=doc_split(test_barcodes))
    parser_test.add_argument(
        '-o', dest='output', type=argparse.FileType('w'),
        help='list of good barcodes')

    args = parser.parse_args()

    dfunc = Levenshtein.hamming if args.hamming else Levenshtein.distance

    if args.subcommand == 'make':
        args.OUTPUT.write('\n'.join(make_barcodes(
            args.length, args.stretch, args.distance, dfunc)))
    elif args.subcommand == 'test':
        # Build a real list: the barcodes are iterated and counted, which a
        # lazy map() object on Python 3 does not support.
        barcodes = [line.strip() for line in args.INPUT.readlines()]
        violations = test_barcodes(
            barcodes, args.distance, dfunc, args.output)
        # Terminate the message with a newline so the shell prompt does not
        # end up on the same line.
        sys.stdout.write(
            '{} barcodes violate the distance contraint.\n'.format(
                violations))
# Run the command line interface when this module is executed as a script.
# NOTE(review): the module uses package-relative imports, so it presumably has
# to be started as `python -m barcode.cli` rather than by file path - confirm.
if __name__ == '__main__':
    main()
setup.py
View file @
35e0fc3d
...
...
@@ -29,7 +29,7 @@ setup(
install_requires
=
requires
,
entry_points
=
{
'console_scripts'
:
[
'barcode = barcode.
barcode
:main'
'barcode = barcode.
cli
:main'
]
},
classifiers
=
[
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment