Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Laros
barcode
Commits
2308bc1e
Commit
2308bc1e
authored
Jun 01, 2013
by
Laros
Browse files
Added a hamming distance selection option and a version option.
parent
1fa761af
Changes
2
Hide whitespace changes
Inline
Side-by-side
barcode/__init__.py
View file @
2308bc1e
...
...
@@ -22,10 +22,10 @@ Licensed under the MIT license, see the LICENSE file.
RELEASE
=
False
__version_info__
=
(
'0'
,
'
3
'
,
'dev'
)
__version_info__
=
(
'0'
,
'
4
'
,
'dev'
)
__version__
=
'.'
.
join
(
__version_info__
)
__author__
=
'LUMC, Jeroen F.J. Laros'
__contact__
=
'j.f.j.laros@
@
lumc.nl'
__contact__
=
'j.f.j.laros@lumc.nl'
__homepage__
=
'https://git.lumc.nl/j.f.j.laros/barcode'
barcode/barcode.py
View file @
2308bc1e
...
...
@@ -9,119 +9,151 @@ Use any of the positional arguments with the -h option for more information.
import
argparse
import
Levenshtein
__nucleotides
=
[
'A'
,
'C'
,
'G'
,
'T'
]
from
.
import
__version__
,
__author__
,
__contact__
,
__homepage__
def docSplit(func):
    """
    Return the first paragraph of the docstring of {func}.

    @arg func: Object whose __doc__ attribute is inspected.
    @type func: function

    @returns: The text before the first blank line of the docstring, or an
        empty string when {func} has no docstring.
    @rtype: str
    """
    doc = func.__doc__

    # __doc__ is None for undocumented callables (and when docstrings are
    # stripped with `python -OO`); calling .split() on it would raise.
    if not doc:
        return ""
    return doc.split("\n\n")[0]
def __allWords(bucket, word, length, result):
    """
    Generate all possible words of a certain length over a specified alphabet.

    Every generated word is the prefix {word} extended with {length} letters
    taken from {bucket}. The words are appended to {result}; nothing is
    returned.

    @arg bucket: An alphabet.
    @type bucket: list[str]
    @arg word: A word over the alphabet {bucket}.
    @type word: str
    @arg length: Length of the barcodes.
    @type length: int
    @arg result: Constructed words.
    @type result: list[str]

    @raise ValueError: If {length} is negative.
    """
    if not length:
        result.append(word)
        return

    # A negative length never counts down to zero; fail early instead of
    # recursing until the interpreter's recursion limit is hit.
    if length < 0:
        raise ValueError("length must be non-negative")

    for letter in bucket:
        __allWords(bucket, word + letter, length - 1, result)
#__allWords
def allBarcodes(length):
    """
    Build every barcode of {length} nucleotides.

    @arg length: Length of the barcodes.
    @type length: int

    @returns: All words of {length} letters over the nucleotide alphabet.
    @rtype: list[str]
    """
    barcodes = []
    # The helper fills the list in place, starting from the empty prefix.
    __allWords(__nucleotides, "", length, barcodes)

    return barcodes
#allBarcodes
def __filterStretch(barcode, stretches):
    """
    Check that {barcode} is free of every stretch listed in {stretches}.

    @arg barcode: A barcode.
    @type barcode: str
    @arg stretches: Substrings that must not occur in {barcode}.
    @type stretches: list[str]

    @returns: False if any stretch occurs in {barcode}, True otherwise.
    @rtype: bool
    """
    return not any(stretch in barcode for stretch in stretches)
#__filterStretch
def filterStretches(barcodes, max_stretch):
    """
    Filter a list of barcodes for mononucleotide stretches.

    A barcode is dropped when it contains a run of {max_stretch} identical
    nucleotides.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg max_stretch: Maximum mononucleotide stretch length.
    @type max_stretch: int

    @returns: The barcodes that contain none of the stretches, in their
        original order.
    @rtype: list[str]
    """
    # Build a real list: on Python 3 map() returns a one-shot iterator that
    # would be exhausted while checking the first barcode, letting every later
    # barcode through unfiltered.
    stretches = [max_stretch * nucleotide for nucleotide in __nucleotides]

    return [i for i in barcodes if __filterStretch(i, stretches)]
#filterStretches
def
__filterDistance
(
barcodes
,
candidate
,
min_dist
):
def
version
(
name
):
"""
Test whether {candidate} can be added to {barcodes} based on the minimum
distance between {candidate} and all barcodes in {barcodes}.
@arg barcodes: List of barcodes.
@type barcodes: list[str]
@arg candidate: Candidate barcode.
@type candidate: str
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
"""
for
i
in
barcodes
:
if
Levenshtein
.
distance
(
i
,
candidate
)
<
min_dist
:
return
False
return
True
#__filterDistance
def filterDistance(barcodes, min_dist):
    """
    Select barcodes that keep a minimum distance to each other.

    The barcodes are visited in order; one is kept only when it passes the
    distance test against all barcodes kept so far.

    @arg barcodes: List of barcodes.
    @type barcodes: list[str]
    @arg min_dist: Minimum distance between the barcodes.
    @type min_dist: int

    @returns: The selected barcodes, in their original order.
    @rtype: list[str]
    """
    accepted = []

    for candidate in barcodes:
        if not __filterDistance(accepted, candidate, min_dist):
            continue
        accepted.append(candidate)

    return accepted
#filterDistance
def
barcode
(
length
,
max_stretch
,
min_dist
):
Return version information.
@arg name: Name of the program.
@type name: str
@returns: Version information.
@rtype: str
"""
return
"%s version %s
\n
%s
\n
%s
\n
%s"
%
(
name
,
__version__
,
__author__
,
__contact__
,
__homepage__
)
#version
class BarCode(object):
    """
    Design and test NGS barcodes.

    The distance function used to compare barcodes is supplied at
    construction time.
    """
    __nucleotides = ['A', 'C', 'G', 'T']

    def __init__(self, distance=Levenshtein.distance):
        """
        Initialise the class.

        @arg distance: Distance function; it is called with two barcodes and
            its result is compared against the minimum distance.
        @type distance: func
        """
        self.distance = distance
    #__init__

    def __allWords(self, bucket, word, length, result):
        """
        Generate all possible words of a certain length over a specified
        alphabet.

        Every generated word is the prefix {word} extended with {length}
        letters taken from {bucket}. The words are appended to {result};
        nothing is returned.

        @arg bucket: An alphabet.
        @type bucket: list[str]
        @arg word: A word over the alphabet {bucket}.
        @type word: str
        @arg length: Length of the barcodes.
        @type length: int
        @arg result: Constructed words.
        @type result: list[str]

        @raise ValueError: If {length} is negative.
        """
        if not length:
            result.append(word)
            return

        # A negative length never counts down to zero; fail early instead of
        # recursing until the interpreter's recursion limit is hit.
        if length < 0:
            raise ValueError("length must be non-negative")

        for letter in bucket:
            self.__allWords(bucket, word + letter, length - 1, result)
    #__allWords

    def allBarcodes(self, length):
        """
        Generate all possible barcodes of a certain length.

        @arg length: Length of the barcodes.
        @type length: int

        @returns: All words of {length} letters over the nucleotide alphabet.
        @rtype: list[str]
        """
        result = []

        self.__allWords(self.__nucleotides, "", length, result)

        return result
    #allBarcodes

    def __filterStretch(self, barcode, stretches):
        """
        Test whether {barcode} contains none of the stretches in {stretches}.

        @arg barcode: A barcode.
        @type barcode: str
        @arg stretches: Substrings that must not occur in {barcode}.
        @type stretches: list[str]

        @returns: False if any stretch occurs in {barcode}, True otherwise.
        @rtype: bool
        """
        return not any(stretch in barcode for stretch in stretches)
    #__filterStretch

    def filterStretches(self, barcodes, max_stretch):
        """
        Filter a list of barcodes for mononucleotide stretches.

        A barcode is dropped when it contains a run of {max_stretch}
        identical nucleotides.

        @arg barcodes: List of barcodes.
        @type barcodes: list[str]
        @arg max_stretch: Maximum mononucleotide stretch length.
        @type max_stretch: int

        @returns: The barcodes that contain none of the stretches, in their
            original order.
        @rtype: list[str]
        """
        # Build a real list: on Python 3 map() returns a one-shot iterator
        # that would be exhausted while checking the first barcode, letting
        # every later barcode through unfiltered.
        stretches = [max_stretch * nucleotide
            for nucleotide in self.__nucleotides]

        return [i for i in barcodes if self.__filterStretch(i, stretches)]
    #filterStretches

    def __filterDistance(self, barcodes, candidate, min_dist):
        """
        Test whether {candidate} can be added to {barcodes} based on the
        minimum distance between {candidate} and all barcodes in {barcodes}.

        @arg barcodes: List of barcodes.
        @type barcodes: list[str]
        @arg candidate: Candidate barcode.
        @type candidate: str
        @arg min_dist: Minimum distance between the barcodes.
        @type min_dist: int

        @returns: False if {candidate} is closer than {min_dist} to any
            barcode in {barcodes}, True otherwise.
        @rtype: bool
        """
        return not any(
            self.distance(i, candidate) < min_dist for i in barcodes)
    #__filterDistance

    def filterDistance(self, barcodes, min_dist):
        """
        Filter a list of barcodes for distances with other barcodes.

        The barcodes are visited in order; one is kept only when it is at
        least {min_dist} away from every barcode kept so far, so the outcome
        depends on the input order.

        @arg barcodes: List of barcodes.
        @type barcodes: list[str]
        @arg min_dist: Minimum distance between the barcodes.
        @type min_dist: int

        @returns: The selected barcodes, in their original order.
        @rtype: list[str]
        """
        result = []

        for i in barcodes:
            if self.__filterDistance(result, i, min_dist):
                result.append(i)

        return result
    #filterDistance
#BarCode
def
barcode
(
length
,
max_stretch
,
min_dist
,
distance
):
"""
Make a set of barcodes, filter them for mononucleotide stretches and for
distances with other barcodes.
...
...
@@ -132,12 +164,16 @@ def barcode(length, max_stretch, min_dist):
@type max_stretch: int
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
"""
return
filterDistance
(
filterStretches
(
allBarcodes
(
length
),
max_stretch
),
min_dist
)
B
=
BarCode
(
distance
)
return
B
.
filterDistance
(
B
.
filterStretches
(
B
.
allBarcodes
(
length
),
max_stretch
),
min_dist
)
#barcode
def
testBarcodes
(
barcodes
,
min_dist
):
def
testBarcodes
(
barcodes
,
min_dist
,
distance
):
"""
Test a set of barcodes.
...
...
@@ -145,11 +181,15 @@ def testBarcodes(barcodes, min_dist):
@type barcodes: list[str]
@arg min_dist: Minimum distance between the barcodes.
@type min_dist: int
@arg distance: Distance function.
@type distance: func
@returns: The number of barcodes that violate the distance constraint.
@rtype: int
"""
return
len
(
barcodes
)
-
len
(
filterDistance
(
barcodes
,
min_dist
))
B
=
BarCode
(
distance
)
return
len
(
barcodes
)
-
len
(
B
.
filterDistance
(
barcodes
,
min_dist
))
#testBarcodes
def
main
():
...
...
@@ -165,11 +205,14 @@ def main():
distance_parser
=
argparse
.
ArgumentParser
(
add_help
=
False
)
distance_parser
.
add_argument
(
"-d"
,
dest
=
"distance"
,
type
=
int
,
default
=
3
,
help
=
"minimum distance between the barcodes"
)
distance_parser
.
add_argument
(
"-H"
,
dest
=
"hamming"
,
default
=
False
,
action
=
"store_true"
,
help
=
"use Hamming distance"
)
usage
=
__doc__
.
split
(
"
\n\n\n
"
)
parser
=
argparse
.
ArgumentParser
(
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
description
=
usage
[
0
],
epilog
=
usage
[
1
])
parser
.
add_argument
(
"-v"
,
action
=
"version"
,
version
=
version
(
parser
.
prog
))
subparsers
=
parser
.
add_subparsers
(
dest
=
"subcommand"
)
parser_make
=
subparsers
.
add_parser
(
"make"
,
parents
=
[
output_parser
,
...
...
@@ -184,13 +227,18 @@ def main():
args
=
parser
.
parse_args
()
dfunc
=
Levenshtein
.
distance
if
args
.
hamming
:
dfunc
=
Levenshtein
.
hamming
if
args
.
subcommand
==
"make"
:
args
.
OUTPUT
.
write
(
"
\n
"
.
join
(
barcode
(
args
.
length
,
args
.
stretch
,
args
.
distance
)))
args
.
distance
,
dfunc
)))
if
args
.
subcommand
==
"test"
:
print
"%s barcodes violate the distance contraint."
%
testBarcodes
(
map
(
lambda
x
:
x
.
strip
(),
args
.
INPUT
.
readlines
()),
args
.
distance
)
map
(
lambda
x
:
x
.
strip
(),
args
.
INPUT
.
readlines
()),
args
.
distance
,
dfunc
)
#main
if
__name__
==
"__main__"
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment