Laros
barcode
Commits
288e605c
Commit
288e605c
authored
Nov 05, 2016
by
Laros
Browse files
Got rid of the class.
parent
35e0fc3d
Changes
3
Hide whitespace changes
Inline
Side-by-side
barcode/__init__.py
View file @
288e605c
...
...
@@ -7,7 +7,7 @@ Copyright (c) 2013-2016 Jeroen F.J. Laros <J.F.J.Laros@lumc.nl>
Licensed under the MIT license, see the LICENSE file.
"""
from
.barcode
import
B
ar
C
ode
from
.barcode
import
filter_distance
,
all_b
ar
c
ode
s
,
filter_stretches
__version_info__
=
(
'0'
,
'6'
,
'0'
)
...
...
barcode/barcode.py
View file @
288e605c
import
Levenshtein
class
BarCode
(
object
):
_nucleotides
=
[
'A'
,
'C'
,
'G'
,
'T'
]
def
_all_words
(
bucket
,
word
,
length
,
result
):
"""
Design and test NGS barcodes.
Generate all possible words of a certain length over a specified
alphabet.
:arg list bucket: An alphabet.
:arg str word: A word over the alphabet {bucket}.
:arg int length: Length of the barcodes.
:arg list result: Constructed words.
"""
_nucleotides
=
[
'A'
,
'C'
,
'G'
,
'T'
]
if
length
:
for
i
in
bucket
:
_all_words
(
bucket
,
word
+
i
,
length
-
1
,
result
)
else
:
result
.
append
(
word
)
def
__init__
(
self
,
distance
=
Levenshtein
.
distance
):
"""
Initialise the class.
:arg function distance: Distance function.
"""
self
.
distance
=
distance
def
_filter_stretch
(
barcode
,
stretches
):
"""
Test whether {barcode} contains none of the stretches in {stretches}.
def
_all_words
(
self
,
bucket
,
word
,
length
,
result
):
"""
Generate all possible words of a certain length over a specified
alphabet.
:arg str barcode: A barcode.
:arg list stretches:
:arg list bucket: An alphabet.
:arg str word: A word over the alphabet {bucket}.
:arg int length: Length of the barcodes.
:arg list result: Constructed words.
"""
if
length
:
for
i
in
bucket
:
self
.
_all_words
(
bucket
,
word
+
i
,
length
-
1
,
result
)
else
:
result
.
append
(
word
)
:returns bool: True if the barcode is clean, False otherwise.
"""
for
i
in
stretches
:
if
i
in
barcode
:
return
False
return
True
def
_filter_stretch
(
self
,
barcode
,
stretches
):
"""
Test whether {barcode} contains none of the stretches in {stretches}.
:arg str barcode: A barcode.
:arg list stretches:
def
_filter_distance
(
barcodes
,
candidate
,
min_dist
,
distance
):
"""
Test whether {candidate} can be added to {barcodes} based on the
minimum distance between {candidate} and all barcodes in {barcodes}.
:returns bool: True if the barcode is clean, False otherwise.
"""
for
i
in
stretches
:
if
i
in
barcode
:
return
False
:arg list barcodes: List of barcodes.
:arg str candidate: Candidate barcode.
:arg int min_dist: Minimum distance between the barcodes.
:arg function distance: Distance function.
return
True
:returns bool: True if the barcode is clean, False otherwise.
"""
for
i
in
barcodes
:
if
distance
(
i
,
candidate
)
<
min_dist
:
return
False
def
_filter_distance
(
self
,
barcodes
,
candidate
,
min_dist
):
"""
Test whether {candidate} can be added to {barcodes} based on the
minimum distance between {candidate} and all barcodes in {barcodes}.
return
True
:arg list barcodes: List of barcodes.
:arg str candidate: Candidate barcode.
:arg int min_dist: Minimum distance between the barcodes.
:returns bool: True if the barcode is clean, False otherwise.
"""
for
i
in
barcodes
:
if
self
.
distance
(
i
,
candidate
)
<
min_dist
:
return
False
def
all_barcodes
(
length
):
"""
Generate all possible barcodes of a certain length.
return
True
:arg int length: Length of the barcodes.
def
all_barcodes
(
self
,
length
):
"""
Generate all possible barcodes of a certain length.
:returns list: List of barcodes.
"""
result
=
[]
:arg int length: Length of the barcodes.
_all_words
(
_nucleotides
,
''
,
length
,
result
)
:returns list: List of barcodes.
"""
result
=
[]
return
result
self
.
_all_words
(
self
.
_nucleotides
,
''
,
length
,
result
)
return
result
def
filter_stretches
(
barcodes
,
max_stretch
):
"""
Filter a list of barcodes for mononucleotide stretches.
def
filter_stretches
(
self
,
barcodes
,
max_stretch
):
"""
Filter a list of barcodes for mononucleotide stretches.
:arg list barcodes: List of barcodes.
:arg int max_stretch: Maximum mononucleotide stretch length.
:arg list barcodes: List of barcodes.
:arg int max_stretch: Maximum mononucleotide stretch length.
:returns list: List of barcodes filtered for mononucleotide stretches.
"""
stretches
=
map
(
lambda
x
:
(
max_stretch
+
1
)
*
x
,
_nucleotides
)
result
=
[]
:returns list: List of barcodes filtered for mononucleotide stretches.
"""
stretches
=
map
(
lambda
x
:
(
max_stretch
+
1
)
*
x
,
self
.
_nucleotides
)
result
=
[]
for
i
in
barcodes
:
if
_filter_stretch
(
i
,
stretches
):
result
.
append
(
i
)
for
i
in
barcodes
:
if
self
.
_filter_stretch
(
i
,
stretches
):
result
.
append
(
i
)
return
result
return
result
def
filter_distance
(
self
,
barcodes
,
min_dist
):
"""
Filter a list of barcodes for distance to other barcodes.
def
filter_distance
(
barcodes
,
min_dist
,
distance
=
Levenshtein
.
distance
):
"""
Filter a list of barcodes for distance to other barcodes.
:arg list barcodes: List of barcodes.
:arg int min_dist: Minimum distance between the barcodes.
:arg list barcodes: List of barcodes.
:arg int min_dist: Minimum distance between the barcodes.
:arg function distance: Distance function.
:returns list: List of barcodes filtered for distance to other
barcodes.
"""
result
=
[]
:returns list: List of barcodes filtered for distance to other
barcodes.
"""
result
=
[]
for
i
in
barcodes
:
if
self
.
_filter_distance
(
result
,
i
,
min_dist
):
result
.
append
(
i
)
for
i
in
barcodes
:
if
_filter_distance
(
result
,
i
,
min_dist
,
distance
):
result
.
append
(
i
)
return
result
return
result
barcode/cli.py
View file @
288e605c
...
...
@@ -5,8 +5,8 @@ import sys
import
Levenshtein
from
.
import
doc_split
,
version
,
usage
from
.barcode
import
B
ar
C
ode
from
.
import
doc_split
,
usage
,
version
from
.barcode
import
all_b
ar
c
ode
s
,
filter_distance
,
filter_stretches
def
make_barcodes
(
length
,
max_stretch
,
min_dist
,
distance
):
...
...
@@ -19,10 +19,8 @@ def make_barcodes(length, max_stretch, min_dist, distance):
:arg int min_dist: Minimum distance between the barcodes.
:arg function distance: Distance function.
"""
bc
=
BarCode
(
distance
)
return
bc
.
filter_distance
(
bc
.
filter_stretches
(
bc
.
all_barcodes
(
length
),
max_stretch
),
min_dist
)
return
filter_distance
(
filter_stretches
(
all_barcodes
(
length
),
max_stretch
),
min_dist
)
def
test_barcodes
(
barcodes
,
min_dist
,
distance
,
handle
):
...
...
@@ -32,12 +30,11 @@ def test_barcodes(barcodes, min_dist, distance, handle):
:arg list barcodes: List of barcodes.
:arg int min_dist: Minimum distance between the barcodes.
:arg function distance: Distance function.
:arg stream handle: Open readable handle to a file.
:returns int: The number of barcodes that violate the distance constraint.
"""
bc
=
BarCode
(
distance
)
good_subset
=
bc
.
filter_distance
(
barcodes
,
min_dist
)
good_subset
=
filter_distance
(
barcodes
,
min_dist
)
if
handle
:
handle
.
write
(
'
\n
'
.
join
(
good_subset
))
...
...
