Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
extractor
Commits
716433c2
Commit
716433c2
authored
Sep 14, 2016
by
jkvis
Browse files
Testing
parent
008ba894
Changes
1
Hide whitespace changes
Inline
Side-by-side
repeat-extractor.py
View file @
716433c2
...
@@ -94,47 +94,50 @@ for line in lines:
...
@@ -94,47 +94,50 @@ for line in lines:
else
:
else
:
sequences
[
label
]
=
[
string
.
strip
()]
sequences
[
label
]
=
[
string
.
strip
()]
select
=
'D13S317'
#select = 'D13S317'
unit_list
=
[
'TATC'
]
#unit_list = ['TATC']
reference
=
sequences
[
select
][
0
]
#reference = sequences[select][0]
sample
=
sequences
[
select
][
0
]
#sample = sequences[select][0]
description
,
_
,
_
=
describe_repeats
(
reference
,
sample
,
unit_list
)
#description, _, _ = describe_repeats(reference, sample, unit_list)
print
'l.{}'
.
format
(
description
)
#print 'l.{}'.format(description)
#for sequence in sequences:
for
sequence
in
sequences
:
# best = 0
best
=
0
# for string in sequences[sequence]:
for
string
in
sequences
[
sequence
]:
# repeats = short_sequence_repeat_extractor(string, min_length)
repeats
=
short_sequence_repeat_extractor
(
string
,
min_length
)
# score = 0
score
=
0
# for repeat in repeats:
for
repeat
in
repeats
:
# if repeat.count + 1 >= min_count:
if
repeat
.
count
+
1
>=
min_count
:
# score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
score
+=
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
count
+
1
)
# if score > best:
if
score
>
best
:
# reference = string
reference
=
string
# best = score
best
=
score
# repeats = short_sequence_repeat_extractor(reference, min_length)
repeats
=
short_sequence_repeat_extractor
(
reference
,
min_length
)
# units = {}
units
=
{}
# for repeat in repeats:
for
repeat
in
repeats
:
# if repeat.count + 1 >= min_count:
if
repeat
.
count
+
1
>=
min_count
:
# units[reference[repeat.start:repeat.end]] = repeat.count + 1
units
[
reference
[
repeat
.
start
:
repeat
.
end
]]
=
repeat
.
count
+
1
# unit_list = []
unit_list
=
[]
# for unit in units:
for
unit
in
units
:
# unit_list.append(unit)
unit_list
.
append
(
unit
)
# print sequence,
reference
=
sequences
[
sequence
][
0
]
# if best > 0:
print
sequence
+
':'
,
# print unit_list
print
reference
# else:
if
best
>
0
:
# print 'no repeat unit identified'
print
'repeat units:'
,
unit_list
# reference = sequences[sequence][0]
else
:
# for string in sequences[sequence]:
print
'repeat units: []'
# if best > 0:
for
string
in
sequences
[
sequence
]:
# description, _, _ = describe_repeats(reference, string, unit_list)
rep_start
=
1
# else:
rep_end
=
len
(
reference
)
# description = describe_dna(reference, string)
if
best
>
0
:
# print 'l.{}'.format(description)
description
,
rep_start
,
rep_end
=
describe_repeats
(
reference
,
string
,
unit_list
)
# print
else
:
description
=
describe_dna
(
reference
,
string
)
print
'{}({}_{}):l.{}'
.
format
(
sequence
,
rep_start
,
rep_end
,
description
)
print
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment