Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
E
extractor
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
extractor
Commits
716433c2
Commit
716433c2
authored
Sep 14, 2016
by
jkvis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Testing
parent
008ba894
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
42 deletions
+45
-42
repeat-extractor.py
repeat-extractor.py
+45
-42
No files found.
repeat-extractor.py
View file @
716433c2
...
...
@@ -94,47 +94,50 @@ for line in lines:
else
:
sequences
[
label
]
=
[
string
.
strip
()]
select
=
'D13S317'
unit_list
=
[
'TATC'
]
reference
=
sequences
[
select
][
0
]
sample
=
sequences
[
select
][
0
]
description
,
_
,
_
=
describe_repeats
(
reference
,
sample
,
unit_list
)
print
'l.{}'
.
format
(
description
)
#for sequence in sequences:
# best = 0
# for string in sequences[sequence]:
# repeats = short_sequence_repeat_extractor(string, min_length)
# score = 0
# for repeat in repeats:
# if repeat.count + 1 >= min_count:
# score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
# if score > best:
# reference = string
# best = score
# repeats = short_sequence_repeat_extractor(reference, min_length)
# units = {}
# for repeat in repeats:
# if repeat.count + 1 >= min_count:
# units[reference[repeat.start:repeat.end]] = repeat.count + 1
# unit_list = []
# for unit in units:
# unit_list.append(unit)
# print sequence,
# if best > 0:
# print unit_list
# else:
# print 'no repeat unit identified'
# reference = sequences[sequence][0]
# for string in sequences[sequence]:
# if best > 0:
# description, _, _ = describe_repeats(reference, string, unit_list)
# else:
# description = describe_dna(reference, string)
# print 'l.{}'.format(description)
# print
#select = 'D13S317'
#unit_list = ['TATC']
#reference = sequences[select][0]
#sample = sequences[select][0]
#description, _, _ = describe_repeats(reference, sample, unit_list)
#print 'l.{}'.format(description)
for
sequence
in
sequences
:
best
=
0
for
string
in
sequences
[
sequence
]:
repeats
=
short_sequence_repeat_extractor
(
string
,
min_length
)
score
=
0
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
score
+=
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
count
+
1
)
if
score
>
best
:
reference
=
string
best
=
score
repeats
=
short_sequence_repeat_extractor
(
reference
,
min_length
)
units
=
{}
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
units
[
reference
[
repeat
.
start
:
repeat
.
end
]]
=
repeat
.
count
+
1
unit_list
=
[]
for
unit
in
units
:
unit_list
.
append
(
unit
)
reference
=
sequences
[
sequence
][
0
]
print
sequence
+
':'
,
print
reference
if
best
>
0
:
print
'repeat units:'
,
unit_list
else
:
print
'repeat units: []'
for
string
in
sequences
[
sequence
]:
rep_start
=
1
rep_end
=
len
(
reference
)
if
best
>
0
:
description
,
rep_start
,
rep_end
=
describe_repeats
(
reference
,
string
,
unit_list
)
else
:
description
=
describe_dna
(
reference
,
string
)
print
'{}({}_{}):l.{}'
.
format
(
sequence
,
rep_start
,
rep_end
,
description
)
print
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment