Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
extractor
Commits
0853cced
Commit
0853cced
authored
Sep 13, 2016
by
jkvis
Browse files
Fixed description for no repeat structures
parent
fba10c7e
Changes
2
Hide whitespace changes
Inline
Side-by-side
extractor/describe.py
View file @
0853cced
...
...
@@ -460,12 +460,17 @@ def describe_repeats(reference, sample, units):
seq_list
.
append
(
DNAVar
(
type
=
'repeat'
,
inserted
=
repeats
[
repeat
][
'unit'
],
count
=
repeats
[
repeat
][
'count'
]))
repeat
+=
1
description
.
append
(
DNAVar
(
start
=
reference_start
+
1
,
end
=
reference_end
,
sample_start
=
sample_start
,
sample_end
=
sample_end
,
type
=
'delins'
,
inserted
=
seq_list
))
suffix
=
describe_dna
(
reference
[
reference_end
:],
sample
[
sample_end
:])
for
variant
in
suffix
:
if
variant
.
type
!=
'none'
:
description
.
append
(
variant
)
if
len
(
variant_list
)
>
0
or
len
(
repeats
)
>
0
:
description
.
append
(
DNAVar
(
start
=
reference_start
+
1
,
end
=
reference_end
,
sample_start
=
sample_start
,
sample_end
=
sample_end
,
type
=
'delins'
,
inserted
=
seq_list
))
suffix
=
describe_dna
(
reference
[
reference_end
:],
sample
[
sample_end
:])
for
variant
in
suffix
:
if
variant
.
type
!=
'none'
:
variant
.
start
+=
reference_end
variant
.
end
+=
reference_end
description
.
append
(
variant
)
else
:
description
=
prefix
return
description
...
...
repeat-extractor.py
View file @
0853cced
...
...
@@ -81,7 +81,7 @@ def short_sequence_repeat_extractor(string, min_length=1):
min_count
=
3
min_length
=
2
min_length
=
3
with
open
(
'strlist.txt'
,
'r'
)
as
infile
:
lines
=
infile
.
readlines
()
...
...
@@ -95,39 +95,39 @@ for line in lines:
else
:
sequences
[
label
]
=
[
string
.
strip
()]
select
=
'
D8S1179
'
unit_list
=
[
'TCTA'
,
'TATC'
]
reference
=
sequences
[
select
][
0
]
sample
=
sequences
[
select
][
7
]
description
=
describe_repeats
(
reference
,
sample
,
unit_list
)
print
'l.{}'
.
format
(
description
)
#
for sequence in sequences:
#
best = 0
#
for string in sequences[sequence]:
#
repeats = short_sequence_repeat_extractor(string, min_length)
#
score = 0
#
for repeat in repeats:
#
if repeat.count + 1 >= min_count:
#
score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
#
if score > best:
#
reference = string
#
best = score
#
repeats = short_sequence_repeat_extractor(reference, min_length)
#
units = {}
#
for repeat in repeats:
#
if repeat.count + 1 >= min_count:
#
units[reference[repeat.start:repeat.end]] = repeat.count + 1
#
unit_list = []
#
for unit in units:
#
unit_list.append(unit)
#
print sequence, unit_list
#
reference = sequences[sequence][0]
#
for string in sequences[sequence]:
#
description = describe_repeats(reference, string, unit_list)
#
print 'l.{}'.format(description)
#
print
#
select = '
Amel
'
#
unit_list = ['TATC']
#
reference = sequences[select][0]
#
sample = sequences[select][
0
]
#
description = describe_repeats(reference, sample, unit_list)
#
print 'l.{}'.format(description)
for
sequence
in
sequences
:
best
=
0
for
string
in
sequences
[
sequence
]:
repeats
=
short_sequence_repeat_extractor
(
string
,
min_length
)
score
=
0
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
score
+=
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
count
+
1
)
if
score
>
best
:
reference
=
string
best
=
score
repeats
=
short_sequence_repeat_extractor
(
reference
,
min_length
)
units
=
{}
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
units
[
reference
[
repeat
.
start
:
repeat
.
end
]]
=
repeat
.
count
+
1
unit_list
=
[]
for
unit
in
units
:
unit_list
.
append
(
unit
)
print
sequence
,
unit_list
reference
=
sequences
[
sequence
][
0
]
for
string
in
sequences
[
sequence
]:
description
=
describe_repeats
(
reference
,
string
,
unit_list
)
print
'l.{}'
.
format
(
description
)
print
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment