Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
E
extractor
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
extractor
Commits
98670dd8
Commit
98670dd8
authored
Sep 20, 2016
by
jkvis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed deletions from repeat structure description
parent
4233b4c4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
65 additions
and
34 deletions
+65
-34
extractor/describe.py
extractor/describe.py
+9
-9
repeat-extractor.py
repeat-extractor.py
+56
-25
No files found.
extractor/describe.py
View file @
98670dd8
...
...
@@ -391,13 +391,13 @@ def describe_repeats(reference, sample, units):
i
+=
1
if
i
<
variant
.
sample_end
:
split
=
extractor
.
Variant
()
split
.
reference_start
=
variant
.
reference_start
split
.
reference_end
=
variant
.
reference_end
split
.
reference_start
=
variant
.
reference_start
+
reference_start
split
.
reference_end
=
variant
.
reference_end
+
reference_start
split
.
sample_start
=
start
split
.
sample_end
=
i
split
.
type
=
variant
.
type
split
.
transposition_start
=
variant
.
transposition_start
split
.
transposition_end
=
variant
.
transposition_end
split
.
transposition_start
=
variant
.
transposition_start
+
reference_start
split
.
transposition_end
=
variant
.
transposition_end
+
reference_start
replaced
.
append
(
split
)
while
i
<
variant
.
sample_end
and
masked_alt
[
i
+
sample_start
]
==
MASK
:
i
+=
1
...
...
@@ -405,13 +405,13 @@ def describe_repeats(reference, sample, units):
if
len
(
replaced
)
>
0
:
split
=
extractor
.
Variant
()
split
.
reference_start
=
variant
.
reference_start
split
.
reference_end
=
variant
.
reference_end
split
.
reference_start
=
variant
.
reference_start
+
reference_start
split
.
reference_end
=
variant
.
reference_end
+
reference_start
split
.
sample_start
=
start
split
.
sample_end
=
variant
.
sample_end
split
.
type
=
variant
.
type
split
.
transposition_start
=
variant
.
transposition_start
split
.
transposition_end
=
variant
.
transposition_end
split
.
transposition_start
=
variant
.
transposition_start
+
reference_start
split
.
transposition_end
=
variant
.
transposition_end
+
reference_start
replaced
.
append
(
split
)
variant_list
+=
replaced
else
:
...
...
@@ -449,7 +449,7 @@ def describe_repeats(reference, sample, units):
elif
variant
.
type
&
extractor
.
REVERSE_COMPLEMENT
:
seq_list
.
append
(
ISeq
(
start
=
variant
.
reference_start
+
1
+
reference_start
,
end
=
variant
.
reference_end
+
reference_start
,
reverse
=
True
,
weight_position
=
extracted
.
weight_position
))
el
se
:
#bases insertion
el
if
variant
.
sample_end
!=
variant
.
sample_start
:
seq_list
.
append
(
ISeq
(
sequence
=
sample
[
variant
.
sample_start
+
sample_start
:
variant
.
sample_end
+
sample_start
],
weight_position
=
extracted
.
weight_position
))
...
...
repeat-extractor.py
View file @
98670dd8
...
...
@@ -94,46 +94,77 @@ for line in lines:
else
:
sequences
[
label
]
=
[
string
.
strip
()]
literature
=
{
'Amel'
:
[],
'CSF1P0'
:
[
'AGAT'
],
'D10S1248'
:
[
'GGAA'
],
'D12S391'
:
[
'AGAT'
,
'AGAC'
],
'D13S317'
:
[
'TATC'
],
'D16S539'
:
[
'GATA'
],
'D18S51'
:
[
'GAAA'
],
'D19S433'
:
[
'AAGG'
],
'D1S1656'
:
[
'TAGA'
,
'TG'
],
'D21S11'
:
[
'TCTA'
,
'TCTG'
],
'D22S1045'
:
[
'ATT'
],
'D2S1338'
:
[
'TGCC'
,
'TTCC'
],
'D2S441'
:
[
'TCTA'
],
'D3S1358'
:
[
'AGAT'
,
'TCTA'
],
'D5S818'
:
[
'AGAT'
],
'D7S820'
:
[
'GATA'
],
'D8S1179'
:
[
'TATC'
],
'FGA'
:
[
'TTTC'
,
'CTTT'
,
'TTCC'
],
'PentaD'
:
[
'AAAGA'
],
'PentaE'
:
[
'AAAGA'
],
'TH01'
:
[
'TCAT'
],
'TPOX'
:
[
'AATG'
],
'vWA'
:
[
'TCTA'
,
'TCTG'
,
'TCCA'
],
'DYS391'
:
[
'TCTA'
]
}
#select = 'D13S317'
#unit_list =
['TATC'
]
#unit_list =
literature[select
]
#reference = sequences[select][0]
#sample = sequences[select][0]
#description, _, _ = describe_repeats(reference, sample, unit_list)
#print 'l.{}'.format(description)
#sample = sequences[select][14]
#description, rep_start, rep_end = describe_repeats(reference, sample, unit_list)
#print '{}({}_{}):l.{}'.format(select, rep_start, rep_end, description)
for
sequence
in
sequences
:
best
=
0
for
string
in
sequences
[
sequence
]:
repeats
=
short_sequence_repeat_extractor
(
string
,
min_length
)
score
=
0
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
score
+=
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
count
+
1
)
if
score
>
best
:
reference
=
string
best
=
score
repeats
=
short_sequence_repeat_extractor
(
reference
,
min_length
)
units
=
{}
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
units
[
reference
[
repeat
.
start
:
repeat
.
end
]]
=
repeat
.
count
+
1
unit_list
=
[]
for
unit
in
units
:
unit_list
.
append
(
unit
)
# best = 0
# for string in sequences[sequence]:
# repeats = short_sequence_repeat_extractor(string, min_length)
# score = 0
# for repeat in repeats:
# if repeat.count + 1 >= min_count:
# score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
# if score > best:
# reference = string
# best = score
# repeats = short_sequence_repeat_extractor(reference, min_length)
# units = {}
# for repeat in repeats:
# if repeat.count + 1 >= min_count:
# units[reference[repeat.start:repeat.end]] = repeat.count + 1
# unit_list = []
# for unit in units:
# unit_list.append(unit)
unit_list
=
literature
[
sequence
]
reference
=
sequences
[
sequence
][
0
]
print
sequence
+
':'
,
print
reference
if
best
>
0
:
if
len
(
unit_list
)
>
0
:
print
'repeat units:'
,
unit_list
else
:
print
'repeat units: []'
for
string
in
sequences
[
sequence
]:
rep_start
=
1
rep_end
=
len
(
reference
)
if
best
>
0
:
if
len
(
unit_list
)
>
0
:
description
,
rep_start
,
rep_end
=
describe_repeats
(
reference
,
string
,
unit_list
)
else
:
description
=
describe_dna
(
reference
,
string
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment