Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
extractor
Commits
3929f40f
Commit
3929f40f
authored
Sep 14, 2016
by
jkvis
Browse files
Added the Crossmapper module
parent
0853cced
Changes
2
Hide whitespace changes
Inline
Side-by-side
extractor/describe.py
View file @
3929f40f
...
...
@@ -15,6 +15,8 @@ from .variant import (ISeq, AISeq, ISeqList, AISeqList, DNAVar, ProteinVar,
Allele
,
ProteinAllele
,
FS
)
from
.
import
extractor
,
util
from
crossmapper
import
Crossmap
def
roll
(
s
,
first
,
last
):
"""
...
...
@@ -472,7 +474,15 @@ def describe_repeats(reference, sample, units):
else
:
description
=
prefix
return
description
cm
=
Crossmap
([
reference_start
+
1
,
reference_end
],
[],
1
)
for
variant
in
description
:
for
inserted
in
variant
.
inserted
:
inserted
.
start
=
cm
.
tuple2string
(
cm
.
g2x
(
inserted
.
start
))
inserted
.
end
=
cm
.
tuple2string
(
cm
.
g2x
(
inserted
.
end
))
variant
.
start
=
cm
.
tuple2string
(
cm
.
g2x
(
variant
.
start
))
variant
.
end
=
cm
.
tuple2string
(
cm
.
g2x
(
variant
.
end
))
return
description
,
reference_start
,
reference_end
def
print_var
(
variant
):
...
...
repeat-extractor.py
View file @
3929f40f
...
...
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
from
extractor
import
*
#ref = 'AGCTGTGGGAGGGAGCCAGTGGATTTGGAAACAGAAATGGCTTGGCCTTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCCTCCTGCAATCCTTTAACTTACTGAATAACTCATGATTATGGGCCACCTGCAGGTACCATGCTAG'
#alt = 'AGCTGTGGGAGGGAGCCAGTGGATTTGGAAACAGAAATGGCTTCGCCTTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCCCTCCTGCAATCCTATAACTTACTGAATAACTCATGATTATGGGCCACCTGCAGGTACCATGCTAG'
#units = ['TCCT', 'GCCT']
...
...
@@ -95,39 +94,47 @@ for line in lines:
else
:
sequences
[
label
]
=
[
string
.
strip
()]
#select = 'Amel'
#unit_list = ['TATC']
#reference = sequences[select][0]
#sample = sequences[select][0]
#description = describe_repeats(reference, sample, unit_list)
#print 'l.{}'.format(description)
for
sequence
in
sequences
:
best
=
0
for
string
in
sequences
[
sequence
]:
repeats
=
short_sequence_repeat_extractor
(
string
,
min_length
)
score
=
0
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
score
+=
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
end
-
repeat
.
start
)
*
(
repeat
.
count
+
1
)
if
score
>
best
:
reference
=
string
best
=
score
repeats
=
short_sequence_repeat_extractor
(
reference
,
min_length
)
units
=
{}
for
repeat
in
repeats
:
if
repeat
.
count
+
1
>=
min_count
:
units
[
reference
[
repeat
.
start
:
repeat
.
end
]]
=
repeat
.
count
+
1
unit_list
=
[]
for
unit
in
units
:
unit_list
.
append
(
unit
)
print
sequence
,
unit_list
reference
=
sequences
[
sequence
][
0
]
for
string
in
sequences
[
sequence
]:
description
=
describe_repeats
(
reference
,
string
,
unit_list
)
print
'l.{}'
.
format
(
description
)
print
select
=
'D13S317'
unit_list
=
[
'TATC'
]
reference
=
sequences
[
select
][
0
]
sample
=
sequences
[
select
][
0
]
description
,
_
,
_
=
describe_repeats
(
reference
,
sample
,
unit_list
)
print
'l.{}'
.
format
(
description
)
#for sequence in sequences:
# best = 0
# for string in sequences[sequence]:
# repeats = short_sequence_repeat_extractor(string, min_length)
# score = 0
# for repeat in repeats:
# if repeat.count + 1 >= min_count:
# score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
# if score > best:
# reference = string
# best = score
# repeats = short_sequence_repeat_extractor(reference, min_length)
# units = {}
# for repeat in repeats:
# if repeat.count + 1 >= min_count:
# units[reference[repeat.start:repeat.end]] = repeat.count + 1
# unit_list = []
# for unit in units:
# unit_list.append(unit)
# print sequence,
# if best > 0:
# print unit_list
# else:
# print 'no repeat unit identified'
# reference = sequences[sequence][0]
# for string in sequences[sequence]:
# if best > 0:
# description, _, _ = describe_repeats(reference, string, unit_list)
# else:
# description = describe_dna(reference, string)
# print 'l.{}'.format(description)
# print
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment