Commit 0853cced authored by jkvis

Fixed description for no repeat structures

parent fba10c7e
...@@ -460,12 +460,17 @@ def describe_repeats(reference, sample, units): ...@@ -460,12 +460,17 @@ def describe_repeats(reference, sample, units):
seq_list.append(DNAVar(type='repeat',inserted=repeats[repeat]['unit'],count=repeats[repeat]['count'])) seq_list.append(DNAVar(type='repeat',inserted=repeats[repeat]['unit'],count=repeats[repeat]['count']))
repeat += 1 repeat += 1
description.append(DNAVar(start=reference_start + 1,end=reference_end,sample_start=sample_start,sample_end=sample_end,type='delins',inserted=seq_list)) if len(variant_list) > 0 or len(repeats) > 0:
description.append(DNAVar(start=reference_start + 1,end=reference_end,sample_start=sample_start,sample_end=sample_end,type='delins',inserted=seq_list))
suffix = describe_dna(reference[reference_end:], sample[sample_end:])
for variant in suffix: suffix = describe_dna(reference[reference_end:], sample[sample_end:])
if variant.type != 'none': for variant in suffix:
description.append(variant) if variant.type != 'none':
variant.start += reference_end
variant.end += reference_end
description.append(variant)
else:
description = prefix
return description return description
......
...@@ -81,7 +81,7 @@ def short_sequence_repeat_extractor(string, min_length=1): ...@@ -81,7 +81,7 @@ def short_sequence_repeat_extractor(string, min_length=1):
min_count = 3 min_count = 3
min_length = 2 min_length = 3
with open('strlist.txt', 'r') as infile: with open('strlist.txt', 'r') as infile:
lines = infile.readlines() lines = infile.readlines()
...@@ -95,39 +95,39 @@ for line in lines: ...@@ -95,39 +95,39 @@ for line in lines:
else: else:
sequences[label] = [string.strip()] sequences[label] = [string.strip()]
select = 'D8S1179' #select = 'Amel'
unit_list = ['TCTA', 'TATC'] #unit_list = ['TATC']
reference = sequences[select][0] #reference = sequences[select][0]
sample = sequences[select][7] #sample = sequences[select][0]
description = describe_repeats(reference, sample, unit_list) #description = describe_repeats(reference, sample, unit_list)
print 'l.{}'.format(description) #print 'l.{}'.format(description)
#for sequence in sequences: for sequence in sequences:
# best = 0 best = 0
# for string in sequences[sequence]: for string in sequences[sequence]:
# repeats = short_sequence_repeat_extractor(string, min_length) repeats = short_sequence_repeat_extractor(string, min_length)
# score = 0 score = 0
# for repeat in repeats: for repeat in repeats:
# if repeat.count + 1 >= min_count: if repeat.count + 1 >= min_count:
# score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1) score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
# if score > best: if score > best:
# reference = string reference = string
# best = score best = score
# repeats = short_sequence_repeat_extractor(reference, min_length) repeats = short_sequence_repeat_extractor(reference, min_length)
# units = {} units = {}
# for repeat in repeats: for repeat in repeats:
# if repeat.count + 1 >= min_count: if repeat.count + 1 >= min_count:
# units[reference[repeat.start:repeat.end]] = repeat.count + 1 units[reference[repeat.start:repeat.end]] = repeat.count + 1
# unit_list = [] unit_list = []
# for unit in units: for unit in units:
# unit_list.append(unit) unit_list.append(unit)
# print sequence, unit_list print sequence, unit_list
# reference = sequences[sequence][0] reference = sequences[sequence][0]
# for string in sequences[sequence]: for string in sequences[sequence]:
# description = describe_repeats(reference, string, unit_list) description = describe_repeats(reference, string, unit_list)
# print 'l.{}'.format(description) print 'l.{}'.format(description)
# print print
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment