From 716433c2e0fe000e07a485a2145911c9c455abd6 Mon Sep 17 00:00:00 2001 From: jkvis Date: Wed, 14 Sep 2016 10:14:47 +0200 Subject: [PATCH] Testing --- repeat-extractor.py | 87 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/repeat-extractor.py b/repeat-extractor.py index 7af3b0b..1f785ce 100644 --- a/repeat-extractor.py +++ b/repeat-extractor.py @@ -94,47 +94,50 @@ for line in lines: else: sequences[label] = [string.strip()] -select = 'D13S317' -unit_list = ['TATC'] -reference = sequences[select][0] -sample = sequences[select][0] -description, _, _ = describe_repeats(reference, sample, unit_list) -print 'l.{}'.format(description) - - -#for sequence in sequences: -# best = 0 -# for string in sequences[sequence]: -# repeats = short_sequence_repeat_extractor(string, min_length) -# score = 0 -# for repeat in repeats: -# if repeat.count + 1 >= min_count: -# score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1) -# if score > best: -# reference = string -# best = score - -# repeats = short_sequence_repeat_extractor(reference, min_length) -# units = {} -# for repeat in repeats: -# if repeat.count + 1 >= min_count: -# units[reference[repeat.start:repeat.end]] = repeat.count + 1 -# unit_list = [] -# for unit in units: -# unit_list.append(unit) - -# print sequence, -# if best > 0: -# print unit_list -# else: -# print 'no repeat unit identified' -# reference = sequences[sequence][0] -# for string in sequences[sequence]: -# if best > 0: -# description, _, _ = describe_repeats(reference, string, unit_list) -# else: -# description = describe_dna(reference, string) -# print 'l.{}'.format(description) -# print +#select = 'D13S317' +#unit_list = ['TATC'] +#reference = sequences[select][0] +#sample = sequences[select][0] +#description, _, _ = describe_repeats(reference, sample, unit_list) +#print 'l.{}'.format(description) + + +for sequence in sequences: + best = 0 + for string in sequences[sequence]: + repeats = short_sequence_repeat_extractor(string, min_length) + score = 0 + for repeat in repeats: + if repeat.count + 1 >= min_count: + score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1) + if score > best: + reference = string + best = score + + repeats = short_sequence_repeat_extractor(reference, min_length) + units = {} + for repeat in repeats: + if repeat.count + 1 >= min_count: + units[reference[repeat.start:repeat.end]] = repeat.count + 1 + unit_list = [] + for unit in units: + unit_list.append(unit) + + reference = sequences[sequence][0] + print sequence + ':', + print reference + if best > 0: + print 'repeat units:', unit_list + else: + print 'repeat units: []' + for string in sequences[sequence]: + rep_start = 1 + rep_end = len(reference) + if best > 0: + description, rep_start, rep_end = describe_repeats(reference, string, unit_list) + else: + description = describe_dna(reference, string) + print '{}({}_{}):l.{}'.format(sequence, rep_start, rep_end, description) + print -- GitLab