Commit 3929f40f authored by jkvis's avatar jkvis

Added the Crossmapper module

parent 0853cced
...@@ -15,6 +15,8 @@ from .variant import (ISeq, AISeq, ISeqList, AISeqList, DNAVar, ProteinVar, ...@@ -15,6 +15,8 @@ from .variant import (ISeq, AISeq, ISeqList, AISeqList, DNAVar, ProteinVar,
Allele, ProteinAllele, FS) Allele, ProteinAllele, FS)
from . import extractor, util from . import extractor, util
from crossmapper import Crossmap
def roll(s, first, last): def roll(s, first, last):
""" """
...@@ -472,7 +474,15 @@ def describe_repeats(reference, sample, units): ...@@ -472,7 +474,15 @@ def describe_repeats(reference, sample, units):
else: else:
description = prefix description = prefix
return description cm = Crossmap([reference_start + 1, reference_end], [], 1)
for variant in description:
for inserted in variant.inserted:
inserted.start = cm.tuple2string(cm.g2x(inserted.start))
inserted.end = cm.tuple2string(cm.g2x(inserted.end))
variant.start = cm.tuple2string(cm.g2x(variant.start))
variant.end = cm.tuple2string(cm.g2x(variant.end))
return description, reference_start, reference_end
def print_var(variant): def print_var(variant):
......
...@@ -4,7 +4,6 @@ from __future__ import unicode_literals ...@@ -4,7 +4,6 @@ from __future__ import unicode_literals
from extractor import * from extractor import *
#ref = 'AGCTGTGGGAGGGAGCCAGTGGATTTGGAAACAGAAATGGCTTGGCCTTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCCTCCTGCAATCCTTTAACTTACTGAATAACTCATGATTATGGGCCACCTGCAGGTACCATGCTAG' #ref = 'AGCTGTGGGAGGGAGCCAGTGGATTTGGAAACAGAAATGGCTTGGCCTTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCCTCCTGCAATCCTTTAACTTACTGAATAACTCATGATTATGGGCCACCTGCAGGTACCATGCTAG'
#alt = 'AGCTGTGGGAGGGAGCCAGTGGATTTGGAAACAGAAATGGCTTCGCCTTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCCCTCCTGCAATCCTATAACTTACTGAATAACTCATGATTATGGGCCACCTGCAGGTACCATGCTAG' #alt = 'AGCTGTGGGAGGGAGCCAGTGGATTTGGAAACAGAAATGGCTTCGCCTTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCCCTCCTGCAATCCTATAACTTACTGAATAACTCATGATTATGGGCCACCTGCAGGTACCATGCTAG'
#units = ['TCCT', 'GCCT'] #units = ['TCCT', 'GCCT']
...@@ -95,39 +94,47 @@ for line in lines: ...@@ -95,39 +94,47 @@ for line in lines:
else: else:
sequences[label] = [string.strip()] sequences[label] = [string.strip()]
#select = 'Amel' select = 'D13S317'
#unit_list = ['TATC'] unit_list = ['TATC']
#reference = sequences[select][0] reference = sequences[select][0]
#sample = sequences[select][0] sample = sequences[select][0]
#description = describe_repeats(reference, sample, unit_list) description, _, _ = describe_repeats(reference, sample, unit_list)
#print 'l.{}'.format(description) print 'l.{}'.format(description)
for sequence in sequences: #for sequence in sequences:
best = 0 # best = 0
for string in sequences[sequence]: # for string in sequences[sequence]:
repeats = short_sequence_repeat_extractor(string, min_length) # repeats = short_sequence_repeat_extractor(string, min_length)
score = 0 # score = 0
for repeat in repeats: # for repeat in repeats:
if repeat.count + 1 >= min_count: # if repeat.count + 1 >= min_count:
score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1) # score += (repeat.end - repeat.start) * (repeat.end - repeat.start) * (repeat.count + 1)
if score > best: # if score > best:
reference = string # reference = string
best = score # best = score
repeats = short_sequence_repeat_extractor(reference, min_length) # repeats = short_sequence_repeat_extractor(reference, min_length)
units = {} # units = {}
for repeat in repeats: # for repeat in repeats:
if repeat.count + 1 >= min_count: # if repeat.count + 1 >= min_count:
units[reference[repeat.start:repeat.end]] = repeat.count + 1 # units[reference[repeat.start:repeat.end]] = repeat.count + 1
unit_list = [] # unit_list = []
for unit in units: # for unit in units:
unit_list.append(unit) # unit_list.append(unit)
print sequence, unit_list # print sequence,
reference = sequences[sequence][0] # if best > 0:
for string in sequences[sequence]: # print unit_list
description = describe_repeats(reference, string, unit_list) # else:
print 'l.{}'.format(description) # print 'no repeat unit identified'
print # reference = sequences[sequence][0]
# for string in sequences[sequence]:
# if best > 0:
# description, _, _ = describe_repeats(reference, string, unit_list)
# else:
# description = describe_dna(reference, string)
# print 'l.{}'.format(description)
# print
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment