Commit dffb83c5 authored by jkvis

Temporary fix for duplicates in transpositions; this needs rethinking when refactoring.

parent 98670dd8
...@@ -127,9 +127,12 @@ def var_to_dna_var(s1, s2, var, seq_list=[], weight_position=1): ...@@ -127,9 +127,12 @@ def var_to_dna_var(s1, s2, var, seq_list=[], weight_position=1):
'$' not in s1[var.reference_start - ins_length:var.reference_start] and '$' not in s1[var.reference_start - ins_length:var.reference_start] and
'$' not in s2[var.sample_start:var.sample_end] and '$' not in s2[var.sample_start:var.sample_end] and
s1[var.reference_start - ins_length:var.reference_start] == s1[var.reference_start - ins_length:var.reference_start] ==
s2[var.sample_start:var.sample_end] and
s2[var.sample_start - ins_length:var.sample_start] ==
s2[var.sample_start:var.sample_end]): s2[var.sample_start:var.sample_end]):
# NOTE: We may want to omit the inserted / deleted sequence and # NOTE: We may want to omit the inserted / deleted sequence and
# use the ranges instead. # use the ranges instead.
return DNAVar(start=var.reference_start - ins_length + 1, return DNAVar(start=var.reference_start - ins_length + 1,
end=var.reference_end, type='dup', shift=shift, end=var.reference_end, type='dup', shift=shift,
sample_start=var.sample_start + 1, sample_end=var.sample_end, sample_start=var.sample_start + 1, sample_end=var.sample_end,
...@@ -310,6 +313,7 @@ def describe_dna(s1, s2): ...@@ -310,6 +313,7 @@ def describe_dna(s1, s2):
s2_swig[0], s2_swig[1], extractor.TYPE_DNA) s2_swig[0], s2_swig[1], extractor.TYPE_DNA)
for variant in extracted.variants: for variant in extracted.variants:
if variant.type & extractor.TRANSPOSITION_OPEN: if variant.type & extractor.TRANSPOSITION_OPEN:
if not in_transposition: if not in_transposition:
seq_list = ISeqList() seq_list = ISeqList()
......
...@@ -94,36 +94,36 @@ for line in lines: ...@@ -94,36 +94,36 @@ for line in lines:
else: else:
sequences[label] = [string.strip()] sequences[label] = [string.strip()]
literature = { standard = {
'Amel': [], 'Amel': [],
'CSF1P0': ['AGAT'], 'CSF1P0': ['AGAT'],
'D10S1248': ['GGAA'], 'D10S1248': ['GGAA'],
'D12S391': ['AGAT', 'AGAC'], 'D12S391': ['AGAT', 'AGAC'],
'D13S317': ['TATC'], 'D13S317': ['TATC'],
'D16S539': ['GATA'], 'D16S539': ['GATA'],
'D18S51': ['GAAA'], 'D18S51': ['GAAA'],
'D19S433': ['AAGG'], 'D19S433': ['AAGG'],
'D1S1656': ['TAGA', 'TG'], 'D1S1656': ['TAGA', 'TG'],
'D21S11': ['TCTA', 'TCTG'], 'D21S11': ['TCTA', 'TCTG'],
'D22S1045': ['ATT'], 'D22S1045': ['ATT'],
'D2S1338': ['TGCC', 'TTCC'], 'D2S1338': ['TGCC', 'TTCC'],
'D2S441': ['TCTA'], 'D2S441': ['TCTA'],
'D3S1358': ['AGAT', 'TCTA'], 'D3S1358': ['AGAT', 'TCTA'],
'D5S818': ['AGAT'], 'D5S818': ['AGAT'],
'D7S820': ['GATA'], 'D7S820': ['GATA'],
'D8S1179': ['TATC'], 'D8S1179': ['TATC'],
'FGA': ['TTTC', 'CTTT', 'TTCC'], 'FGA': ['TTTC', 'CTTT', 'TTCC'],
'PentaD': ['AAAGA'], 'PentaD': ['AAAGA'],
'PentaE': ['AAAGA'], 'PentaE': ['AAAGA'],
'TH01': ['TCAT'], 'TH01': ['TCAT'],
'TPOX': ['AATG'], 'TPOX': ['AATG'],
'vWA': ['TCTA', 'TCTG', 'TCCA'], 'vWA': ['TCTA', 'TCTG', 'TCCA'],
'DYS391': ['TCTA'] 'DYS391': ['TCTA']
} }
#select = 'D13S317' #select = 'D13S317'
#unit_list = literature[select] #unit_list = standard[select]
#reference = sequences[select][0] #reference = sequences[select][0]
#sample = sequences[select][14] #sample = sequences[select][14]
#description, rep_start, rep_end = describe_repeats(reference, sample, unit_list) #description, rep_start, rep_end = describe_repeats(reference, sample, unit_list)
...@@ -152,7 +152,7 @@ for sequence in sequences: ...@@ -152,7 +152,7 @@ for sequence in sequences:
# for unit in units: # for unit in units:
# unit_list.append(unit) # unit_list.append(unit)
unit_list = literature[sequence] unit_list = standard[sequence]
reference = sequences[sequence][0] reference = sequences[sequence][0]
print sequence + ':', print sequence + ':',
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment