Commit e30c0408 authored by Hoogenboom, Jerry

Allelefinder fixes and mtDNA allele naming

Fixed:
* The -c/--stuttermark-column option of Allelefinder was not filtering
  out the non-ALLELE sequences as it was supposed to. (This issue was
  introduced in ce7f34fb, in which a bug was fixed that caused this
  option to filter all sequences, including the ones marked with ALLELE.
  So it turns out this option has been broken since 732e83ba.)

Improved:
* Allelefinder will no longer reject a marker based on the number of
  reads of 'Other sequences'.
* Adjusted sequence alignment parameters for mtDNA sequences to produce
  allele names that more closely follow historical mtDNA mutation
  nomenclature.
parent 53fbe9ec
......@@ -172,6 +172,11 @@ def call_variants(template, sequence, location="suffix", cache=True,
raise ValueError("Unknown location %r. It should be 'prefix', "
"'suffix', or a tuple (chromosome, position [, endpos])" %
location)
elif location[0] == "M":
MATCH_SCORE = 1
MISMATCH_SCORE = -1
GAP_OPEN_SCORE = -2
GAP_EXTEND_SCORE = -1
for i in range(len(matrix_match)):
x = i % row_offset
......
......@@ -18,7 +18,7 @@ this file to do their job. One may use the allelefinder report
of what might be wrong.
"""
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files, \
ensure_sequence_format, get_sample_data, \
ensure_sequence_format, get_sample_data, SEQ_SPECIAL_VALUES,\
add_sequence_format_args
__version__ = "1.0.0"
......@@ -59,7 +59,7 @@ def find_alleles(samples_in, outfile, reportfile, min_reads, min_allele_pct,
lambda tag, data: find_alleles_sample(
data if stuttermark_column is None
else {key: data[key] for key in data if key[0] in
allelelist[tag]},
allelelist[tag] and key[1] in allelelist[tag][key[0]]},
outfile, reportfile, tag, min_reads, min_allele_pct, max_noise_pct,
max_alleles, max_noisy, seqformat, library),
allelelist,
......@@ -81,10 +81,10 @@ def find_alleles_sample(data, outfile, reportfile, tag, min_reads,
top_allele[marker] = 0
top_noise[marker] = ["-", 0]
if sequence == "Other sequences" and reads >= top_noise[marker][1]:
# Aggregated sequences are new highest noise!
top_noise[marker] = [sequence, reads]
elif reads > top_allele[marker]:
if sequence in SEQ_SPECIAL_VALUES:
continue
if reads > top_allele[marker]:
# New highest allele!
top_allele[marker] = reads
for allele in alleles[marker].keys():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment