Commit e30c0408 authored by Hoogenboom, Jerry
Browse files

Allelefinder fixes and mtDNA allele naming

Fixed:
* The -c/--stuttermark-column option of Allelefinder was not filtering
  out the non-ALLELE sequences as it was supposed to. (This issue was
  introduced in ce7f34fb, in which a bug was fixed that caused this
  option to filter all sequences, including the ones marked with ALLELE.
  So it turns out this option has been broken since 732e83ba.)

Improved:
* Allelefinder will no longer reject a marker based on the number of
  reads of 'Other sequences'.
* Adjusted sequence alignment parameters for mtDNA sequences to produce
  allele names that more closely follow historical mtDNA mutation
  nomenclature.
parent 53fbe9ec
...@@ -172,6 +172,11 @@ def call_variants(template, sequence, location="suffix", cache=True, ...@@ -172,6 +172,11 @@ def call_variants(template, sequence, location="suffix", cache=True,
raise ValueError("Unknown location %r. It should be 'prefix', " raise ValueError("Unknown location %r. It should be 'prefix', "
"'suffix', or a tuple (chromosome, position [, endpos])" % "'suffix', or a tuple (chromosome, position [, endpos])" %
location) location)
elif location[0] == "M":
MATCH_SCORE = 1
MISMATCH_SCORE = -1
GAP_OPEN_SCORE = -2
GAP_EXTEND_SCORE = -1
for i in range(len(matrix_match)): for i in range(len(matrix_match)):
x = i % row_offset x = i % row_offset
......
...@@ -18,7 +18,7 @@ this file to do their job. One may use the allelefinder report ...@@ -18,7 +18,7 @@ this file to do their job. One may use the allelefinder report
of what might be wrong. of what might be wrong.
""" """
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files, \ from ..lib import pos_int_arg, add_input_output_args, get_input_output_files, \
ensure_sequence_format, get_sample_data, \ ensure_sequence_format, get_sample_data, SEQ_SPECIAL_VALUES,\
add_sequence_format_args add_sequence_format_args
__version__ = "1.0.0" __version__ = "1.0.0"
...@@ -59,7 +59,7 @@ def find_alleles(samples_in, outfile, reportfile, min_reads, min_allele_pct, ...@@ -59,7 +59,7 @@ def find_alleles(samples_in, outfile, reportfile, min_reads, min_allele_pct,
lambda tag, data: find_alleles_sample( lambda tag, data: find_alleles_sample(
data if stuttermark_column is None data if stuttermark_column is None
else {key: data[key] for key in data if key[0] in else {key: data[key] for key in data if key[0] in
allelelist[tag]}, allelelist[tag] and key[1] in allelelist[tag][key[0]]},
outfile, reportfile, tag, min_reads, min_allele_pct, max_noise_pct, outfile, reportfile, tag, min_reads, min_allele_pct, max_noise_pct,
max_alleles, max_noisy, seqformat, library), max_alleles, max_noisy, seqformat, library),
allelelist, allelelist,
...@@ -81,10 +81,10 @@ def find_alleles_sample(data, outfile, reportfile, tag, min_reads, ...@@ -81,10 +81,10 @@ def find_alleles_sample(data, outfile, reportfile, tag, min_reads,
top_allele[marker] = 0 top_allele[marker] = 0
top_noise[marker] = ["-", 0] top_noise[marker] = ["-", 0]
if sequence == "Other sequences" and reads >= top_noise[marker][1]: if sequence in SEQ_SPECIAL_VALUES:
# Aggregated sequences are new highest noise! continue
top_noise[marker] = [sequence, reads]
elif reads > top_allele[marker]: if reads > top_allele[marker]:
# New highest allele! # New highest allele!
top_allele[marker] = reads top_allele[marker] = reads
for allele in alleles[marker].keys(): for allele in alleles[marker].keys():
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment