Commit b30bdbbc authored by jhoogenboom's avatar jhoogenboom
Browse files

Greatly increased argument help

* All tools now have a longer description in the tool-specific help
  page.
* Arguments are now presented in groups and the order is the same
  across tools.

Furthermore:
* Fixed bug that rendered BGHomStats and BGEstimate with the -H
  option useless.
* The report of Allelefinder and BGEstimate is now written to
  sys.stderr by default. This means the report is now always
  generated (but it may be sent directly to /dev/null explicitly by
  the user). The big plus is that the progress of the tools is
  visible in the terminal when the tools are run by hand.
parent 732e83ba
#!/usr/bin/env python
import argparse, pkgutil, os
import argparse, pkgutil, os, re, textwrap
import tools
from . import usage, version
......@@ -35,11 +35,24 @@ class _VersionAction(argparse.Action):
#_VersionAction
class _HelpFormatter(argparse.HelpFormatter):
_pat_paragraph_delim = re.compile("\n\n+")
def _fill_text(self, text, width, indent):
# Reflow (wrap) description text, but maintain paragraphs.
return "\n\n".join(
textwrap.fill(self._whitespace_matcher.sub(" ", p).strip(), width,
initial_indent=indent, subsequent_indent=indent)
for p in self._pat_paragraph_delim.split(text))
#_fill_text
#_HelpFormatter
def main():
"""
Main entry point.
"""
parser = argparse.ArgumentParser(add_help=False, description=usage[0])
parser = argparse.ArgumentParser(formatter_class=_HelpFormatter,
add_help=False, description=usage[0])
parser.version = version(parser.prog)
parser.add_argument('-h', '--help', action=_HelpAction,
default=argparse.SUPPRESS, nargs=argparse.REMAINDER,
......@@ -62,7 +75,9 @@ def main():
"tools")]):
module = importer.find_module(prefix + name).load_module(prefix + name)
subparser = subparsers.add_parser(
name, help=module.__doc__.split("\n\n\n", 1)[0],
name,
formatter_class=_HelpFormatter,
help=module.__doc__.split("\n\n", 1)[0],
description=module.__doc__,
version=version(parser.prog, name, module.__version__))
__tools__[name] = subparser
......
......@@ -35,7 +35,7 @@ PAT_SPLIT = re.compile("[,; ]+")
# Default regular expression to capture sample tags in file names.
# This is the default of the -e command line option.
DEF_TAG_EXPR = "^(.+?)(?:\.[^.]+)?$"
DEF_TAG_EXPR = "^(.*?)(?:\.[^.]+)?$"
# Default formatting template to write sample tags.
# This is the default of the -f command line option.
......@@ -960,9 +960,9 @@ def get_sample_data(tags_to_files, callback, allelelist=None,
else:
allelelist[tag] = {}
if homozygotes:
for marker in allelelist[tag].keys():
if len(allelelist[tag][marker]) > 1:
del allelelist[tag][marker]
for markerx in allelelist[tag].keys():
if len(allelelist[tag][markerx]) > 1:
del allelelist[tag][markerx]
callback(tag, data)
#get_sample_data
......@@ -1014,10 +1014,12 @@ def pos_int_arg(value):
def add_allele_detection_args(parser):
parser.add_argument('-a', '--allelelist', metavar="ALLELEFILE",
group = parser.add_argument_group("allele detection options")
group.add_argument('-a', '--allelelist', metavar="ALLELEFILE",
type=argparse.FileType('r'),
help="file containing a list of the true alleles of each sample")
parser.add_argument('-c', '--annotation-column', metavar="COLNAME",
help="file containing a list of the true alleles of each sample "
"(e.g., obtained from allelefinder)")
group.add_argument('-c', '--annotation-column', metavar="COLNAME",
help="name of a column in the sample files, which contains a value "
"beginning with 'ALLELE' for the true alleles of the sample")
#add_allele_detection_args
......@@ -1027,20 +1029,64 @@ def add_sample_files_args(parser):
"""Add arguments for opening sample files to the given parser."""
parser.add_argument('filelist', nargs='*', metavar="FILE",
default=[sys.stdin], type=argparse.FileType('r'),
help="the data file(s) to process (default: read from stdin)")
parser.add_argument('-e', '--tag-expr', metavar="REGEX", type=re.compile,
help="the sample data file(s) to process (default: read from stdin)")
group = parser.add_argument_group("sample tag parsing options")
group.add_argument('-e', '--tag-expr', metavar="REGEX", type=re.compile,
default=DEF_TAG_EXPR,
help="regular expression that captures (using one or more capturing "
"groups) the sample tags from the file names; by default, the "
"entire file name except for its extension (if any) is captured")
parser.add_argument('-f', '--tag-format', metavar="EXPR",
group.add_argument('-f', '--tag-format', metavar="EXPR",
default=DEF_TAG_FORMAT,
help="format of the sample tags produced; a capturing group reference "
"like '\\n' refers to the n-th capturing group in the regular "
"expression specified with -e/--tag-expr (the default of '\\1' "
"simply uses the first capturing group); with a single sample, "
"you can enter the samle tag here explicitly")
#add_sample_fils_args
"you can enter the sample tag here explicitly")
#add_sample_files_args
def add_output_args(parser, report=True):
    """
    Add output destination options to the given parser.

    Always adds -o/--output (default: sys.stdout); when report is True,
    also adds -r/--report (default: sys.stderr).
    """
    group = parser.add_argument_group("output destination options")
    # (short, long, default stream, help text) for each destination option.
    destinations = [
        ('-o', '--output', sys.stdout,
         "file to write output to (default: write to stdout)")]
    if report:
        destinations.append(
            ('-r', '--report', sys.stderr,
             "file to write a report to (default: write to stderr)"))
    for short_opt, long_opt, stream, help_text in destinations:
        group.add_argument(short_opt, long_opt, metavar="FILE",
                           type=argparse.FileType('w'), default=stream,
                           help=help_text)
#add_output_args
def add_sequence_format_args(parser, default_format=None, force=False):
    """
    Add sequence format options to the given parser.

    If force is True, the sequence_format default is pinned to
    default_format and no -F option is exposed; otherwise, -F lets the
    user pick one of raw/tssv/allelename.  The -l/--library option is
    always added.
    """
    group = parser.add_argument_group("sequence format options")
    if force:
        # Tool demands a fixed format; do not expose a choice to the user.
        group.set_defaults(sequence_format=default_format)
    else:
        default_label = ("no conversion" if default_format is None
                         else default_format)
        group.add_argument('-F', '--sequence-format', metavar="FORMAT",
                           choices=("raw", "tssv", "allelename"),
                           default=default_format,
                           help="convert sequences to the specified format: "
                                "one of %(choices)s (default: "
                                + default_label + ")")
    group.add_argument('-l', '--library', metavar="LIBRARY",
                       type=argparse.FileType('r'),
                       help="library file for sequence format conversion")
#add_sequence_format_args
def add_random_subsampling_args(parser):
    """
    Add advanced options for random subsampling of the input to parser.

    -R/--limit-reads caps the total read count per sample (default:
    effectively unlimited); -x/--drop-samples discards a fraction of
    the input samples.
    """
    subsampling_group = parser.add_argument_group(
        "random subsampling options (advanced)")
    subsampling_group.add_argument('-R', '--limit-reads', metavar="N",
        type=pos_int_arg, default=sys.maxint,  # NOTE: sys.maxint is Python 2
        help="simulate lower sequencing depth by randomly dropping reads down "
             "to this maximum total number of reads for each sample")
    subsampling_group.add_argument('-x', '--drop-samples', metavar="N",
        type=float, default=0,
        help="randomly drop this fraction of input samples")
#add_random_subsampling_args
def get_tag(filename, tag_expr, tag_format):
......
......@@ -2,13 +2,20 @@
"""
Find true alleles in reference samples and detect possible
contaminations.
In each sample, the sequences with the highest read counts of each
marker are called alleles, with a user-defined maximum number of alleles
per marker. The allele balance is kept within given bounds. If the
highest non-allelic sequence exceeds a given limit, no alleles are
called for this marker. If this happens for multiple markers in one
sample, no alleles are called for this sample at all.
"""
import argparse
import sys
from ..lib import get_column_ids, pos_int_arg, map_tags_to_files, \
add_sample_files_args, ensure_sequence_format, \
get_sample_data
get_sample_data, add_sequence_format_args, add_output_args
__version__ = "0.1dev"
......@@ -40,29 +47,29 @@ _DEF_MAX_ALLELES = 2
_DEF_MAX_NOISY = 2
def find_alleles(filelist, reportfile, tag_expr, tag_format, min_reads,
min_allele_pct, max_noise_pct, max_alleles, max_noisy,
stuttermark_column, seqformat, library):
def find_alleles(filelist, outfile, reportfile, tag_expr, tag_format,
min_reads, min_allele_pct, max_noise_pct, max_alleles,
max_noisy, stuttermark_column, seqformat, library):
if seqformat is not None and library is not None:
library = parse_library(library)
print("\t".join(["sample", "marker", "total", "allele"]))
outfile.write("\t".join(["sample", "marker", "total", "allele"]) + "\n")
allelelist = {}
get_sample_data(
map_tags_to_files(filelist, tag_expr, tag_format),
lambda tag, data: find_alleles_sample(
data if stuttermark_column is None
else {key: data[key] for key in allelelist[tag]},
reportfile, tag, min_reads, min_allele_pct, max_noise_pct,
outfile, reportfile, tag, min_reads, min_allele_pct, max_noise_pct,
max_alleles, max_noisy, seqformat, library),
allelelist,
stuttermark_column)
#find_alleles
def find_alleles_sample(data, reportfile, tag, min_reads, min_allele_pct,
max_noise_pct, max_alleles, max_noisy, seqformat,
library):
def find_alleles_sample(data, outfile, reportfile, tag, min_reads,
min_allele_pct, max_noise_pct, max_alleles, max_noisy,
seqformat, library):
top_noise = {}
top_allele = {}
alleles = {}
......@@ -96,11 +103,10 @@ def find_alleles_sample(data, reportfile, tag, min_reads, min_allele_pct,
noisy_markers = 0
for marker in alleles:
if top_allele[marker] < min_reads:
if reportfile:
reportfile.write(
"Sample %s is not suitable for marker %s:\n"
"highest allele has only %i reads\n\n" %
(tag, marker, top_allele[marker]))
reportfile.write(
"Sample %s is not suitable for marker %s:\n"
"highest allele has only %i reads\n\n" %
(tag, marker, top_allele[marker]))
alleles[marker] = {}
continue
if len(alleles[marker]) > max_alleles:
......@@ -111,31 +117,27 @@ def find_alleles_sample(data, reportfile, tag, min_reads, min_allele_pct,
alleles[marker] = {x: alleles[marker][x]
for x in allele_order[:max_alleles]}
if top_noise[marker][1] > top_allele[marker]*(max_noise_pct/100.):
if reportfile:
reportfile.write(
"Sample %s is not suitable for marker %s:\n"
"highest non-allele is %.1f%% of the highest allele\n" %
(tag, marker,
100.*top_noise[marker][1]/top_allele[marker]))
for allele in sorted(alleles[marker],
key=lambda x: -alleles[marker][x]):
seq = allele if seqformat is None \
else ensure_sequence_format(allele, seqformat,
library=library, marker=marker)
reportfile.write("%i\tALLELE\t%s\n" %
(alleles[marker][allele], seq))
seq = top_noise[marker][0] if seqformat is None \
else ensure_sequence_format(top_noise[marker][0],
seqformat, library=library, marker=marker)
reportfile.write("%i\tNOISE\t%s\n\n" %
(top_noise[marker][1], seq))
reportfile.write(
"Sample %s is not suitable for marker %s:\n"
"highest non-allele is %.1f%% of the highest allele\n" %
(tag, marker, 100.*top_noise[marker][1]/top_allele[marker]))
for allele in sorted(alleles[marker],
key=lambda x: -alleles[marker][x]):
seq = allele if seqformat is None \
else ensure_sequence_format(allele, seqformat,
library=library, marker=marker)
reportfile.write("%i\tALLELE\t%s\n" %
(alleles[marker][allele], seq))
seq = top_noise[marker][0] if seqformat is None \
else ensure_sequence_format(top_noise[marker][0],
seqformat, library=library, marker=marker)
reportfile.write("%i\tNOISE\t%s\n\n" % (top_noise[marker][1], seq))
noisy_markers += 1
alleles[marker] = {}
# Drop this sample completely if it has too many noisy markers.
if noisy_markers > max_noisy:
if reportfile:
reportfile.write("Sample %s appears to be contaminated!\n\n" % tag)
reportfile.write("Sample %s appears to be contaminated!\n\n" % tag)
return
# The sample is OK, write out its alleles.
......@@ -144,49 +146,41 @@ def find_alleles_sample(data, reportfile, tag, min_reads, min_allele_pct,
key=lambda x: -alleles[marker][x]):
seq = allele if seqformat is None else ensure_sequence_format(
allele, seqformat, library=library, marker=marker)
print("\t".join(
[tag, marker, str(alleles[marker][allele]), seq]))
outfile.write("\t".join(
[tag, marker, str(alleles[marker][allele]), seq]) + "\n")
#find_alleles_sample
def add_arguments(parser):
add_sample_files_args(parser)
parser.add_argument('-r', '--report', metavar="OUTFILE",
type=argparse.FileType("w"),
help="write a report to the given file, detailing possibly "
"contaminated or otherwise unsuitable samples")
parser.add_argument('-n', '--min-reads', metavar="N", type=pos_int_arg,
default=_DEF_MIN_READS,
help="require at least this number of reads for the highest allele "
"(default: %(default)s)")
parser.add_argument('-m', '--min-allele-pct', metavar="PCT", type=float,
default=_DEF_MIN_ALLELE_PCT,
add_output_args(parser)
filtergroup = parser.add_argument_group("filtering options")
filtergroup.add_argument('-m', '--min-allele-pct', metavar="PCT",
type=float, default=_DEF_MIN_ALLELE_PCT,
help="call heterozygous if the second allele is at least this "
"percentage of the highest allele (default: %(default)s)")
parser.add_argument('-M', '--max-noise-pct', metavar="PCT", type=float,
default=_DEF_MAX_NOISE_PCT,
filtergroup.add_argument('-M', '--max-noise-pct', metavar="PCT",
type=float, default=_DEF_MAX_NOISE_PCT,
help="a sample is considered contaminated/unsuitable for a marker if "
"the highest non-allelic sequence is at least this percentage of "
"the highest allele (default: %(default)s)")
parser.add_argument('-a', '--max-alleles', metavar="N", type=pos_int_arg,
default=_DEF_MAX_ALLELES,
filtergroup.add_argument('-n', '--min-reads', metavar="N",
type=pos_int_arg, default=_DEF_MIN_READS,
help="require at least this number of reads for the highest allele "
"(default: %(default)s)")
filtergroup.add_argument('-a', '--max-alleles', metavar="N",
type=pos_int_arg, default=_DEF_MAX_ALLELES,
help="allow no more than this number of alleles per marker (default: "
"%(default)s)")
parser.add_argument('-x', '--max-noisy', metavar="N", type=pos_int_arg,
default=_DEF_MAX_NOISY,
filtergroup.add_argument('-x', '--max-noisy', metavar="N",
type=pos_int_arg, default=_DEF_MAX_NOISY,
help="entirely reject a sample if more than this number of markers "
"have a high non-allelic sequence (default: %(default)s)")
parser.add_argument('-c', '--stuttermark-column', metavar="COLNAME",
filtergroup.add_argument('-c', '--stuttermark-column', metavar="COLNAME",
help="name of column with Stuttermark output; if specified, sequences "
"for which the value in this column does not start with ALLELE "
"are ignored")
parser.add_argument('-F', '--sequence-format', metavar="FORMAT",
choices=("raw", "tssv", "allelename"),
help="convert sequences to the specified format: one of %(choices)s "
"(default: no conversion)")
parser.add_argument('-l', '--library', metavar="LIBRARY",
type=argparse.FileType('r'),
help="library file for sequence format conversion")
add_sequence_format_args(parser)
add_sample_files_args(parser)
#add_arguments
......@@ -195,10 +189,10 @@ def run(args):
raise ValueError("please specify an input file, or pipe in the output "
"of another program")
find_alleles(args.filelist, args.report, args.tag_expr, args.tag_format,
args.min_reads, args.min_allele_pct, args.max_noise_pct,
args.max_alleles, args.max_noisy, args.stuttermark_column,
args.sequence_format, args.library)
find_alleles(args.filelist, args.output, args.report, args.tag_expr,
args.tag_format, args.min_reads, args.min_allele_pct,
args.max_noise_pct, args.max_alleles, args.max_noisy,
args.stuttermark_column, args.sequence_format, args.library)
#run
......
#!/usr/bin/env python
"""
Match background noise profiles to samples.
Match background noise profiles (obtained from e.g., bgestimate) to
samples.
Six new columns are added to the output giving, for each sequence, the
number of reads attributable to noise from other sequences (_noise
columns) and the number of noise reads caused by the presence of this
sequence (_add columns).
"""
import argparse
import sys
#import numpy as np # Only imported when actually running this tool.
from ..lib import parse_library, load_profiles, ensure_sequence_format, nnls, \
get_column_ids
get_column_ids, add_sequence_format_args
__version__ = "0.1dev"
......@@ -169,15 +175,10 @@ def add_arguments(parser):
parser.add_argument('outfile', nargs='?', metavar="OUT",
default=sys.stdout, type=argparse.FileType('w'),
help="the file to write the output to (default: write to stdout)")
parser.add_argument('-F', '--sequence-format', metavar="FORMAT",
choices=("raw", "tssv", "allelename"),
help="convert sequences to the specified format: one of %(choices)s "
"(default: no conversion)")
parser.add_argument('-l', '--library', metavar="LIBRARY",
type=argparse.FileType('r'),
help="library file for sequence format conversion")
parser.add_argument('-M', '--marker', metavar="MARKER",
filtergroup = parser.add_argument_group("filtering options")
filtergroup.add_argument('-M', '--marker', metavar="MARKER",
help="work only on MARKER")
add_sequence_format_args(parser)
#add_arguments
......
#!/usr/bin/env python
"""
Estimate allele-centric background noise profiles (means).
Estimate allele-centric background noise profiles (means) from reference
samples.
Compute a profile of recurring background noise for each unique allele
in the database of reference samples. The profiles obtained can be used
by bgcorrect to filter background noise from samples.
"""
import argparse
import sys
......@@ -11,7 +16,8 @@ import math
from ..lib import get_column_ids, pos_int_arg, add_sample_files_args,\
add_allele_detection_args, map_tags_to_files, nnls,\
ensure_sequence_format, parse_allelelist, parse_library,\
get_sample_data
get_sample_data, add_random_subsampling_args,\
add_sequence_format_args, add_output_args
__version__ = "0.1dev"
......@@ -261,10 +267,10 @@ def ensure_min_samples(allelelist, min_samples):
if marker not in allelelist[tag]:
continue
for true_allele in allelelist[tag][marker]:
if true_allele not in true_alleles:
true_alleles[true_allele] = 1
else:
try:
true_alleles[true_allele] += 1
except KeyError:
true_alleles[true_allele] = 1
# Drop any alleles that occur in less than min_samples samples
# (by dropping the sample for this marker completely).
......@@ -392,7 +398,7 @@ def preprocess_data(data, min_sample_pct):
def generate_profiles(filelist, tag_expr, tag_format, allelefile,
annotation_column, reportfile, min_pct, min_abs,
annotation_column, outfile, reportfile, min_pct, min_abs,
min_samples, min_sample_pct, seqformat, library,
crosstab, marker, homozygotes, limit_reads,
drop_samples):
......@@ -431,7 +437,8 @@ def generate_profiles(filelist, tag_expr, tag_format, allelefile,
(t1-t0))
if not crosstab:
print("\t".join(["marker", "allele", "sequence", "fmean", "rmean"]))
outfile.write("\t".join(
["marker", "allele", "sequence", "fmean", "rmean"]) + "\n")
for marker in data.keys():
p = data[marker]["profiles"]
profile_size = len(p["alleles"])
......@@ -464,12 +471,14 @@ def generate_profiles(filelist, tag_expr, tag_format, allelefile,
if crosstab:
# Cross-tabular output (profiles in rows)
print("\t".join([marker, "0"] + p["alleles"]))
outfile.write("\t".join([marker, "0"] + p["alleles"]) + "\n")
for i in range(p["true alleles"]):
print("\t".join(
[marker, str(i+1)] + map(str, p["profiles_forward"][i])))
print("\t".join(
[marker, str(-i-1)] + map(str, p["profiles_reverse"][i])))
outfile.write("\t".join(
[marker, str(i+1)] + map(str, p["profiles_forward"][i])) +
"\n")
outfile.write("\t".join(
[marker, str(-i-1)] + map(str, p["profiles_reverse"][i])) +
"\n")
else:
# Tab-separated columns format.
for i in range(p["true alleles"]):
......@@ -477,57 +486,45 @@ def generate_profiles(filelist, tag_expr, tag_format, allelefile,
if not (p["profiles_forward"][i][j] +
p["profiles_reverse"][i][j]):
continue
print("\t".join([marker, p["alleles"][i], p["alleles"][j]]+
outfile.write("\t".join(
[marker, p["alleles"][i], p["alleles"][j]] +
map(str, [p["profiles_forward"][i][j],
p["profiles_reverse"][i][j]])))
p["profiles_reverse"][i][j]])) + "\n")
del data[marker]
#generate_profiles
def add_arguments(parser):
add_sample_files_args(parser)
add_output_args(parser)
parser.add_argument('-C', '--cross-tabular', action="store_true",
help="if specified, a space-efficient cross-tabular output format is "
"used instead of the default tab-separated columns format")
add_allele_detection_args(parser)
parser.add_argument('-r', '--report', metavar="OUTFILE",
type=argparse.FileType("w"),
help="write a report to the given file")
parser.add_argument('-m', '--min-pct', metavar="PCT", type=float,
filtergroup = parser.add_argument_group("filtering options")
filtergroup.add_argument('-m', '--min-pct', metavar="PCT", type=float,
default=_DEF_THRESHOLD_PCT,
help="minimum amount of background to consider, as a percentage "
"of the highest allele (default: %4.2f)" % _DEF_THRESHOLD_PCT)
parser.add_argument('-n', '--min-abs', metavar="N", type=pos_int_arg,
filtergroup.add_argument('-n', '--min-abs', metavar="N", type=pos_int_arg,
default=_DEF_THRESHOLD_ABS,
help="minimum amount of background to consider, as an absolute "
"number of reads (default: %(default)s)")
parser.add_argument('-s', '--min-samples', metavar="N", type=pos_int_arg,
default=_DEF_MIN_SAMPLES,
filtergroup.add_argument('-s', '--min-samples', metavar="N",
type=pos_int_arg, default=_DEF_MIN_SAMPLES,
help="require this minimum number of samples for each true allele "
"(default: %(default)s)")
parser.add_argument('-S', '--min-sample-pct', metavar="PCT", type=float,
default=_DEF_MIN_SAMPLE_PCT,
filtergroup.add_argument('-S', '--min-sample-pct', metavar="PCT",
type=float, default=_DEF_MIN_SAMPLE_PCT,
help="require this minimum number of samples for each background "
"product, as a percentage of the number of samples with a "
"particular true allele (default: %(default)s)")
#parser.add_argument('-F', '--sequence-format', metavar="FORMAT",
# choices=("raw", "tssv", "allelename"),
# help="convert sequences to the specified format: one of %(choices)s "
# "(default: no conversion)")
parser.set_defaults(sequence_format="raw") # Force raw sequences.
parser.add_argument('-l', '--library', metavar="LIBRARY",
type=argparse.FileType('r'),
help="library file for sequence format conversion")
parser.add_argument('-C', '--cross-tabular', action="store_true",
help="if specified, a space-efficient cross-tabular output format is "
"used instead of the default tab-separated columns format")
parser.add_argument('-M', '--marker', metavar="MARKER",
filtergroup.add_argument('-M', '--marker', metavar="MARKER",
help="work only on MARKER")
parser.add_argument('-H', '--homozygotes', action="store_true",
filtergroup.add_argument('-H', '--homozygotes', action="store_true",
help="if specified, only homozygous samples will be considered")
parser.add_argument('-R', '--limit-reads', metavar="N", type=pos_int_arg,
default=sys.maxint,
help="simulate lower sequencing depth by randomly dropping reads down "
"to this maximum total number of reads for each sample")
parser.add_argument('-x', '--drop-samples', metavar="N", type=float,
default=0, help="randomly drop this fraction of input samples")
add_sequence_format_args(parser, "raw", True) # Force raw seqs.
add_sample_files_args(parser)
add_random_subsampling_args(parser)
#add_arguments
......@@ -536,11 +533,12 @@ def run(args):
raise ValueError("please specify an input file, or pipe in the output "
"of another program")
generate_profiles(args.filelist, args.tag_expr, args.tag_format,
args.allelelist, args.annotation_column, args.report,
args.min_pct, args.min_abs, args.min_samples,
args.min_sample_pct, args.sequence_format, args.library,
args.cross_tabular, args.marker, args.homozygotes,
args.limit_reads, args.drop_samples)
args.allelelist, args.annotation_column, args.output,
args.report, args.min_pct, args.min_abs,
args.min_samples, args.min_sample_pct,
args.sequence_format, args.library, args.cross_tabular,
args.marker, args.homozygotes, args.limit_reads,
args.drop_samples)
#run
......
#!/usr/bin/env python
"""
Compute allele-centric statistics for background noise in homozygous samples
(min, max, mean, sample variance).
Compute allele-centric statistics for background noise in homozygous
reference samples (min, max, mean, sample variance).
Compute a profile of recurring background noise for each unique allele
in the database of reference samples. The profiles obtained can be used
by bgcorrect to filter background noise from samples.
"""
import argparse
import sys
......@@ -9,7 +13,8 @@ import sys
from ..lib import get_column_ids, pos_int_arg, add_sample_files_args,\
add_allele_detection_args, map_tags_to_files, adjust_stats,\
ensure_sequence_format, parse_allelelist, parse_library,\
get_sample_data
get_sample_data, add_sequence_format_args, add_output_args,\
add_random_subsampling_args
__version__ = "0.1dev"
......@@ -84,7 +89,7 @@ def filter_data(data, min_samples, min_sample_pct):
def compute_stats(filelist, tag_expr, tag_format, allelefile,
annotation_column, min_pct, min_abs, min_samples,