Commit 1083919c authored by Hoogenboom, Jerry
Browse files

Implemented support for non-STR markers, improved file handling and more

Additions and improvements to the FDSTools library file format:
* New [genome_position] section in FDSTools-style library files allows
for specifying the chromosome and position of each marker.
* New [no_repeat] section in FDSTools-style library files allows for
including non-STR markers.
* Comma/semicolon/space-separated values in FDSTools-style library files
can now also be separated by tab characters, and multiple consecutive
separators are no longer collapsed (with the exception of whitespace).
* If no prefix and/or suffix has been specified for an alias, the
prefix/suffix of the marker itself is used.
* Implemented support for non-STR markers (e.g. SNP clusters) and mtDNA
markers. Allele names of the latter follow mtDNA nomenclature.
* Improved the logic of generating STR allele names for sequences that
have a prefix or suffix sequence that was not included in the library
file.
* Updated and clarified various explanatory texts in generated FDSTools
library files.

Fixed:
* Fixed a bug that caused prefix/suffix variants in aliases to go
missing in allele names.

Improved file handling:
* Library files are now closed immediately after parsing them.
* Sample data input files are opened one at a time now.

Visualisations:
* Updated Vega to version 2.3.1.
* Worked around a bug in Google Chrome that caused the 'Save image' link
to stop working after having been used once.
parent d96335b0
*.pyc *.pyc
dist/* dist/*
*.egg-info/* *.egg-info/*
/.project
""" """
Tools for characterisation and filtering of PCR stutter artefacts and other Tools for characterisation and filtering of PCR stutter artefacts and other
systemic noise in Next Generation Sequencing data of forensic STR markers. systemic noise in Next Generation Sequencing data of forensic DNA markers.
""" """
__version_info__ = ('0', '0', '2') __version_info__ = ('0', '0', '2')
......
...@@ -62,7 +62,7 @@ def main(): ...@@ -62,7 +62,7 @@ def main():
default=argparse.SUPPRESS, nargs=argparse.REMAINDER, default=argparse.SUPPRESS, nargs=argparse.REMAINDER,
help="show version number and exit") help="show version number and exit")
parser.add_argument('-d', "--debug", action="store_true", parser.add_argument('-d', "--debug", action="store_true",
help="if specified, debug output is printed to stdout") help="if specified, additional debug output is given")
subparsers = parser.add_subparsers(title='available tools', dest='tool', subparsers = parser.add_subparsers(title='available tools', dest='tool',
metavar='TOOL', help="specify which " metavar='TOOL', help="specify which "
"tool to run") "tool to run")
...@@ -82,7 +82,7 @@ def main(): ...@@ -82,7 +82,7 @@ def main():
version=version(parser.prog, name, module.__version__)) version=version(parser.prog, name, module.__version__))
__tools__[name] = subparser __tools__[name] = subparser
subparser.add_argument('-d', "--debug", action="store_true", subparser.add_argument('-d', "--debug", action="store_true",
help="if specified, debug output is printed to stdout") help="if specified, additional debug output is given")
module.add_arguments(subparser) module.add_arguments(subparser)
subparser.set_defaults(func=module.run) subparser.set_defaults(func=module.run)
try: try:
......
This diff is collapsed.
...@@ -18,7 +18,7 @@ this file to do their job. One may use the allelefinder report ...@@ -18,7 +18,7 @@ this file to do their job. One may use the allelefinder report
of what might be wrong. of what might be wrong.
""" """
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files, \ from ..lib import pos_int_arg, add_input_output_args, get_input_output_files, \
ensure_sequence_format, get_sample_data, parse_library, \ ensure_sequence_format, get_sample_data, \
add_sequence_format_args add_sequence_format_args
__version__ = "0.1dev" __version__ = "0.1dev"
...@@ -50,7 +50,7 @@ _DEF_MAX_NOISY = 2 ...@@ -50,7 +50,7 @@ _DEF_MAX_NOISY = 2
def find_alleles(samples_in, outfile, reportfile, min_reads, min_allele_pct, def find_alleles(samples_in, outfile, reportfile, min_reads, min_allele_pct,
max_noise_pct, max_alleles, max_noisy, stuttermark_column, max_noise_pct, max_alleles, max_noisy, stuttermark_column,
seqformat, library): seqformat, library):
library = parse_library(library) if library is not None else {} library = library if library is not None else {}
outfile.write("\t".join(["sample", "marker", "total", "allele"]) + "\n") outfile.write("\t".join(["sample", "marker", "total", "allele"]) + "\n")
allelelist = {} allelelist = {}
......
...@@ -9,10 +9,10 @@ columns) and the number of noise reads caused by the prescense of this ...@@ -9,10 +9,10 @@ columns) and the number of noise reads caused by the prescense of this
sequence (_add columns), as well as the resulting number of reads after sequence (_add columns), as well as the resulting number of reads after
correction (_corrected columns: original minus _noise plus _add). correction (_corrected columns: original minus _noise plus _add).
""" """
import argparse import argparse, sys
#import numpy as np # Only imported when actually running this tool. #import numpy as np # Only imported when actually running this tool.
from ..lib import parse_library, load_profiles, ensure_sequence_format, nnls, \ from ..lib import load_profiles, ensure_sequence_format, nnls, \
get_column_ids, add_sequence_format_args, \ get_column_ids, add_sequence_format_args, \
add_input_output_args, get_input_output_files add_input_output_args, get_input_output_files
...@@ -203,8 +203,7 @@ def run(args): ...@@ -203,8 +203,7 @@ def run(args):
"of another program") "of another program")
# Read library and profiles once. # Read library and profiles once.
library = parse_library(args.library) if args.library else None profiles = load_profiles(args.profiles, args.library)
profiles = load_profiles(args.profiles, library)
if args.marker: if args.marker:
profiles = {args.marker: profiles[args.marker]} \ profiles = {args.marker: profiles[args.marker]} \
if args.marker in profiles else {} if args.marker in profiles else {}
...@@ -214,6 +213,9 @@ def run(args): ...@@ -214,6 +213,9 @@ def run(args):
if len(infiles) > 1: if len(infiles) > 1:
raise ValueError( raise ValueError(
"multiple input files for sample '%s' specified " % tag) "multiple input files for sample '%s' specified " % tag)
match_profiles(infiles[0], outfile, profiles, library, infile = sys.stdin if infiles[0] == "-" else open(infiles[0], "r")
match_profiles(infile, outfile, profiles, args.library,
args.sequence_format) args.sequence_format)
if infile != sys.stdin:
infile.close()
#run #run
...@@ -14,7 +14,7 @@ import math ...@@ -14,7 +14,7 @@ import math
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\ from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\
add_allele_detection_args, nnls, add_sequence_format_args,\ add_allele_detection_args, nnls, add_sequence_format_args,\
parse_allelelist, parse_library, get_sample_data, \ parse_allelelist, get_sample_data, \
add_random_subsampling_args add_random_subsampling_args
__version__ = "0.1dev" __version__ = "0.1dev"
...@@ -401,8 +401,7 @@ def generate_profiles(samples_in, outfile, reportfile, allelefile, ...@@ -401,8 +401,7 @@ def generate_profiles(samples_in, outfile, reportfile, allelefile,
if reportfile: if reportfile:
t0 = time.time() t0 = time.time()
# Parse library and allele list. # Parse allele list.
library = parse_library(library) if library is not None else None
allelelist = {} if allelefile is None \ allelelist = {} if allelefile is None \
else parse_allelelist(allelefile, seqformat, library) else parse_allelelist(allelefile, seqformat, library)
......
...@@ -8,7 +8,7 @@ can be visualised using "fdstools vis bgraw". Use bghomstats or ...@@ -8,7 +8,7 @@ can be visualised using "fdstools vis bgraw". Use bghomstats or
bgestimate to compute aggregate statistics on noise instead. bgestimate to compute aggregate statistics on noise instead.
""" """
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\ from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\
add_allele_detection_args, parse_allelelist, parse_library,\ add_allele_detection_args, parse_allelelist,\
get_sample_data, add_sequence_format_args get_sample_data, add_sequence_format_args
__version__ = "0.1dev" __version__ = "0.1dev"
...@@ -108,8 +108,7 @@ def compute_ratios(samples_in, outfile, allelefile, annotation_column, min_pct, ...@@ -108,8 +108,7 @@ def compute_ratios(samples_in, outfile, allelefile, annotation_column, min_pct,
min_abs, min_samples, min_sample_pct, seqformat, library, min_abs, min_samples, min_sample_pct, seqformat, library,
marker): marker):
# Parse library and allele list. # Parse allele list.
library = parse_library(library) if library is not None else None
allelelist = {} if allelefile is None \ allelelist = {} if allelefile is None \
else parse_allelelist(allelefile, seqformat, library) else parse_allelelist(allelefile, seqformat, library)
......
...@@ -11,7 +11,7 @@ markers), it is preferable to use bgestimate instead, since it can ...@@ -11,7 +11,7 @@ markers), it is preferable to use bgestimate instead, since it can
handle heterozygous samples as well. handle heterozygous samples as well.
""" """
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\ from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\
add_allele_detection_args, parse_allelelist, parse_library,\ add_allele_detection_args, parse_allelelist,\
get_sample_data, add_sequence_format_args, adjust_stats,\ get_sample_data, add_sequence_format_args, adjust_stats,\
add_random_subsampling_args add_random_subsampling_args
...@@ -101,8 +101,7 @@ def compute_stats(samples_in, outfile, allelefile, annotation_column, min_pct, ...@@ -101,8 +101,7 @@ def compute_stats(samples_in, outfile, allelefile, annotation_column, min_pct,
min_abs, min_samples, min_sample_pct, seqformat, library, min_abs, min_samples, min_sample_pct, seqformat, library,
marker, limit_reads, drop_samples): marker, limit_reads, drop_samples):
# Parse library and allele list. # Parse allele list.
library = parse_library(library) if library is not None else None
allelelist = {} if allelefile is None \ allelelist = {} if allelefile is None \
else parse_allelelist(allelefile, seqformat, library) else parse_allelelist(allelefile, seqformat, library)
......
...@@ -19,19 +19,20 @@ Example: fdstools bgpredict ... | fdstools bgmerge old.txt > out.txt ...@@ -19,19 +19,20 @@ Example: fdstools bgpredict ... | fdstools bgmerge old.txt > out.txt
import argparse import argparse
import sys import sys
from ..lib import load_profiles, ensure_sequence_format, parse_library,\ from ..lib import load_profiles, ensure_sequence_format,\
add_sequence_format_args add_sequence_format_args
__version__ = "0.1dev" __version__ = "0.1dev"
def merge_profiles(infiles, outfile, crosstab, seqformat, library): def merge_profiles(infiles, outfile, crosstab, seqformat, library):
# Parse library file.
library = parse_library(library) if library is not None else None
amounts = {} amounts = {}
for infile in infiles: for infile in infiles:
profiles = load_profiles(infile, library) if infile == "-":
profiles = load_profiles(sys.stdin, library)
else:
with open(infile, "r") as handle:
profiles = load_profiles(handle, library)
for marker in profiles: for marker in profiles:
if marker not in amounts: if marker not in amounts:
amounts[marker] = {} amounts[marker] = {}
...@@ -82,7 +83,6 @@ def merge_profiles(infiles, outfile, crosstab, seqformat, library): ...@@ -82,7 +83,6 @@ def merge_profiles(infiles, outfile, crosstab, seqformat, library):
def add_arguments(parser): def add_arguments(parser):
parser.add_argument('infiles', nargs='+', metavar="FILE", parser.add_argument('infiles', nargs='+', metavar="FILE",
type=argparse.FileType('r'),
help="files containing the background noise profiles to combine; " help="files containing the background noise profiles to combine; "
"if a single file is given, it is merged with input from stdin; " "if a single file is given, it is merged with input from stdin; "
"use '-' to use stdin as an explicit input source") "use '-' to use stdin as an explicit input source")
...@@ -100,9 +100,9 @@ def add_arguments(parser): ...@@ -100,9 +100,9 @@ def add_arguments(parser):
def run(args): def run(args):
if len(args.infiles) < 2: if len(args.infiles) < 2:
if sys.stdin.isatty() or sys.stdin in args.infiles: if sys.stdin.isatty() or "-" in args.infiles:
raise ValueError("please specify at least two input files") raise ValueError("please specify at least two input files")
args.infiles.append(sys.stdin) args.infiles.append("-")
merge_profiles(args.infiles, args.outfile, args.cross_tabular, merge_profiles(args.infiles, args.outfile, args.cross_tabular,
args.sequence_format, args.library) args.sequence_format, args.library)
......
...@@ -28,7 +28,7 @@ import sys ...@@ -28,7 +28,7 @@ import sys
from operator import mul from operator import mul
from ..lib import get_column_ids, reverse_complement, get_repeat_pattern,\ from ..lib import get_column_ids, reverse_complement, get_repeat_pattern,\
mutate_sequence, parse_library,\ mutate_sequence,\
PAT_SEQ_RAW, ensure_sequence_format, add_sequence_format_args PAT_SEQ_RAW, ensure_sequence_format, add_sequence_format_args
__version__ = "0.1dev" __version__ = "0.1dev"
...@@ -209,8 +209,7 @@ def get_relative_frequencies(stutters, combinations): ...@@ -209,8 +209,7 @@ def get_relative_frequencies(stutters, combinations):
def predict_profiles(stuttermodel, seqsfile, outfile, marker_column, def predict_profiles(stuttermodel, seqsfile, outfile, marker_column,
allele_column, default_marker, use_all_data, crosstab, allele_column, default_marker, use_all_data, crosstab,
min_pct, min_r2, seqformat, library): min_pct, min_r2, seqformat, library):
# Parse library and stutter model file. # Parse stutter model file.
library = parse_library(library) if library is not None else None
model = parse_stuttermodel(stuttermodel, min_r2, use_all_data) model = parse_stuttermodel(stuttermodel, min_r2, use_all_data)
# Read list of sequences. # Read list of sequences.
......
...@@ -17,7 +17,7 @@ import argparse ...@@ -17,7 +17,7 @@ import argparse
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\ from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\
add_allele_detection_args, nnls, ensure_sequence_format,\ add_allele_detection_args, nnls, ensure_sequence_format,\
parse_allelelist, load_profiles, add_sequence_format_args,\ parse_allelelist, load_profiles, add_sequence_format_args,\
parse_library, get_sample_data get_sample_data
__version__ = "0.1dev" __version__ = "0.1dev"
...@@ -60,8 +60,7 @@ def add_sample_data(data, sample_data, sample_tag, alleles): ...@@ -60,8 +60,7 @@ def add_sample_data(data, sample_data, sample_tag, alleles):
def blame(samples_in, outfile, allelefile, annotation_column, mode, def blame(samples_in, outfile, allelefile, annotation_column, mode,
profilefile, num, seqformat, libfile, marker): profilefile, num, seqformat, library, marker):
library = parse_library(libfile) if libfile else None
allelelist = {} if allelefile is None \ allelelist = {} if allelefile is None \
else parse_allelelist(allelefile, "raw", library) else parse_allelelist(allelefile, "raw", library)
data = load_profiles(profilefile, library) data = load_profiles(profilefile, library)
......
...@@ -15,6 +15,12 @@ prefix, the STR, and a suffix. The prefix and suffix are optional and ...@@ -15,6 +15,12 @@ prefix, the STR, and a suffix. The prefix and suffix are optional and
are meant to fill the gap between the STR and the primer binding sites. are meant to fill the gap between the STR and the primer binding sites.
The primer binding sites are called 'flanks' in the library file. The primer binding sites are called 'flanks' in the library file.
For non-STR markers, FDSTools library files simply contain the reference
sequence of the region between the flanks. All markers in TSSV library
files are assumed to be STR markers, but the libconvert tool will
include the non-STR markers on a best-effort basis when converting to
the TSSV format.
Allele names typically consist of an allele number compatible with those Allele names typically consist of an allele number compatible with those
obtained from Capillary Electrophoresis (CE), followed by the STR obtained from Capillary Electrophoresis (CE), followed by the STR
sequence in a shortened form and any substitutions or other variants sequence in a shortened form and any substitutions or other variants
...@@ -44,14 +50,13 @@ __version__ = "0.1dev" ...@@ -44,14 +50,13 @@ __version__ = "0.1dev"
# If no input is given, convert the following to FDSTools format. # If no input is given, convert the following to FDSTools format.
_DEFAULT_LIBRARY = "\t".join([ _DEFAULT_LIBRARY = "\t".join([
"MyMarker", "MyMarker",
"ACTAGCTAGCGCTA", "CTGTTTCTGAGTTTCAAGTATGTCTGAG",
"GCTCGATCGATCGA", "TTACATGCTCGTGCACCTTATGGAGGGG",
"TGAT 0 2 AGAT 3 20 ACCT 0 5"]) "GT 0 4 AGGGGA 1 1 GTGA 0 5 GT 8 25"])
def convert_library(infile, outfile, aliases=False): def convert_library(infile, outfile, aliases=False):
pattern_reverse = re.compile("\(([ACGT]+)\)\{(\d+),(\d+)\}") pattern_reverse = re.compile("\(([ACGT]+)\)\{(\d+),(\d+)\}")
library = parse_library(infile) library = parse_library(infile, stream=True)
if "aliases" in library: if "aliases" in library:
# FDSTools -> TSSV # FDSTools -> TSSV
markers = set() markers = set()
...@@ -63,6 +68,8 @@ def convert_library(infile, outfile, aliases=False): ...@@ -63,6 +68,8 @@ def convert_library(infile, outfile, aliases=False):
markers.add(marker) markers.add(marker)
for marker in library["regex"]: for marker in library["regex"]:
markers.add(marker) markers.add(marker)
for marker in library["nostr_reference"]:
markers.add(marker)
marker_aliases = {} marker_aliases = {}
for alias in library["aliases"]: for alias in library["aliases"]:
...@@ -91,13 +98,15 @@ def convert_library(infile, outfile, aliases=False): ...@@ -91,13 +98,15 @@ def convert_library(infile, outfile, aliases=False):
pattern = pattern_reverse.findall( pattern = pattern_reverse.findall(
library["regex"][marker].pattern) library["regex"][marker].pattern)
elif aliases or marker not in marker_aliases: elif aliases or marker not in marker_aliases:
# Normal marker, or separtely from its aliases. # Normal marker, or separately from its aliases.
if marker not in library["flanks"]: if marker not in library["flanks"]:
continue # Worthless, no flanks. continue # Worthless, no flanks.
flanks = library["flanks"][marker] flanks = library["flanks"][marker]
if marker in library["regex"]: if marker in library["regex"]:
pattern = pattern_reverse.findall( pattern = pattern_reverse.findall(
library["regex"][marker].pattern) library["regex"][marker].pattern)
elif marker in library["nostr_reference"]:
pattern = [(library["nostr_reference"][marker], "1", "1")]
else: else:
# Merge marker with its aliases. # Merge marker with its aliases.
flanks = False flanks = False
...@@ -142,6 +151,9 @@ def convert_library(infile, outfile, aliases=False): ...@@ -142,6 +151,9 @@ def convert_library(infile, outfile, aliases=False):
if unmatched: if unmatched:
middle = [(x[0], "0", x[2]) for x in middle] + \ middle = [(x[0], "0", x[2]) for x in middle] + \
[(x, "0", "1") for x in unmatched] [(x, "0", "1") for x in unmatched]
elif marker in library["nostr_reference"]:
middle = [(library["nostr_reference"][marker],
"0" if marker in marker_aliases else "1", "1")]
# Add prefixes and suffixes of aliases. # Add prefixes and suffixes of aliases.
if marker in marker_aliases: if marker in marker_aliases:
...@@ -175,51 +187,100 @@ def convert_library(infile, outfile, aliases=False): ...@@ -175,51 +187,100 @@ def convert_library(infile, outfile, aliases=False):
# Create sections. Most of them will be empty but we will put # Create sections. Most of them will be empty but we will put
# comments in them to explain how to use them. # comments in them to explain how to use them.
ini.add_section("aliases") ini.add_section("aliases")
ini.set("aliases", "; Specify three comma-separated values: marker " ini.set("aliases",
"name, sequence, and allele name.") "; Specify three comma-separated values: marker name, "
ini.set("aliases", "; You may use the alias name to specify flanks, " "sequence, and allele name.")
"prefix, and suffix for this") ini.set("aliases",
ini.set("aliases", "; allele specifically. You cannot specify a " "; You may use the alias name to specify flanks, prefix, and "
"repeat structure for an alias.") "suffix for this")
ini.set("aliases", ";MyAlias = MyMarker, AGCTAGC, MySpecialAlleleName") ini.set("aliases",
"; allele specifically. You cannot specify a repeat structure "
"for an alias.")
ini.set("aliases",
";MyAlias = MyMarker, AGCTAGC, MySpecialAlleleName")
ini.add_section("flanks") ini.add_section("flanks")
ini.set("flanks", "; Specify two comma-separated values: left flank " ini.set("flanks",
"and right flank.") "; Specify two comma-separated values: left flank and right "
"flank.")
ini.add_section("prefix") ini.add_section("prefix")
ini.set("prefix", "; Specify all known prefix sequences separated " ini.set("prefix",
"by commas. The first sequence") "; Specify all known prefix sequences separated by commas. "
ini.set("prefix", "; listed is used as the reference sequence when " "The first sequence")
"generating allele names. The") ini.set("prefix",
ini.set("prefix", "; prefix is the sequence between the left flank " "; listed is used as the reference sequence when generating "
"and the repeat and is omitted") "allele names. The")
ini.set("prefix", "; from allele names. Deviations from the reference " ini.set("prefix",
"are expressed as variants.") "; prefix is the sequence between the left flank and the "
"repeat and is omitted")
ini.set("prefix",
"; from allele names. Deviations from the reference are "
"expressed as variants.")
ini.add_section("suffix") ini.add_section("suffix")
ini.set("suffix", "; Specify all known suffix sequences separated " ini.set("suffix",
"by commas. The first sequence") "; Specify all known suffix sequences separated by commas. "
ini.set("suffix", "; listed is used as the reference sequence when " "The first sequence")
"generating allele names. The") ini.set("suffix",
ini.set("suffix", "; suffix is the sequence between the repeat and " "; listed is used as the reference sequence when generating "
"the right flank.") "allele names. The")
ini.set("suffix",
"; suffix is the sequence between the repeat and the right "
"flank.")
ini.add_section("repeat") ini.add_section("repeat")
ini.set("repeat", "; Specify the STR repeat structure in " ini.set("repeat",
"space-separated triples of sequence,") "; Specify the STR repeat structure in space-separated "
ini.set("repeat", "; minimum number of repeats, and maximum number of " "triples of sequence,")
"repeats.") ini.set("repeat",
"; minimum number of repeats, and maximum number of repeats.")
ini.add_section("no_repeat")
ini.set("no_repeat",
"; Specify the reference sequence for non-STR markers.")
ini.set("no_repeat",
";MySNPMarker = TTTTAACACAAAAAATTTAAAATAAGAAGAATAAATAGTGCTTGCTTT")
ini.set("no_repeat",
";MyMtMarker = AACCCCCCCT")
ini.add_section("genome_position")
ini.set("genome_position",
"; Specify the chromosome number and position of the first "
"base after the first")
ini.set("genome_position",
"; flank of each marker. Specify 'M' as the chromosome name "
"for markers on")
ini.set("genome_position",
"; mitochondrial DNA. Allele names generated for these "
"markers will follow mtDNA")
ini.set("genome_position",
"; nomenclature guidelines.")
ini.set("genome_position",
";MyMarker = 9, 36834400")
ini.set("genome_position",
";MySNPMarker = X, 21214600")
ini.set("genome_position",
";MyMtMarker = M, 301")
ini.add_section("length_adjust") ini.add_section("length_adjust")
ini.set("length_adjust", "; When generating allele names, the CE " ini.set("length_adjust",
"allele number is based on the length") "; When generating allele names for STR alleles, the CE "
ini.set("length_adjust", "; of the sequence (prefix+repeat+suffix) " "allele number is based")
"minus the adjustment specified here.") ini.set("length_adjust",
"; on the length of the sequence (prefix+repeat+suffix) minus "
"the adjustment")
ini.set("length_adjust",
"; specified here.")
ini.add_section("block_length") ini.add_section("block_length")
ini.set("block_length", "; Specify the core repeat unit length of " ini.set("block_length",
"each marker. The default length is 4.") "; Specify the core repeat unit length of each marker. The "
"default length is 4.")
ini.add_section("max_expected_copies") ini.add_section("max_expected_copies")
ini.set("max_expected_copies", "; Specify the maximum expected number " ini.set("max_expected_copies",
"copies (i.e., alleles) for each " "; Specify the maximum expected number of copies (i.e., "
"marker.") "alleles) for each")
ini.set("max_expected_copies", "; The default is 2. Specify 1 " ini.set("max_expected_copies",
"here for markers on the Y chromosome.") "; marker in a single reference sample (only used for "
"allelefinder). The default")
ini.set("max_expected_copies",
"; is 2. Specify 1 here for haploid markers (i.e., those on "
"mitochondrial DNA or")
ini.set("max_expected_copies",
"; on the Y chromosome).")
# Enter flanking sequences and STR definitions. # Enter flanking sequences and STR definitions.
fmt = "%%-%is" % reduce(max, map(len, fmt = "%%-%is" % reduce(max, map(len,
......
...@@ -27,7 +27,7 @@ input files; instead it automatically performs any required conversions ...@@ -27,7 +27,7 @@ input files; instead it automatically performs any required conversions
while running any tool. Explicitly running seqconvert is never a while running any tool. Explicitly running seqconvert is never a
necessity; use this tool for your own convenience. necessity; use this tool for your own convenience.
""" """
import argparse import argparse, sys
from ..lib import get_column_ids, ensure_sequence_format, parse_library,\ from ..lib import get_column_ids, ensure_sequence_format, parse_library,\
reverse_complement, add_input_output_args,\ reverse_complement, add_input_output_args,\
...@@ -110,10 +110,10 @@ def add_arguments(parser): ...@@ -110,10 +110,10 @@ def add_arguments(parser):
parser.add_argument('-M', '--marker', metavar="MARKER", parser.add_argument('-M', '--marker', metavar="MARKER",
help="assume the specified marker for all sequences") help="assume the specified marker for all sequences")
parser.add_argument('-l', '--library', metavar="LIBRARY", parser.add_argument('-l', '--library', metavar="LIBRARY",
type=argparse.FileType('r'), type=parse_library,
help="library file for sequence format conversion") help="library file for sequence format conversion")
parser.add_argument('-L', '--library2', metavar="LIBRARY", parser.add_argument('-L', '--library2', metavar="LIBRARY",
type=argparse</