bgmerge.py 4.19 KB
Newer Older
jhoogenboom's avatar
jhoogenboom committed
1
#!/usr/bin/env python
2
3

#
4
# Copyright (C) 2017 Jerry Hoogenboom
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#
# This file is part of FDSTools, data analysis tools for Next
# Generation Sequencing of forensic DNA markers.
#
# FDSTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# FDSTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with FDSTools.  If not, see <http://www.gnu.org/licenses/>.
#

jhoogenboom's avatar
jhoogenboom committed
23
24
25
26
27
28
29
30
31
32
33
"""
Merge multiple files containing background noise profiles.

Background noise profiles are merged in the order in which they are
specified.  If multple files specify a different value for the same
allele and sequence, the value of the first file is used.

It is convenient to pipe the output of bgpredict and/or bgestimate
into bgmerge to merge that with an existing file containing background
profiles.  Specify '-' as one of the input files to read from stdin
(i.e., read input from a pipe).  If only one input file is specified,
jhoogenboom's avatar
jhoogenboom committed
34
35
36
'-' is implicitly used as the second input file.  Note that as a result,
in case of conflicting values, the value in the specified input file
will take precedence over the value in the data that was piped in.
jhoogenboom's avatar
jhoogenboom committed
37
38
39
40
41
42

Example: fdstools bgpredict ... | fdstools bgmerge old.txt > out.txt
"""
import argparse
import sys

43
from errno import EPIPE
44
from ..lib import load_profiles_new, ensure_sequence_format, glob_path,\
jhoogenboom's avatar
jhoogenboom committed
45
46
                  add_sequence_format_args

47
__version__ = "1.0.3"
jhoogenboom's avatar
jhoogenboom committed
48
49


50
def merge_profiles(infiles, outfile, seqformat, library):
51
52
53
    outfile.write("\t".join(
        ["marker", "allele", "sequence", "fmean", "rmean", "tool"]) + "\n")
    seen = {}
jhoogenboom's avatar
jhoogenboom committed
54
    for infile in infiles:
55
        if infile == "-":
56
            profiles = load_profiles_new(sys.stdin, library)
57
58
        else:
            with open(infile, "r") as handle:
59
                profiles = load_profiles_new(handle, library)
jhoogenboom's avatar
jhoogenboom committed
60
        for marker in profiles:
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
            for allele in profiles[marker]:
                for sequence in profiles[marker][allele]:
                    if marker not in seen:
                        seen[marker] = {}
                    if allele not in seen[marker]:
                        seen[marker][allele] = set()
                    elif sequence in seen[marker][allele]:
                        continue
                    outfile.write("\t".join([marker] + [
                        ensure_sequence_format(seq, seqformat, library=library,
                            marker=marker) for seq in (allele, sequence)] +
                        map(str, (
                            profiles[marker][allele][sequence]["forward"],
                            profiles[marker][allele][sequence]["reverse"])) +
                        [profiles[marker][allele][sequence]["tool"]]) + "\n")
                    seen[marker][allele].add(sequence)
jhoogenboom's avatar
jhoogenboom committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#merge_profiles


def add_arguments(parser):
    parser.add_argument('infiles', nargs='+', metavar="FILE",
        help="files containing the background noise profiles to combine; "
             "if a single file is given, it is merged with input from stdin; "
             "use '-' to use stdin as an explicit input source")
    outgroup = parser.add_argument_group("output file options")
    outgroup.add_argument('-o', '--output', dest="outfile", metavar="FILE",
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="file to write output to (default: write to stdout)")
    add_sequence_format_args(parser, "raw", True)  # Force raw seqs.
#add_arguments


def run(args):
95
96
97
    infiles = [x for x in args.infiles for x in glob_path(x)]
    if len(infiles) < 2:
        if sys.stdin.isatty() or "-" in infiles:
jhoogenboom's avatar
jhoogenboom committed
98
            raise ValueError("please specify at least two input files")
99
        infiles.append("-")
jhoogenboom's avatar
jhoogenboom committed
100

101
102
103
104
105
106
    try:
        merge_profiles(infiles, args.outfile,args.sequence_format,args.library)
    except IOError as e:
        if e.errno == EPIPE:
            return
        raise
jhoogenboom's avatar
jhoogenboom committed
107
#run