extractor.i 5.47 KB
Newer Older
jkvis's avatar
jkvis committed
1
// *******************************************************************
jkvis's avatar
jkvis committed
2
//   (C) Copyright 2014 Leiden Institute of Advanced Computer Science
jkvis's avatar
jkvis committed
3
4
5
6
7
8
9
10
//   Universiteit Leiden
//   All Rights Reserved
// *******************************************************************
// Extractor (library)
// *******************************************************************
// FILE INFORMATION:
//   File:     extractor.i (SWIG interface file)
//   Author:   Jonathan K. Vis
J.K. Vis's avatar
J.K. Vis committed
11
12
//   Revision: 2.1.4
//   Date:     2014/08/21
jkvis's avatar
jkvis committed
13
14
15
16
17
18
// *******************************************************************
// DESCRIPTION:
//   Defines the SWIG interface for the Extractor library for use in
//   languages other than C/C++.
// *******************************************************************

jkvis's avatar
jkvis committed
19
20
21
22
23
// Pull in SWIG's std::vector support; it is needed to wrap the
// std::vector<Variant> member of Variant_List.
%include "std_vector.i"

// Name of the generated module.
%module extractor
%{
// Everything between %{ and %} is copied into the generated wrapper
// source, so only valid C/C++ may appear here.
#include "extractor.h"
%} // extractor
jkvis's avatar
jkvis committed
25
26
27
28

// Instantiate std::vector<mutalyzer::Variant> for the target language
// under the name VariantVector.
namespace std
{
%template(VariantVector) vector<mutalyzer::Variant>;
} // std
jkvis's avatar
jkvis committed
30
31
32
33

namespace mutalyzer
{

34
// Version string for run-time identification. It carries the same
// value as the Revision field in the file header.
static char const* const VERSION = "2.1.4";
36

jkvis's avatar
jkvis committed
37
// The character type used for all strings. For now it should just be
// a char.
typedef char char_t;

41
42
43
44
// These constants can be used to specify the type of string to be
// extracted. The extractor is primarily focused on DNA/RNA. When
// TYPE_PROTEIN (or another value) is used no complement string is
// constructed and no reverse complement is calculated.
static int const TYPE_DNA     = 0; // DNA/RNA
static int const TYPE_PROTEIN = 1; // Protein/other

48
49
50
// These constants can be used to determine the type of variant.
// Substitution covers most: deletions, insertions, substitutions, and
// insertion/deletions. Identity is used to describe the unchanged
// (matched) regions. The constants are coded as bitfields and should
// be appropriately combined, e.g., IDENTITY | TRANSPOSITION_OPEN for
// describing a real transposition. Note that some combinations do NOT
// make sense, e.g., SUBSTITUTION | REVERSE_COMPLEMENT.
//
// Each constant occupies its own bit, so the flags can be OR-ed
// together and tested individually with a bitwise AND.
static unsigned int const IDENTITY            = 0x01;
static unsigned int const REVERSE_COMPLEMENT  = 0x02;
static unsigned int const SUBSTITUTION        = 0x04;
static unsigned int const TRANSPOSITION_OPEN  = 0x08;
static unsigned int const TRANSPOSITION_CLOSE = 0x10;
60

jkvis's avatar
jkvis committed
61
62
63
64
// These constants are used in calculating the weight of the generated
// description and consequently used to end the description process
// when a certain ``trivial'' weight is exceeded. The weight constants
// are based on their HGVS description lengths, i.e., the number of
// characters used.
static size_t const WEIGHT_BASE               = 1; // i.e., A, G, T, C
static size_t const WEIGHT_DELETION           = 3; // i.e., del
static size_t const WEIGHT_DELETION_INSERTION = 6; // i.e., delins
static size_t const WEIGHT_INSERTION          = 3; // i.e., ins
static size_t const WEIGHT_INVERSION          = 3; // i.e., inv
static size_t const WEIGHT_SEPARATOR          = 1; // i.e., _, [, ], ;
static size_t const WEIGHT_SUBSTITUTION       = 1; // i.e., >

74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// *******************************************************************
// Variant structure
//   This structure describes a variant (region of change).
//
//   @member reference_start: starting position of the variant within
//                            the reference string
//   @member reference_end: ending position of the variant within the
//                          reference string
//   @member sample_start: starting position of the variant within the
//                         sample string
//   @member sample_end: ending position of the variant within the
//                       sample string
//   @member type: type of the variant described using the
//                 constants above
//   @member transposition_start: starting position of a transposition
//                                within the reference string
//   @member transposition_end: ending position of a transposition
//                              within the reference string
// *******************************************************************
struct Variant
{
  // Extent of the variant in the reference string.
  size_t       reference_start;
  size_t       reference_end;
  // Extent of the variant in the sample string.
  size_t       sample_start;
  size_t       sample_end;
  // Bitwise combination of the variant type constants.
  unsigned int type;
  // Extent of a transposition in the reference string.
  size_t       transposition_start;
  size_t       transposition_end;
}; // Variant

104
105
106
107
108
109
110
111
112
113
114
115
116
117
// *******************************************************************
// Variant_List structure
//   This structure describes a list of variants with associated
//   metadata.
//
//   @member weight_position: weight used for position descriptors
//   @member variants: vector of variants
// *******************************************************************
struct Variant_List
{
  // Weight used for position descriptors.
  size_t               weight_position;
  // The variants themselves; each element is a Variant structure.
  std::vector<Variant> variants;
}; // Variant_List

118
119
120
121
122
123
124
125
126
127
// *******************************************************************
// extract function
//   This function is the interface function for Python.
//
//   @arg reference: reference string
//   @arg reference_length: length of the reference string
//   @arg sample: sample string
//   @arg sample_length: length of the sample string
//   @arg type: type of strings  0 --- DNA/RNA (default)
//                               1 --- Protein/other
128
//   @return: variant list with metadata
129
// *******************************************************************
130
131
132
133
134
Variant_List extract(char_t const* const reference,
                     size_t const        reference_length,
                     char_t const* const sample,
                     size_t const        sample_length,
                     int const           type = TYPE_DNA);
jkvis's avatar
jkvis committed
135

136
} // mutalyzer
jkvis's avatar
jkvis committed
137
138
139

#include "extractor.h"