extractor.h 27 KB
Newer Older
jkvis's avatar
jkvis committed
1
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
2
//   (C) Copyright 2015 Leiden Institute of Advanced Computer Science
jkvis's avatar
jkvis committed
3
4
5
6
7
8
9
10
//   Universiteit Leiden
//   All Rights Reserved
// *******************************************************************
// Extractor (library)
// *******************************************************************
// FILE INFORMATION:
//   File:     extractor.h (implemented in extractor.cc)
//   Author:   Jonathan K. Vis
J.K. Vis's avatar
J.K. Vis committed
11
12
//   Revision: 2.1.8
//   Date:     2015/01/12
jkvis's avatar
jkvis committed
13
14
// *******************************************************************
// DESCRIPTION:
jkvis's avatar
jkvis committed
15
//   This library can be used to generate HGVS variant descriptions as
jkvis's avatar
jkvis committed
16
17
18
19
20
21
//   accepted by the Mutalyzer Name Checker.
// *******************************************************************

#if !defined(__extractor_h__)
#define __extractor_h__

J.K. Vis's avatar
J.K. Vis committed
22
#include <cmath>
jkvis's avatar
jkvis committed
23
#include <cstddef>
J.K. Vis's avatar
J.K. Vis committed
24
#include <cstdlib>
jkvis's avatar
jkvis committed
25
26
#include <vector>

J.K. Vis's avatar
J.K. Vis committed
27
28
29
30
31
32

#if defined(__debug__)
#include <cstdio>
#endif


jkvis's avatar
jkvis committed
33
34
35
namespace mutalyzer
{

36
// Version string for run-time identification.
J.K. Vis's avatar
J.K. Vis committed
37
static char const* const VERSION = "2.1.8";
J.K. Vis's avatar
J.K. Vis committed
38
39


jkvis's avatar
jkvis committed
40
// The character type used for all strings. For now it should just be
41
// a char.
J.K. Vis's avatar
J.K. Vis committed
42
43
44
typedef char char_t;


45
46
47
48
49
50
51
52
53
54
// *******************************************************************
// Variant Extraction
//   These functions are used to extract variants (regions of change)
//   between two strings.
// *******************************************************************


// These constants can be used to specify the type of string to be
// extracted. The extractor is primarily focussed on DNA/RNA. When
// TYPE_PROTEIN (or another value) is used no complement string is
J.K. Vis's avatar
J.K. Vis committed
55
56
57
// constructed and no reverse complement is calculated. For
// TYPE_PROTEIN frame shift detection is applied on
// deletions/insertions.
58
static int const TYPE_DNA     = 0; // DNA/RNA (default)
J.K. Vis's avatar
J.K. Vis committed
59
60
static int const TYPE_PROTEIN = 1; // Protein
static int const TYPE_OTHER   = 2; // Other strings
61

J.K. Vis's avatar
J.K. Vis committed
62

63
64
65
// These constants can be used to deterimine the type of variant.
// Substitution covers most: deletions, insertions, substitutions, and
// insertion/deletions. Indentity is used to describe the unchanged
66
67
68
69
// (matched) regions. The constants are coded as bitfields and should
// be appropriately combined, e.g., IDENTITY | TRANSPOSITION_OPEN for
// describing a real transposition. Note that some combinations do NOT
// make sense, e.g., SUBSTITUION | REVERSE_COMPLEMENT.
70
71
72
73
74
static unsigned int const IDENTITY            = 0x01;
static unsigned int const REVERSE_COMPLEMENT  = 0x02;
static unsigned int const SUBSTITUTION        = 0x04;
static unsigned int const TRANSPOSITION_OPEN  = 0x08;
static unsigned int const TRANSPOSITION_CLOSE = 0x10;
75
76
77
static unsigned int const FRAME_SHIFT         = 0x20;
static unsigned int const FRAME_SHIFT_1       = 0x01;
static unsigned int const FRAME_SHIFT_2       = 0x02;
jkvis's avatar
jkvis committed
78
79


80
81
82
83
84
85
86
87
// These constants are used in calculating the weight of the generated
// description and consequently used to end the description process
// when a certain ``trivial'' weight is exeeded. The weight constants
// are based on their HGVS description lengths, i.e., the amount of
// characters used. The weight_position variable is used to have a
// constant weight for a position description regardless the actual
// position. It is usually set to ceil(log10(|reference| / 4)), and
// its intention is to be constant during an extraction run.
J.K. Vis's avatar
J.K. Vis committed
88
extern size_t       weight_position;
jkvis's avatar
jkvis committed
89

J.K. Vis's avatar
J.K. Vis committed
90
static size_t const WEIGHT_BASE               = 1; // i.e., A, C, G, T
91
92
93
94
95
96
static size_t const WEIGHT_DELETION           = 3; // i.e., del
static size_t const WEIGHT_DELETION_INSERTION = 6; // i.e., delins
static size_t const WEIGHT_INSERTION          = 3; // i.e., ins
static size_t const WEIGHT_INVERSION          = 3; // i.e., inv
static size_t const WEIGHT_SEPARATOR          = 1; // i.e., _, [, ], ;
static size_t const WEIGHT_SUBSTITUTION       = 1; // i.e., >
jkvis's avatar
jkvis committed
97

98

99
// Cut-off constants. The threshold is used to specify the maximum
J.K. Vis's avatar
J.K. Vis committed
100
101
102
// reference length without any cut-off. Otherwise the weight_position
// is used as a cut-off. For transpositions the cut-off is specified
// as a fraction of the sample_length.
J.K. Vis's avatar
J.K. Vis committed
103
static size_t const THRESHOLD_CUT_OFF     = 16000;
104
static double const TRANSPOSITION_CUT_OFF =   0.1;
J.K. Vis's avatar
J.K. Vis committed
105
106


107
108
109
// This global variable is used to have access to the whole reference
// string at any point in the extraction process. Commonly used in
// transposition extraction.
J.K. Vis's avatar
J.K. Vis committed
110
extern size_t global_reference_length;
jkvis's avatar
jkvis committed
111

112

J.K. Vis's avatar
J.K. Vis committed
113
114
// Codon to amino acid table assuming the order of A, C, G, T, thus:
// codon_table[0] = AAA, ..., codon_table[63] = TTT.
J.K. Vis's avatar
J.K. Vis committed
115
116
117
extern char_t codon_table[64];


118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// *******************************************************************
// Variant structure
//   This structure describes a variant (region of change).
//
//   @member reference_start: starting position of the variant within
//                            the reference string
//   @member reference_end: ending position of the variant within the
//                          reference string
//   @member sample_start: starting position of the variant within the
//                         sample string
//   @member sample_end: ending position of the variant within the
//                       sample string
//   @member type: type of the variant described using the
//                 constants above
//   @member weight: weight of the variant according to the weight
//                   constants above (used internally)
//   @member transposition_start: starting position of a transposition
J.K. Vis's avatar
J.K. Vis committed
135
//                                within the reference string
136
//   @member transposition_end: ending position of a transposition
J.K. Vis's avatar
J.K. Vis committed
137
//                              within the reference string
138
// *******************************************************************
jkvis's avatar
jkvis committed
139
140
struct Variant
{
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
  size_t       reference_start;
  size_t       reference_end;
  size_t       sample_start;
  size_t       sample_end;
  unsigned int type;
  size_t       weight;
  size_t       transposition_start;
  size_t       transposition_end;

  inline Variant(size_t const       reference_start,
                 size_t const       reference_end,
                 size_t const       sample_start,
                 size_t const       sample_end,
                 unsigned int const type                = IDENTITY,
                 size_t const       weight              = 0,
                 size_t const       transposition_start = 0,
                 size_t const       transposition_end   = 0):
158
159
160
161
         reference_start(reference_start),
         reference_end(reference_end),
         sample_start(sample_start),
         sample_end(sample_end),
J.K. Vis's avatar
J.K. Vis committed
162
163
         type(type),
         weight(weight),
164
         transposition_start(transposition_start),
J.K. Vis's avatar
J.K. Vis committed
165
         transposition_end(transposition_end) { }
jkvis's avatar
jkvis committed
166
167
168
169

  inline Variant(void) { }
}; // Variant

170
171
172
173
174
175
176
177
178
179
180
181
182
183
// *******************************************************************
// Variant_List structure
//   This structure describes a list of variants with associated
//   metadata.
//
//   @member weight_position: weight used for position descriptors
//   @member variants: vector of variants
// *******************************************************************
struct Variant_List
{
  size_t               weight_position;
  std::vector<Variant> variants;
}; // Variant_List

184
185
186
187
188
189
190
191
192
193
// *******************************************************************
// extract function
//   This function is the interface function for Python. It is just a
//   wrapper for the C++ extract function below.
//
//   @arg reference: reference string
//   @arg reference_length: length of the reference string
//   @arg sample: sample string
//   @arg sample_length: length of the sample string
//   @arg type: type of strings  0 --- DNA/RNA (default)
J.K. Vis's avatar
J.K. Vis committed
194
195
//                               1 --- Protein
//                               2 --- Other
J.K. Vis's avatar
J.K. Vis committed
196
197
198
//   @arg codon_string: serialized codon table: 64 characters
//                      corresponding to the codons AAA, ..., TTT.
//                      Only for protein extraction (frame shifts).
199
//   @return: variant list with metadata
200
// *******************************************************************
201
202
203
204
Variant_List extract(char_t const* const reference,
                     size_t const        reference_length,
                     char_t const* const sample,
                     size_t const        sample_length,
J.K. Vis's avatar
J.K. Vis committed
205
206
                     int const           type = TYPE_DNA,
                     char_t const* const codon_string = 0);
J.K. Vis's avatar
J.K. Vis committed
207

208
209
210
211
212
213
214
215
216
217
218
219
220
// *******************************************************************
// extract function
//   This function extracts the variants (regions of change) between
//   the reference and the sample string. It automatically constructs
//   the reverse complement string for the reference string if the
//   string type is DNA/RNA.
//
//   @arg variant: vector of variants
//   @arg reference: reference string
//   @arg reference_length: length of the reference string
//   @arg sample: sample string
//   @arg sample_length: length of the sample string
//   @arg type: type of strings  0 --- DNA/RNA (default)
J.K. Vis's avatar
J.K. Vis committed
221
222
//                               1 --- Protein
//                               2 --- Other
J.K. Vis's avatar
J.K. Vis committed
223
224
225
//   @arg codon_string: serialized codon table: 64 characters
//                      corresponding to the codons AAA, ..., TTT.
//                      Only for protein extraction (frame shifts).
226
227
//   @return: weight of the extracted variants
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
228
229
230
231
232
size_t extract(std::vector<Variant> &variant,
               char_t const* const   reference,
               size_t const          reference_length,
               char_t const* const   sample,
               size_t const          sample_length,
J.K. Vis's avatar
J.K. Vis committed
233
234
               int const             type = TYPE_DNA,
               char_t const* const   codon_string = 0);
J.K. Vis's avatar
J.K. Vis committed
235

236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
// *******************************************************************
// extractor function
//   This function extracts the variants (regions of change) between
//   the reference and the sample string by recursively calling itself
//   on prefixes and suffixes of a longest common substring.
//
//   @arg variant: vector of variants
//   @arg reference: reference string
//   @arg complement: complement string (can be null for strings other
//                    than DNA/RNA)
//   @arg reference_start: starting position in the reference string
//   @arg reference_end: ending position in the reference string
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
//   @return: weight of the extracted variants
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
253
254
255
256
257
258
259
260
261
size_t extractor(std::vector<Variant> &variant,
                 char_t const* const   reference,
                 char_t const* const   complement,
                 size_t const          reference_start,
                 size_t const          reference_end,
                 char_t const* const   sample,
                 size_t const          sample_start,
                 size_t const          sample_end);

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
// *******************************************************************
// extractor_transposition function
//   This function extracts the variants (regions of change) between
//   a part of the sample string classified as an insertion and the
//   whole reference string.
//
//   @arg variant: vector of variants
//   @arg reference: reference string
//   @arg complement: complement string (can be null for strings other
//                    than DNA/RNA)
//   @arg reference_start: starting position in the reference string
//                         used for the deletion part
//   @arg reference_end: ending position in the reference string used
//                       for the deletion part
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
//   @arg weight_trivial: trivial weight to describe the transposition
//                        as a normal insertion (used for ending the
//                        extraction process)
//   @return: weight of the extracted variants
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
284
285
286
287
288
289
290
291
292
size_t extractor_transposition(std::vector<Variant> &variant,
                               char_t const* const   reference,
                               char_t const* const   complement,
                               size_t const          reference_start,
                               size_t const          reference_end,
                               char_t const* const   sample,
                               size_t const          sample_start,
                               size_t const          sample_end,
                               size_t const          weight_trivial = 0);
jkvis's avatar
jkvis committed
293

J.K. Vis's avatar
J.K. Vis committed
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
// *******************************************************************
// extractor_protein function
//   This function extracts the variants (regions of change) between
//   the reference and the sample protein string by recursively
//   calling itself on prefixes and suffixes of a longest common
//   substring, calculated by the LCS_1 algorithm (these strings are
//   very short).
//
//   @arg variant: vector of variants
//   @arg reference: reference string
//   @arg reference_start: starting position in the reference string
//   @arg reference_end: ending position in the reference string used
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
//   @return: weight of the extracted variants
// *******************************************************************
size_t extractor_protein(std::vector<Variant> &variant,
                         char_t const* const   reference,
                         size_t const          reference_start,
                         size_t const          reference_end,
                         char_t const* const   sample,
                         size_t const          sample_start,
                         size_t const          sample_end);

jkvis's avatar
jkvis committed
319

320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
// *******************************************************************
// Longest Common Substring (LCS) calculation
//   These functions are useful for LCS calculation of (large similar)
//   strings.
// *******************************************************************


// *******************************************************************
// Substring structure
//   This structure describes a common substring between two strings.
//
//   @member reference_index: starting position of the substring
//                            within the reference sequence
//   @member sample_index: ending position of the substring within the
//                         sample sequence
//   @member length: length of the substring
//   @member reverse_complement: indicates a reverse complement
//                               substring (only for DNA/RNA)
// *******************************************************************
jkvis's avatar
jkvis committed
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
struct Substring
{
  size_t reference_index;
  size_t sample_index;
  size_t length;
  bool   reverse_complement;

  inline Substring(size_t const reference_index,
                   size_t const sample_index,
                   size_t const length,
                   bool const   reverse_complement = false):
         reference_index(reference_index),
         sample_index(sample_index),
         length(length),
         reverse_complement(reverse_complement) { }

  inline Substring(void) { }
}; // Substring

358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
// *******************************************************************
// LCS function
//   This function calculates the longest common substrings between
//   two (three?) strings by choosing an initial k and calling the
//   lcs_k function. The k is automatically reduced if necessary until
//   the LCS of the two strings approaches some cutoff threshold.
//
//   @arg substring: vector of substrings
//   @arg reference: reference string
//   @arg complement: complement string (can be null for strings other
//                    than DNA/RNA)
//   @arg reference_start: starting position in the reference string
//   @arg reference_end: ending position in the reference string
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
J.K. Vis's avatar
J.K. Vis committed
374
//   @arg cut_off: optional cut-off value for the k in LCS_k
375
376
//   @return: length of the LCS
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
377
378
379
380
381
382
383
size_t LCS(std::vector<Substring> &substring,
           char_t const* const     reference,
           char_t const* const     complement,
           size_t const            reference_start,
           size_t const            reference_end,
           char_t const* const     sample,
           size_t const            sample_start,
J.K. Vis's avatar
J.K. Vis committed
384
385
           size_t const            sample_end,
           size_t const            cut_off = 1);
J.K. Vis's avatar
J.K. Vis committed
386

387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
// *******************************************************************
// LCS_1 function
//   This function calculates the longest common substrings between
//   two (three?) strings. It asumes no similarity between both
//   strings. Not for use for large strings. This is the classical
//   dynamic programming algorithm.
//
//   @arg substring: vector of substrings
//   @arg reference: reference string
//   @arg complement: complement string (can be null for strings other
//                    than DNA/RNA)
//   @arg reference_start: starting position in the reference string
//   @arg reference_end: ending position in the reference string
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
//   @return: length of the LCS
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
405
406
407
408
409
410
411
412
413
size_t LCS_1(std::vector<Substring> &substring,
             char_t const* const     reference,
             char_t const* const     complement,
             size_t const            reference_start,
             size_t const            reference_end,
             char_t const* const     sample,
             size_t const            sample_start,
             size_t const            sample_end);

414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
// *******************************************************************
// LCS_k function
//   This function calculates the longest common substrings between
//   two (three?) strings by encoding the reference and complement
//   strings into non-overlapping k-mers and the sample string into
//   overlapping k-mers. This function can be used for large similar
//   strings. If the returned vector is empty or the length of the
//   substrings is less or equal 2k, try again with a smaller k.
//
//   @arg substring: vector of substrings
//   @arg reference: reference string
//   @arg complement: complement string (can be null for strings other
//                    than DNA/RNA)
//   @arg reference_start: starting position in the reference string
//   @arg reference_end: ending position in the reference string
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
//   @arg k: size of the k-mers, must be greater than 1
//   @return: length of the LCS
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
435
436
437
438
439
440
441
442
443
444
445
size_t LCS_k(std::vector<Substring> &substring,
             char_t const* const     reference,
             char_t const* const     complement,
             size_t const            reference_start,
             size_t const            reference_end,
             char_t const* const     sample,
             size_t const            sample_start,
             size_t const            sample_end,
             size_t const            k);


446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
// *******************************************************************
// General string matching functions
//   These functions are useful for string matching.
// *******************************************************************

// *******************************************************************
// string_match function
//   This function is more or less equivalent to C's strncmp.
//
//   @arg string_1: first string to be compared
//   @arg string_2: second string to be compared
//   @arg length: maximum length to be compared

//   @return: true iff string_1 and string_2 match for the given
//            length
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
462
463
464
465
bool string_match(char_t const* const string_1,
                  char_t const* const string_2,
                  size_t const        length);

466
467
468
469
470
471
472
473
474
475
476
477
478
// *******************************************************************
// string_match_reverse function
//   This function is very similar to C's strncmp, but it traverses
//   string_1 from end to start while traversing string_2 from start
//   to end (useful for the reverse complement in DNA/RNA).
//
//   @arg string_1: first string to be compared
//   @arg string_2: second string to be compared
//   @arg length: maximum length to be compared

//   @return: true iff string_1 and string_2 match in their respective
//            directions for the given length
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
479
480
481
482
bool string_match_reverse(char_t const* const string_1,
                          char_t const* const string_2,
                          size_t const        length);

483
484
485
486
487
488
489
490
491
492
493
494
495
// *******************************************************************
// prefix_match function
//   This function calculates the length (in characters) of the common
//   prefix between two strings. The result of this function is also
//   used in the suffix_match function.
//
//   @arg reference: reference string
//   @arg reference_length: reference length
//   @arg sample: sample string
//   @arg sample_length: sample length

//   @return: the length of the common prefix
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
496
497
498
499
size_t prefix_match(char_t const* const reference,
                    size_t const        reference_length,
                    char_t const* const sample,
                    size_t const        sample_length);
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514

// *******************************************************************
// suffix_match function
//   This function calculates the length (in characters) of the common
//   suffix between two strings. It needs the calculated common
//   prefix.
//
//   @arg reference: reference string
//   @arg reference_length: reference length
//   @arg sample: sample string
//   @arg sample_length: sample length
//   @arg prefix: length of the common prefix

//   @return: the length of the common suffix
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
515
516
517
518
519
520
size_t suffix_match(char_t const* const reference,
                    size_t const        reference_length,
                    char_t const* const sample,
                    size_t const        sample_length,
                    size_t const        prefix = 0);

521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537

// *******************************************************************
// IUPAC Nucleotide Acid Notation functions
//   These functions are useful for calculating the complement of DNA/
//   RNA strings.
// *******************************************************************


// *******************************************************************
// IUPAC_complement function
//   This function converts a IUPAC Nucleotide Acid Notation into its
//   complement.
//
//   @arg base: character from the IUPAC Nucleotide Acid Notation
//              alphabet
//   @return: its corresponding IUPAC complement for single bases only
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
538
char_t IUPAC_complement(char_t const base);
539
540
541
542
543
544
545
546
547
548
549
550
551

// *******************************************************************
// IUPAC_complement function
//   This function converts a string in IUPAC Nucleotide Acid Notation
//   into its complement. A new string is allocated, so deletion is
//   the responsibility of the caller.
//
//   @arg string: string in the IUPAC Nucleotide Acid Notation
//                alphabet
//   @arg length: number of characters in the string to convert (might
//                be less than the actual string length)
//   @return: string containing the complement of the input string
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
552
553
554
555
char_t const* IUPAC_complement(char_t const* const string,
                               size_t const        length);


J.K. Vis's avatar
J.K. Vis committed
556
557
558
559
560
// *******************************************************************
// Amino Acid functions
//   These functions are useful for calculating frame shifts.
// *******************************************************************

J.K. Vis's avatar
J.K. Vis committed
561
562
563
564
565
566
567
568
569
570
571
572
573
// *******************************************************************
// frame_shift function
//   This function calculates the frame shift A reference amino acid
//   is checked against two possible partial overlaps between every
//   combination of two sample (observed) amino acids. Possible
//   results are: 0 (no frame shift), 1, 2, or 3 (both frame shifts
//   are possible).
//
//   @arg reference: reference amino acid
//   @arg sample_1: first sample amino acid
//   @arg sample_2: second sample amino acid
//   @return: frame shift
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
574
575
576
unsigned int frame_shift(char_t const reference,
                         char_t const sample_1,
                         char_t const sample_2);
J.K. Vis's avatar
J.K. Vis committed
577
578

// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
579
// annotate_frame_shift function
J.K. Vis's avatar
J.K. Vis committed
580
581
582
583
584
585
586
587
588
589
590
591
//   This function calculates frame shift annotations for
//   deletions/insertions.
//
//   @arg variant: vector of variants
//   @arg reference: reference string
//   @arg reference_start: starting position in the reference string
//   @arg reference_end: ending position in the reference string
//   @arg sample: sample string
//   @arg sample_start: starting position in the sample string
//   @arg sample_end: ending position in the sample string
//   @return: number of frame shift variants
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
592
593
594
595
596
597
598
size_t annotate_frame_shift(std::vector<Variant> &variant,
                            char_t const* const   reference,
                            size_t const          reference_start,
                            size_t const          reference_end,
                            char_t const* const   sample,
                            size_t const          sample_start,
                            size_t const          sample_end);
J.K. Vis's avatar
J.K. Vis committed
599
600


J.K. Vis's avatar
J.K. Vis committed
601
#if defined(__debug__)
602
603
604
605
606
607
608
609
610
611
612
613
// *******************************************************************
// Dprint_truncated function
//   Debug function for printing large strings in truncated form: a
//   prefix of a certain length ... a suffix of the same length.
//
//   @arg string: string to be printed
//   @arg start: starting position in the string
//   @arg end: ending position in the string
//   @arg length: length of the prefix and suffix
//   @arg stream: file stream to print to
//   @return: the length of the printed string
// *******************************************************************
J.K. Vis's avatar
J.K. Vis committed
614
615
616
617
618
619
size_t Dprint_truncated(char_t const* const string,
                        size_t const        start,
                        size_t const        end,
                        size_t const        length = 40,
                        FILE*               stream = stderr);
#endif
jkvis's avatar
jkvis committed
620

jkvis's avatar
jkvis committed
621

J.K. Vis's avatar
J.K. Vis committed
622
} // mutalyzer
jkvis's avatar
jkvis committed
623
624
625

#endif