Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
extractor
Commits
774a59db
Commit
774a59db
authored
Aug 21, 2014
by
J.K. Vis
Browse files
Updated cut-off scheme
parent
1172e248
Changes
4
Hide whitespace changes
Inline
Side-by-side
extractor/debug.cc
View file @
774a59db
...
...
@@ -8,8 +8,8 @@
// FILE INFORMATION:
// File: debug.cc
// Author: Jonathan K. Vis
// Revision: 2.1.
0
// Date: 2014/08/
06
// Revision: 2.1.
1
// Date: 2014/08/
21
// *******************************************************************
// DESCRIPTION:
// This source can be used to debug the Extractor library within
...
...
@@ -46,7 +46,8 @@ int main(int argc, char* argv[])
size_t
const
reference_length
=
ftell
(
file
);
rewind
(
file
);
char_t
*
reference
=
new
char_t
[
reference_length
];
fread
(
reference
,
sizeof
(
char_t
),
reference_length
,
file
);
size_t
const
ref_length
=
fread
(
reference
,
sizeof
(
char_t
),
reference_length
,
file
);
static_cast
<
void
>
(
ref_length
);
fclose
(
file
);
file
=
fopen
(
argv
[
2
],
"r"
);
...
...
@@ -60,7 +61,8 @@ int main(int argc, char* argv[])
size_t
const
sample_length
=
ftell
(
file
);
rewind
(
file
);
char_t
*
sample
=
new
char_t
[
sample_length
];
fread
(
sample
,
sizeof
(
char_t
),
sample_length
,
file
);
size_t
const
alt_length
=
fread
(
sample
,
sizeof
(
char_t
),
sample_length
,
file
);
static_cast
<
void
>
(
alt_length
);
fclose
(
file
);
...
...
extractor/extractor.cc
View file @
774a59db
...
...
@@ -8,8 +8,8 @@
// FILE INFORMATION:
// File: extractor.cc (depends on extractor.h)
// Author: Jonathan K. Vis
// Revision: 2.1.
2
// Date: 2014/08/1
3
// Revision: 2.1.
4
// Date: 2014/08/
2
1
// *******************************************************************
// DESCRIPTION:
// This library can be used to generate HGVS variant descriptions as
...
...
@@ -217,8 +217,9 @@ size_t extractor(std::vector<Variant> &variant,
// Calculate the LCS (possibly in reverse complement) of the two
// strings.
size_t
const
cut_off
=
reference_length
<
THRESHOLD_CUT_OFF
?
1
:
EXTRACTION_CUT_OFF
;
std
::
vector
<
Substring
>
substring
;
size_t
const
length
=
LCS
(
substring
,
reference
,
complement
,
reference_start
,
reference_end
,
sample
,
sample_start
,
sample_end
);
size_t
const
length
=
LCS
(
substring
,
reference
,
complement
,
reference_start
,
reference_end
,
sample
,
sample_start
,
sample_end
,
cut_off
);
// No LCS found: this is a transposition or a deletion/insertion.
...
...
@@ -456,8 +457,9 @@ size_t extractor_transposition(std::vector<Variant> &variant,
// Extract the LCS (from the whole reference string).
size_t
const
cut_off
=
static_cast
<
size_t
>
(
TRANSPOSITION_CUT_OFF
*
static_cast
<
double
>
(
sample_length
));
std
::
vector
<
Substring
>
substring
;
size_t
const
length
=
LCS
(
substring
,
reference
,
complement
,
0
,
global_reference_length
,
sample
,
sample_start
,
sample_end
);
size_t
const
length
=
LCS
(
substring
,
reference
,
complement
,
0
,
global_reference_length
,
sample
,
sample_start
,
sample_end
,
cut_off
);
// No LCS found: this is a deletion/insertion.
...
...
@@ -557,18 +559,12 @@ size_t LCS(std::vector<Substring> &substring,
size_t
const
reference_end
,
char_t
const
*
const
sample
,
size_t
const
sample_start
,
size_t
const
sample_end
)
size_t
const
sample_end
,
size_t
const
cut_off
)
{
size_t
const
reference_length
=
reference_end
-
reference_start
;
size_t
const
sample_length
=
sample_end
-
sample_start
;
// Always fully explore strings smaller than this threshold.
static
size_t
const
THRESHOLD
=
16000
;
// A dynamic cut-off for unevenly matched string lengths.
double
const
a
=
reference_length
>=
sample_length
?
reference_length
:
sample_length
;
double
const
b
=
reference_length
>=
sample_length
?
sample_length
:
reference_length
;
size_t
const
cut_off
=
(
reference_length
>
THRESHOLD
?
ceil
((
1.0
-
b
/
(
a
+
0.1
*
b
))
*
b
)
/
8
:
0
)
+
1
;
// The initial k.
size_t
k
=
reference_length
>
sample_length
?
sample_length
/
4
:
reference_length
/
4
;
...
...
extractor/extractor.h
View file @
774a59db
...
...
@@ -8,8 +8,8 @@
// FILE INFORMATION:
// File: extractor.h (implemented in extractor.cc)
// Author: Jonathan K. Vis
// Revision: 2.1.
2
// Date: 2014/08/1
3
// Revision: 2.1.
4
// Date: 2014/08/
2
1
// *******************************************************************
// DESCRIPTION:
// This library can be used to generate HGVS variant descriptions as
...
...
@@ -34,7 +34,7 @@ namespace mutalyzer
{
// Version string for run-time identification.
static
char
const
*
const
VERSION
=
"2.1.
2
"
;
static
char
const
*
const
VERSION
=
"2.1.
4
"
;
// The character type used for all strings. For now it should just be
...
...
@@ -90,6 +90,15 @@ static size_t const WEIGHT_SEPARATOR = 1; // i.e., _, [, ], ;
static
size_t
const
WEIGHT_SUBSTITUTION
=
1
;
// i.e., >
// Cut-off constants. For normal extraction use the threshold to
// specify the maximum reference length without any cut-off. Otherwise
// the extraction cut-off is used.
// The cut-off for transpositions is set at a fraction of the sample length.
static
size_t
const
THRESHOLD_CUT_OFF
=
16000
;
static
size_t
const
EXTRACTION_CUT_OFF
=
500
;
static
double
const
TRANSPOSITION_CUT_OFF
=
0.3
;
// This global variable is used to have access to the whole reference
// string at any point in the extraction process. Commonly used in
// transposition extraction.
...
...
@@ -317,6 +326,7 @@ struct Substring
// @arg sample: sample string
// @arg sample_start: starting position in the sample string
// @arg sample_end: ending position in the sample string
// @arg cut_off: optional cut-off value for the k in LCS_k
// @return: length of the LCS
// *******************************************************************
size_t
LCS
(
std
::
vector
<
Substring
>
&
substring
,
...
...
@@ -326,7 +336,8 @@ size_t LCS(std::vector<Substring> &substring,
size_t
const
reference_end
,
char_t
const
*
const
sample
,
size_t
const
sample_start
,
size_t
const
sample_end
);
size_t
const
sample_end
,
size_t
const
cut_off
=
1
);
// *******************************************************************
// LCS_1 function
...
...
extractor/extractor.i
View file @
774a59db
...
...
@@ -8,8 +8,8 @@
// FILE INFORMATION:
// File: extractor.i (SWIG interface file)
// Author: Jonathan K. Vis
// Revision: 2.1.
2
// Date: 2014/08/1
3
// Revision: 2.1.
4
// Date: 2014/08/
2
1
// *******************************************************************
// DESCRIPTION:
// Defines the SWIG interface for the Extractor library for use in
...
...
@@ -32,7 +32,7 @@ namespace mutalyzer
{
// Version string for run-time identification.
static
char
const
*
const
VERSION
=
"2.1.
2
"
;
static
char
const
*
const
VERSION
=
"2.1.
4
"
;
// The character type used for all strings. For now it should just be
// a char.
...
...
@@ -85,8 +85,6 @@ static size_t const WEIGHT_SUBSTITUTION = 1; // i.e., >
// sample string
// @member type: type of the variant described using the
// constants above
// @member weight: weight of the variant according to the weight
// constants above (used internally)
// @member transposition_start: starting position of a transposition
// within the reference string
// @member transposition_end: ending position of a transposition
...
...
@@ -99,7 +97,6 @@ struct Variant
size_t
sample_start
;
size_t
sample_end
;
unsigned
int
type
;
size_t
weight
;
size_t
transposition_start
;
size_t
transposition_end
;
}
;
// Variant
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment