Commit a086676f authored by J.K. Vis's avatar J.K. Vis
Browse files

Reverse frame shift indexing fixed

parent 0bebab7d
......@@ -20,7 +20,7 @@ TARGET=_extractor.so
DEBUG=debug.cc
CXX=g++
CFLAGS=-c -fpic -Wall -Wextra -O3 -D__debug__
CFLAGS=-c -fpic -Wall -Wextra -O3 #-D__debug__
LDFLAGS=-Wall -O3 -shared
SWIG=swig
......
......@@ -23,16 +23,16 @@ using namespace mutalyzer;
#include <cstdio>
/*
#include <iostream>
#include <string>
using namespace std;
*/
// Entry point.
int main(int argc, char* argv[])
{
/*
if (argc < 3)
{
fprintf(stderr, "usage: %s reference sample\n", argv[0]);
......@@ -70,9 +70,8 @@ int main(int argc, char* argv[])
size_t const alt_length = fread(sample, sizeof(char_t), sample_length, file);
static_cast<void>(alt_length);
fclose(file);
*/
/*
string header[4305];
string protein[4305];
......@@ -82,27 +81,28 @@ int main(int argc, char* argv[])
getline(cin, protein[i]);
} // for
for (int i = 0; i < 10; ++i)
for (int i = 0; i < 4305; ++i)
{
cerr << i << endl;
for (int j = i + 1; j < 10; ++j)
double best = 1.f;
for (int j = i + 1; j < 4305; ++j)
{
vector<Variant> variant;
extract(variant, protein[i].c_str(), protein[i].length(), protein[j].c_str(), protein[j].length(), TYPE_PROTEIN, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF");
fprintf(stdout, "%s %s\n", header[i].c_str(), header[j].c_str());
for (std::vector<Variant>::iterator it = variant.begin(); it != variant.end(); ++it)
{
if (it->type >= FRAME_SHIFT)
if (it->type >= FRAME_SHIFT && best > it->probability)
{
fprintf(stdout, "%ld--%ld, %ld--%ld, %d, %lf, %ld--%ld\n", it->reference_start, it->reference_end, it->sample_start, it->sample_end, it->type, 1.f - it->probability, it->transposition_start, it->transposition_end);
best = it->probability;
fprintf(stdout, "%.9s %.9s %ld--%ld, %ld--%ld, %d, %.10e\n", header[i].c_str(), header[j].c_str(), it->reference_start, it->reference_end, it->sample_start, it->sample_end, it->type, 1.f - it->probability);
} // if
} // for
} // for
fprintf(stdout, "\n");
} // for
*/
/*
// The actual extraction.
std::vector<Variant> variant;
size_t const weight = extract(variant, reference, reference_length - 1, sample, sample_length - 1, TYPE_PROTEIN, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF");
......@@ -126,7 +126,7 @@ int main(int argc, char* argv[])
// Cleaning up.
delete[] reference;
delete[] sample;
*/
return 0;
} // main
......@@ -875,7 +875,6 @@ void extractor_frame_shift(std::vector<Variant> &annotation,
{
probability_compound += frame_shift_frequency[reference[lcs.reference_index + i] & 0x7f][reference[lcs.reference_index + i + 1] & 0x7f][4];
} // if
//fprintf(stderr, " probability_compound: %lf\n", probability_compound);
probability *= probability_compound;
} // for
......@@ -1386,15 +1385,15 @@ void LCS_frame_shift(std::vector<Substring> &substring,
} // if
if (lcs[i % 2][j][2] > fs_substring[2].length)
{
fs_substring[2] = Substring(reference_start + j - lcs[i % 2][j][2] + 1, sample_start + i - lcs[i % 2][j][2] + 1, lcs[i % 2][j][2], FRAME_SHIFT_REVERSE);
fs_substring[2] = Substring(reference_end - j - 1, sample_start + i - lcs[i % 2][j][2] + 1, lcs[i % 2][j][2], FRAME_SHIFT_REVERSE);
} // if
if (lcs[i % 2][j][3] > fs_substring[3].length)
{
fs_substring[3] = Substring(reference_start + j - lcs[i % 2][j][3], sample_start + i - lcs[i % 2][j][3] + 1, lcs[i % 2][j][3], FRAME_SHIFT_REVERSE_1);
fs_substring[3] = Substring(reference_end - j - 1, sample_start + i - lcs[i % 2][j][3] + 1, lcs[i % 2][j][3], FRAME_SHIFT_REVERSE_1);
} // if
if (lcs[i % 2][j][4] > fs_substring[4].length)
{
fs_substring[4] = Substring(reference_start + j - lcs[i % 2][j][4], sample_start + i - lcs[i % 2][j][4] + 1, lcs[i % 2][j][4], FRAME_SHIFT_REVERSE_2);
fs_substring[4] = Substring(reference_end - j - 1, sample_start + i - lcs[i % 2][j][4] + 1, lcs[i % 2][j][4], FRAME_SHIFT_REVERSE_2);
} // if
} // for
} // for
......@@ -1644,7 +1643,21 @@ static void initialize_acid_frequency(void)
// based on a given codon string.
void initialize_frame_shift_map(char_t const* const codon_string)
{
// FIXME:
for (size_t i = 0; i < 64; ++i)
{
acid_map[i] = 0x0ull;
} // for
for (size_t i = 0; i < 128; ++i)
{
for (size_t j = 0; j < 128; ++j)
{
for (size_t k = 0; k < 5; ++k)
{
frame_shift_count[i][j][k] = 0;
frame_shift_frequency[i][j][k] = .05f;
} // for
} // for
} // for
initialize_acid_frequency();
for (size_t i = 0; i < 64; ++i)
{
......
......@@ -284,6 +284,10 @@ void initialize_frame_shift_map(char_t const* const codon_string)
void initialize_acid_frequency(void)
{
for (size_t i = 0; i < 128; ++i)
{
acid_frequency[i] = .05f;
} // for
acid_frequency['A'] = .09515673f;
acid_frequency['C'] = .01157279f;
acid_frequency['D'] = .05151007f;
......@@ -342,8 +346,9 @@ void LCS_frame_shift(std::vector<Substring> &substring,
fprintf(stderr, "%4c%c", reference[reference_start + i - 1], reference[reference_start + i]);
} // for
fprintf(stderr, "\n");
*/
/*
fprintf(stderr, " LCS table (reverse):\n %c(%c)", reference[reference_end - 1], reference[reference_end - 1]);
for (size_t i = 1; i < reference_length; ++i)
{
......@@ -428,15 +433,15 @@ void LCS_frame_shift(std::vector<Substring> &substring,
} // if
if (lcs[i % 2][j][2] > fs_substring[2].length)
{
fs_substring[2] = Substring(reference_start + j - lcs[i % 2][j][2] + 1, sample_start + i - lcs[i % 2][j][2] + 1, lcs[i % 2][j][2], FRAME_SHIFT_REVERSE);
fs_substring[2] = Substring(reference_end - j - 1, sample_start + i - lcs[i % 2][j][2] + 1, lcs[i % 2][j][2], FRAME_SHIFT_REVERSE);
} // if
if (lcs[i % 2][j][3] > fs_substring[3].length)
{
fs_substring[3] = Substring(reference_start + j - lcs[i % 2][j][3], sample_start + i - lcs[i % 2][j][3] + 1, lcs[i % 2][j][3], FRAME_SHIFT_REVERSE_1);
fs_substring[3] = Substring(reference_end - j - 1, sample_start + i - lcs[i % 2][j][3] + 1, lcs[i % 2][j][3], FRAME_SHIFT_REVERSE_1);
} // if
if (lcs[i % 2][j][4] > fs_substring[4].length)
{
fs_substring[4] = Substring(reference_start + j - lcs[i % 2][j][4], sample_start + i - lcs[i % 2][j][4] + 1, lcs[i % 2][j][4], FRAME_SHIFT_REVERSE_2);
fs_substring[4] = Substring(reference_end - j - 1, sample_start + i - lcs[i % 2][j][4] + 1, lcs[i % 2][j][4], FRAME_SHIFT_REVERSE_2);
} // if
} // for
//fprintf(stderr, "\n");
......@@ -667,7 +672,6 @@ void extractor_frame_shift(std::vector<Variant> &annotation,
} // for
fprintf(stderr, "\n");
// Recursively apply this function to the prefixes of the strings.
std::vector<Variant> prefix;
extractor_frame_shift(prefix, reference, reference_start, lcs.reference_index, sample, sample_start, lcs.sample_index);
......@@ -693,6 +697,23 @@ int main(int, char* [])
initialize_acid_frequency();
initialize_frame_shift_map(CODON_STRING);
/*
std::vector<Variant> annotation;
extractor_frame_shift(annotation, "GQPVEFTWQSDDGISL",
0, 16,
"ATLIPVLNGVHQAGLS",
0, 16);
// Printing the variants.
fprintf(stdout, "Annotation (%ld):\n", annotation.size());
for (std::vector<Variant>::iterator it = annotation.begin(); it != annotation.end(); ++it)
{
fprintf(stdout, " %ld--%ld, %ld--%ld, %d, %lf, %ld--%ld\n", it->reference_start, it->reference_end, it->sample_start, it->sample_end, it->type, 1.f - it->probability, it->transposition_start, it->transposition_end);
} // for
return 1;
*/
{
fprintf(stdout, "\n%s %s\n", "MDYSL", "MALFP");
std::vector<Variant> annotation;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment