Commit e4ddcb3b authored by Florian Kurpicz's avatar Florian Kurpicz

Added original files

parent 2c6930a4
The sais-lite copyright is as follows:
Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
# Makefile for suftest and test
#
# `make` / `make all` builds the suftest benchmark driver via make's built-in
# rules; `make test` builds and runs the exhaustive self-test, then removes
# the test binary again.

# options
CC = llvm-gcc
#CXX = g++
#OUTPUT_OPTION = -o $@
CFLAGS = -ffast-math -O9 -funroll-loops -DNDEBUG
#CFLAGS = -O3 -fomit-frame-pointer -funroll-loops
#CXXFLAGS = -O3 -fomit-frame-pointer
CPPFLAGS = -Wall -DNDEBUG
#CPPFLAGS = -Wall
LDFLAGS =
LDLIBS =
#TARGET_ARCH =

# targets
# `test` must be phony: its recipe creates a file called `test`, and if
# ./test fails that file is left behind, which would otherwise make every
# later `make test` report "up to date" without running anything.
.PHONY: all test clean distclean
all: suftest
suftest: sais.o suftest.o
test:
	$(CC) -O -g -Wall test.c sais.c -o test
	./test
	$(RM) test test.exe
distclean: clean
clean:
	$(RM) suftest suftest.exe test test.exe sais.o suftest.o

# dependencies
sais.o suftest.o: sais.h Makefile
sais-lite-LCP-0.0.1
----------------------
This is an ad-hoc implementation of the algorithm described in
"Inducing the LCP-Array" (http://arxiv.org/abs/1101.3448)
by Johannes Fischer.
It is based on Yuta Mori's sais-lite 2.4.1 (http://sites.google.com/site/yuta256),
which, in turn, is an implementation of the following paper:
Ge Nong, Sen Zhang and Wai Hong Chan
Two Efficient Algorithms for Linear Suffix Array Construction
DCC 2009.
To compile, type 'make'. Then run ./suftest <testfile>.
----
Johannes Fischer (johannes.fischer@kit.edu)
\ No newline at end of file
This diff is collapsed.
/*
* sais.h for sais-lite
* Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* Include guard for the sais-lite public declarations. */
#ifndef _SAIS_H
#define _SAIS_H 1
/* Give the declarations C linkage when the header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* find the suffix array SA of T[0..n-1]
use a working space (excluding T and SA) of at most 2n+O(lg n)
NOTE(review): LCP is not described by the original comment. The caller in
suftest.c passes a buffer of n ints for LCP and allocates n+1 ints for SA
("+1 for computing LCP"), and overwrites T[n-1] with 0 before the call;
presumably LCP receives the LCP array -- confirm against sais.c.
Callers in this package treat any non-zero return value as failure. */
int sais(const unsigned char *T, int *SA, int *LCP, int n);
/* find the suffix array SA of T[0..n-1] in {0..k-1}^n
use a working space (excluding T and SA) of at most MAX(4k,2n)
Callers in this package treat any non-zero return value as failure. */
int
sais_int(const int *T, int *SA, int n, int k);
/* burrows-wheeler transform
T is the input of length n, U receives the transformed text and A is a
caller-supplied int buffer (test.c passes n ints).
test.c treats a negative return value as failure and compares a
non-negative one with the 1-based rank of suffix 0 in the suffix array --
presumably the BWT primary index; confirm against sais.c. */
int
sais_bwt(const unsigned char *T, unsigned char *U, int *A, int n);
/* burrows-wheeler transform for an integer text with symbols in {0..k-1};
same return convention as sais_bwt as far as test.c uses it. */
int
sais_int_bwt(const int *T, int *U, int *A, int n, int k);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _SAIS_H */
This diff is collapsed.
/*
* suftest.c for sais-lite
* Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "sais.h"
/*
 * Verifies that SA is the suffix array of T[0..n-1].
 *
 * T       - text of n bytes
 * SA      - candidate suffix array of n entries
 * n       - length of T and SA
 * verbose - non-zero to report progress and failures on stderr
 *
 * Returns 0 when SA is accepted (n == 0 is accepted without looking at the
 * pointers), -1 for invalid arguments, -2 when an entry lies outside
 * [0, n-1], -3 when the first characters of consecutive suffixes are out of
 * order, and -4 when a suffix is found at the wrong position.
 */
static
int
sufcheck(const unsigned char *T, const int *SA, int n, int verbose) {
  int bucket[256] = {0};
  int idx, pos, expect, sum, last_slot;
  int ch;

  if(verbose != 0) { fprintf(stderr, "sufcheck: "); }

  /* Nothing to verify for an empty text. */
  if(n == 0) {
    if(verbose != 0) { fprintf(stderr, "Done.\n"); }
    return 0;
  }

  /* Reject unusable arguments. */
  if(!((T != NULL) && (SA != NULL) && (n >= 0))) {
    if(verbose != 0) { fprintf(stderr, "Invalid arguments.\n"); }
    return -1;
  }

  /* Every entry has to be a valid text position. */
  for(idx = 0; idx < n; ++idx) {
    if((SA[idx] >= n) || (SA[idx] < 0)) {
      if(verbose != 0) {
        fprintf(stderr, "Out of the range [0,%d].\n"
                        " SA[%d]=%d\n",
                n - 1, idx, SA[idx]);
      }
      return -2;
    }
  }

  /* Leading characters of consecutive suffixes must not decrease. */
  for(idx = 1; idx < n; ++idx) {
    if(T[SA[idx]] < T[SA[idx - 1]]) {
      if(verbose != 0) {
        fprintf(stderr, "Suffixes in wrong order.\n"
                        " T[SA[%d]=%d]=%d > T[SA[%d]=%d]=%d\n",
                idx - 1, SA[idx - 1], T[SA[idx - 1]], idx, SA[idx], T[SA[idx]]);
      }
      return -3;
    }
  }

  /* Count the characters, then turn the counts into bucket start offsets. */
  for(idx = 0; idx < n; ++idx) { bucket[T[idx]] += 1; }
  for(ch = 0, sum = 0; ch < 256; ++ch) {
    int count = bucket[ch];
    bucket[ch] = sum;
    sum += count;
  }

  /* The first slot of the last character's bucket is set aside for the
     suffix that precedes text position 0 cyclically, i.e. n - 1. */
  last_slot = bucket[T[n - 1]];
  bucket[T[n - 1]] += 1;

  /* Walk SA in order: the predecessor of each suffix must sit in the next
     unused slot of the bucket of its first character. */
  for(idx = 0; idx < n; ++idx) {
    pos = SA[idx];
    if(pos > 0) {
      pos -= 1;
      ch = T[pos];
      expect = bucket[ch];
    } else {
      pos = n - 1;
      ch = T[pos];
      expect = last_slot;
    }

    if((expect < 0) || (SA[expect] != pos)) {
      if(verbose != 0) {
        fprintf(stderr, "Suffix in wrong position.\n"
                        " SA[%d]=%d or\n"
                        " SA[%d]=%d\n",
                expect, (0 <= expect) ? SA[expect] : -1, idx, SA[idx]);
      }
      return -4;
    }

    if(expect != last_slot) {
      /* Advance the bucket cursor; -1 marks a bucket that is used up. */
      int next = bucket[ch] + 1;
      if((next >= n) || (T[SA[next]] != ch)) { next = -1; }
      bucket[ch] = next;
    }
  }

  if(verbose >= 1) { fprintf(stderr, "Done.\n"); }
  return 0;
}
/*
 * Writes the one-line usage message for `progname` to stderr and then
 * terminates the process with exit code `status`. Does not return.
 */
static
void
print_help(const char *progname, int status) {
  FILE *out = stderr;

  fprintf(out, "usage: %s FILE\n\n", progname);
  exit(status);
}
/*
 * Benchmark driver: reads FILE (argv[1]) into memory, overwrites its last
 * byte with a 0 sentinel, times sais() on it and then times a naive LCP
 * computation over the resulting suffix array (which overwrites whatever
 * sais() stored in LCP[1..n-1]).
 *
 * Prints usage and exits successfully for no argument, -h or --help, and
 * exits with failure for any other argument count. Exits with EXIT_FAILURE
 * when the file cannot be opened, sized, read or held in memory, or when
 * sais() reports an error. An empty file is reported and treated as success.
 */
int
main(int argc, const char *argv[]) {
  FILE *fp;
  const char *fname;
  unsigned char *T;
  int *SA;
  int *LCP;
  long n;
  int i, l;
  clock_t start, finish;

  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }

  /* Open a file for reading. */
  if((fp = fopen(fname = argv[1], "rb")) == NULL) {
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Get the file size. */
  if(fseek(fp, 0, SEEK_END) == 0) {
    n = ftell(fp);
    rewind(fp);
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* An empty file has no suffixes, and the sentinel write T[n-1] below
     would land on T[-1]. */
  if(n == 0) {
    fprintf(stderr, "%s: `%s' is empty, nothing to do.\n", argv[0], fname);
    fclose(fp);
    return 0;
  }

  /* sais() takes the length as an int and SA holds n+1 ints, so both n+1
     and the byte size of SA must be representable. */
  if((n >= INT_MAX) || ((size_t)n + 1 > (size_t)-1 / sizeof(int))) {
    fprintf(stderr, "%s: `%s' is too large (%ld bytes).\n", argv[0], fname, n);
    exit(EXIT_FAILURE);
  }

  /* Allocate 9n bytes of memory. */
  T = malloc((size_t)n * sizeof *T);
  SA = malloc(((size_t)n + 1) * sizeof *SA); /* +1 for computing LCP */
  LCP = malloc((size_t)n * sizeof *LCP);
  if((T == NULL) || (SA == NULL) || (LCP == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Read n bytes of data. */
  if(fread(T, sizeof(unsigned char), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
            argv[0],
            (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
            argv[1]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);

  /* Echo small inputs. T is not NUL-terminated yet, so bound the output
     to n bytes instead of relying on %s finding a terminator. */
  if(n < 256) { printf("%.*s\n", (int)n, (const char *)T); }

  /* Replace the last byte by a 0 sentinel.
     NOTE(review): presumably required by the LCP-inducing sais(); confirm
     against sais.c. */
  T[n - 1] = 0;
  if(n < 256) { printf("%s\n", (const char *)T); }

  /* Construct the suffix array. */
  fprintf(stderr, "%s: %ld bytes ... \n", fname, n);
  start = clock();
  if(sais(T, SA, LCP, (int)n) != 0) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "induced: %.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);

  /* naive LCP: compare neighbouring suffixes character by character. The
     explicit bounds keep the scan inside T even when the input contains
     further 0 bytes, i.e. when the sentinel is not unique. */
  start = clock();
  for(i = 1; i < n; ++i) {
    l = 0;
    while((SA[i] + l < n) && (SA[i - 1] + l < n) &&
          (T[SA[i] + l] == T[SA[i - 1] + l])) { ++l; }
    LCP[i] = l;
  }
  finish = clock();
  fprintf(stderr, "naive: %.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);

  /* Deallocate memory. */
  free(SA);
  free(LCP);
  free(T);
  return 0;
}
/*
* test.c for sais-lite
* Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sais.h"
/*
 * Compares the suffixes of T[0..n-1] that start at p1 and p2.
 *
 * Returns the difference of the first pair of differing characters. When
 * one suffix runs out before a difference is found, the result depends only
 * on the start positions: 1 if p1 < p2, -1 if p1 > p2 and 0 if p1 == p2.
 */
static
int
cmp_suf(const unsigned char *T, int n, int p1, int p2) {
  int tie;

  if(p1 < p2) {
    tie = 1;
  } else if(p1 > p2) {
    tie = -1;
  } else {
    tie = 0;
  }

  while((p1 < n) && (p2 < n)) {
    int diff = T[p1] - T[p2];
    if(diff != 0) { return diff; }
    ++p1;
    ++p2;
  }
  return tie;
}
/*
 * Exhaustive self-test: for every length n in 1..24 and every binary string
 * of that length, builds the suffix array and BWT with the byte interface
 * (sais, sais_bwt) and the integer interface (sais_int, sais_int_bwt, with
 * the symbols scaled to {0, 511}), then checks that
 *   - SA1 is strictly increasing under cmp_suf,
 *   - SA3 equals SA1,
 *   - T1BWT and the primary index p1 are consistent with SA1,
 *   - T3BWT and p3 agree with T1BWT and p1.
 * Exits with EXIT_FAILURE and a diagnostic on the first mismatch or on an
 * allocation failure; returns 0 when every case passes.
 */
int
main(int argc, const char *argv[]) {
  unsigned char *T1;
  int *T3;
  unsigned char *T1BWT;
  int *T3BWT;
  int *SA1;
  int *LCP1;
  int *SA3;
  int *A;
  int i, j, n, p1, p3;
  unsigned int bits;

  (void)argc;
  (void)argv;

  fprintf(stdout, "start test...\n");
  for(n = 1; n <= 24; ++n) {
    T1 = malloc((size_t)n * sizeof *T1);
    T1BWT = malloc((size_t)n * sizeof *T1BWT);
    T3 = malloc((size_t)n * sizeof *T3);
    T3BWT = malloc((size_t)n * sizeof *T3BWT);
    /* sais() also computes the LCP array; suftest.c reserves one extra SA
       slot for that ("+1 for computing LCP"), so do the same here. */
    SA1 = malloc(((size_t)n + 1) * sizeof *SA1);
    LCP1 = malloc((size_t)n * sizeof *LCP1);
    SA3 = malloc((size_t)n * sizeof *SA3);
    A = malloc((size_t)n * sizeof *A);
    if((T1 == NULL) || (T1BWT == NULL) || (T3 == NULL) || (T3BWT == NULL) ||
       (SA1 == NULL) || (LCP1 == NULL) || (SA3 == NULL) || (A == NULL)) {
      fprintf(stderr, " n=%2d : failure - malloc\n", n);
      exit(EXIT_FAILURE);
    }

    for(bits = 0; bits < (1U << n); ++bits) {
      if((bits & 4095) == 0) {
        fprintf(stderr, " n=%2d : %3d%%\r", n, (int)((double)bits / (double)((1U << n) - 1) * 100.0));
      }
      /* Bit i of `bits` is character i; T3 is the same text over {0, 511}. */
      for(i = 0; i < n; ++i) {
        T1[i] = (bits >> i) & 1;
        T3[i] = T1[i] * 511;
      }

      /* construct sa and bwt */
      /* sais.h declares sais(T, SA, LCP, n); the LCP output is not checked.
         NOTE(review): suftest.c forces T[n-1] = 0 before calling sais();
         this test does not -- confirm whether sais() depends on that. */
      if(sais(T1, SA1, LCP1, n) != 0) {
        fprintf(stderr, " n=%2d, bits=%u : failure - sais\n", n, bits);
        exit(EXIT_FAILURE);
      }
      if((p1 = sais_bwt(T1, T1BWT, A, n)) < 0) {
        fprintf(stderr, " n=%2d, bits=%u : failure - sais_bwt\n", n, bits);
        exit(EXIT_FAILURE);
      }
      if(sais_int(T3, SA3, n, 512) != 0) {
        fprintf(stderr, " n=%2d, bits=%u : failure - sais_int\n", n, bits);
        exit(EXIT_FAILURE);
      }
      if((p3 = sais_int_bwt(T3, T3BWT, A, n, 512)) < 0) {
        fprintf(stderr, " n=%2d, bits=%u : failure - sais_int_bwt\n", n, bits);
        exit(EXIT_FAILURE);
      }

      /* check SA1 */
      for(i = 1; i < n; ++i) {
        if(0 <= cmp_suf(T1, n, SA1[i - 1], SA1[i])) {
          fprintf(stderr, " n=%2d, bits=%u : failure - SA1\n", n, bits);
          for(i = 0; i < n; ++i) {
            fprintf(stderr, " SA[%d]=%d: ", i, SA1[i]);
            for(j = SA1[i]; j < n; ++j) { fprintf(stderr, "%d", T1[j]); }
            fprintf(stderr, "\n");
          }
          exit(EXIT_FAILURE);
        }
      }

      /* check SA3 */
      for(i = 0; i < n; ++i) {
        if(SA1[i] != SA3[i]) {
          fprintf(stderr, " n=%2d, bits=%u : failure - SA3\n", n, bits);
          for(i = 0; i < n; ++i) {
            fprintf(stderr, " SA1[%d]=%d, SA3[%d]=%d: ", i, SA1[i], i, SA3[i]);
            for(j = SA3[i]; j < n; ++j) { fprintf(stderr, "%d", T3[j] / 511); }
            fprintf(stderr, "\n");
          }
          exit(EXIT_FAILURE);
        }
      }

      /* check T1BWT */
      /* Step i = 0 stands for the character before the (virtual) empty
         suffix, T1[n-1]; step i > 0 for the one before suffix SA1[i-1].
         The suffix starting at 0 emits nothing and must sit at rank p1. */
      for(i = 0, j = 0; i <= n; ++i) {
        if(i != 0) {
          if(SA1[i - 1] == 0) { if(p1 != i) { break; } }
          else if(n <= j) { break; }
          else { if(T1BWT[j++] != T1[SA1[i - 1] - 1]) { break; } }
        } else {
          if(T1BWT[j++] != T1[n - 1]) { break; }
        }
      }
      if((i != (n + 1)) || (j != n)) {
        fprintf(stderr, " n=%2d, bits=%u : failure - T1BWT\n", n, bits);
        fprintf(stderr, " T1BWT=");
        for(i = 0; i < n; ++i) { fprintf(stderr, "%d", T1BWT[i]); }
        fprintf(stderr, ", p1=%d\n", p1);
        for(i = 0; i < n; ++i) {
          fprintf(stderr, " SA[%d]=%d: ", i, SA1[i]);
          for(j = SA1[i]; j < n; ++j) { fprintf(stderr, "%d", T1[j]); }
          fprintf(stderr, " ");
          for(j = 0; j < SA1[i]; ++j) { fprintf(stderr, "%d", T1[j]); }
          fprintf(stderr, "\n");
        }
        exit(EXIT_FAILURE);
      }

      /* check T3BWT */
      for(i = 0; i < n; ++i) { if(T1BWT[i] != T3BWT[i] / 511) { break; } }
      if((i != n) || (p1 != p3)) {
        fprintf(stderr, " n=%2d, bits=%u : failure - T3BWT\n", n, bits);
        fprintf(stderr, " T3BWT=");
        for(i = 0; i < n; ++i) { fprintf(stderr, "%d", T3BWT[i]); }
        fprintf(stderr, ", p3=%d\n", p3);
        for(i = 0; i < n; ++i) {
          fprintf(stderr, " SA[%d]=%d: ", i, SA3[i]);
          for(j = SA3[i]; j < n; ++j) { fprintf(stderr, "%d", T3[j] / 511); }
          fprintf(stderr, " ");
          for(j = 0; j < SA3[i]; ++j) { fprintf(stderr, "%d", T3[j] / 511); }
          fprintf(stderr, "\n");
        }
        exit(EXIT_FAILURE);
      }
    }
    fprintf(stderr, " n=%2d : success\n", n);

    free(T1);
    free(T1BWT);
    free(T3);
    free(T3BWT);
    free(SA1);
    free(LCP1);
    free(SA3);
    free(A);
  }
  fprintf(stderr, "finish test\n");
  return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment