$(o)/lib/redblack-test: $(o)/lib/redblack-test.o $(LIBUCW)
$(o)/lib/binheap-test: $(o)/lib/binheap-test.o $(LIBUCW)
$(o)/lib/lizard-test: $(o)/lib/lizard-test.o $(LIBUCW)
+$(o)/lib/kmp-test: $(o)/lib/kmp-test.o $(LIBUCW) $(LIBCHARSET)
TESTS+=$(addprefix $(o)/lib/,regex.test unicode-utf8.test hash-test.test mempool.test stkstring.test slists.test)
$(o)/lib/regex.test: $(o)/lib/regex-t
--- /dev/null
+/*
+ * Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ * (In fact, the algorithm is usually referred to as Aho-Corasick,
+ * but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ * This is not a normal header file, it's a generator of KMP algorithm.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates KMP structures and functions
+ * with the parameters given.
+ *
+ *
+ * [*] KMP_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the KMP generator).
+ *
+ * KMP_CHAR alphabet type, the default is u16
+ *
+ * KMP_SOURCE user-defined source; KMP_GET_CHAR must
+ * return next character from the input or zero at the end;
+ * if not defined, zero-terminated array of bytes is used as the input
+ * KMP_GET_CHAR(ctx,src,c)
+ *
+ * KMP_NODE user-defined data stored in each added string
+ *
+ * Parameters to default get_char():
+ * KMP_USE_ASCII reads single bytes from the input (default)
+ * KMP_USE_UTF8 reads UTF-8 characters from the input (valid UTF-8 needed)
+ * KMP_TOLOWER converts all to lowercase
+ * KMP_UNACCENT removes accents
+ * KMP_ONLYALPHA converts nonalphas to KMP_CONTROL_CHAR
+ * KMP_CONTROL_CHAR special control character (default is ':')
+ *
+ * Parameters to add():
+ * KMP_ADD_EXTRA_ARGS extra arguments
+ * KMP_ADD_EXTRA_VAR structure with extra local variables
+ * KMP_ADD_INIT(ctx,src,v)
+ * KMP_ADD_NEW(ctx,src,v,s)
+ * KMP_ADD_DUP(ctx,src,v,s)
+ * KMP_NO_DUPS no support for duplicates
+ *
+ * Parameters to build():
+ * KMP_BUILD_STATE(ctx,s) called for all states (except null) in order of non-decreasing tree depth
+ *
+ * KMP_WANT_CLEANUP cleanup()
+ * KMP_WANT_SEARCH includes lib/kmp-search.h with the same prefix;
+ * there can be multiple search variants for a single KMP structure
+ *
+ * KMP_USE_POOL allocates on a given pool
+ */
+
+#ifndef KMP_PREFIX
+#error Missing KMP_PREFIX
+#endif
+
+#include "lib/mempool.h"
+#include <alloca.h>
+
+#define P(x) KMP_PREFIX(x)
+
+/* Type of one alphabet character; KMP_CHAR overrides the u16 default. */
+#ifdef KMP_CHAR
+typedef KMP_CHAR P(char_t);
+#else
+typedef u16 P(char_t);
+#endif
+
+/* Type of the match length stored in every state. */
+typedef u32 P(len_t);
+
+/*
+ * User data embedded in every state; an empty struct when KMP_NODE is not
+ * given. NOTE(review): an empty struct relies on a compiler extension.
+ */
+#ifdef KMP_NODE
+typedef KMP_NODE P(node_t);
+#else
+typedef struct {} P(node_t);
+#endif
+
+/*
+ * One state of the automaton, i.e. one node of the trie of added strings.
+ * States are the nodes of the generated hash table, keyed by (from, c).
+ * Between add() and build() the `back' and `next' pointers are reused by
+ * hash_init_key() to chain the sons of each state; build() then overwrites
+ * them with their final meaning described below.
+ */
+struct P(state) {
+ struct P(state) *from; /* parent state, i.e. the state before reading c */
+ struct P(state) *back; /* after build(): fall-back edge to a shorter state */
+ struct P(state) *next; /* after build(): nearest state along back edges with nonzero len, or NULL */
+ P(len_t) len; /* length of the added string ending here, zero if none ends here */
+ P(char_t) c; /* last character */
+ P(node_t) n; /* user-defined data */
+};
+
+/*
+ * Returns the control character: KMP_CONTROL_CHAR when the user supplied
+ * one, a colon otherwise.
+ */
+static inline P(char_t)
+P(control_char) (void)
+{
+#ifndef KMP_CONTROL_CHAR
+  return ':';
+#else
+  return KMP_CONTROL_CHAR;
+#endif
+}
+
+/* Forward declaration of the table type generated by lib/hashtable.h. */
+struct P(hash_table);
+
+/*
+ * Hash function for the (parent state, character) key: the character is
+ * shifted into the upper bits and added to the parent's address.
+ */
+static inline uns
+P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c)
+{
+  uns char_part = ((uns)c) << 16;
+  uns state_part = (uns)(addr_int_t)f;
+  return char_part + state_part;
+}
+
+/* Two keys are equal when both the parent state and the character agree. */
+static inline int
+P(hash_eq) (struct P(hash_table) *t UNUSED, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2)
+{
+  if (f1 != f2)
+    return 0;
+  return c1 == c2;
+}
+
+/*
+ * Initializes a freshly created state `s' reached from `f' by character `c'
+ * and pushes it to the front of f's list of sons. Until build() runs, the
+ * list head lives in f->back and the list is linked through s->next.
+ */
+static inline void
+P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c)
+{
+  /* The key itself */
+  s->c = c;
+  s->from = f;
+
+  /* No string ends here yet and the new state has no sons */
+  s->len = 0;
+  s->back = NULL;
+
+  /* Link in front of the parent's current sons; must read f->back before overwriting it */
+  struct P(state) *siblings = f->back;
+  s->next = siblings;
+  f->back = s;
+}
+
+/*
+ * Instantiate the hash table of states via lib/hashtable.h.
+ * P() is dropped around the include and restored right after it.
+ * NOTE(review): presumably lib/hashtable.h defines its own P() -- confirm.
+ */
+#undef P
+#define HASH_PREFIX(x) KMP_PREFIX(GLUE(hash_,x))
+#define HASH_NODE struct KMP_PREFIX(state)
+/* The key is the pair (from, c) stored directly in the state */
+#define HASH_KEY_COMPLEX(x) x from, x c
+#define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c
+#define HASH_WANT_NEW
+#define HASH_WANT_FIND
+#ifdef KMP_WANT_CLEANUP
+#define HASH_WANT_CLEANUP
+#endif
+/* hash_hash(), hash_eq() and hash_init_key() are the ones defined above */
+#define HASH_GIVE_HASHFN
+#define HASH_GIVE_EQ
+#define HASH_GIVE_INIT_KEY
+/* Use the caller's pool when KMP_USE_POOL is given, HASH_AUTO_POOL otherwise */
+#ifdef KMP_USE_POOL
+#define HASH_USE_POOL KMP_USE_POOL
+#else
+#define HASH_AUTO_POOL 4096
+#endif
+#define HASH_CONSERVE_SPACE
+#define HASH_TABLE_DYNAMIC
+#include "lib/hashtable.h"
+#define P(x) KMP_PREFIX(x)
+
+/* Complete automaton: the table of all non-null states plus the null (root) state. */
+struct P(context) {
+ struct P(hash_table) hash; /* hash table of states keyed by (from, c) */
+ struct P(state) null; /* null state; add() and search() start here */
+};
+
+/* Type of the input read by get_char(); a byte pointer unless KMP_SOURCE overrides it. */
+#ifdef KMP_SOURCE
+typedef KMP_SOURCE P(source_t);
+#else
+typedef byte *P(source_t);
+#endif
+
+/*
+ * get_char(): fetch the next character of the input.
+ *
+ * Stores the character to *c, advances *src and returns non-zero, or
+ * returns zero at the end of the input.
+ */
+#ifdef KMP_GET_CHAR
+/*
+ * User-supplied reader. The macro receives the dereferenced arguments;
+ * they are parenthesized so that they can be used safely in any expression.
+ */
+static inline int
+P(get_char) (struct P(context) *ctx UNUSED, P(source_t) *src UNUSED, P(char_t) *c UNUSED)
+{
+  return KMP_GET_CHAR((*ctx), (*src), (*c));
+}
+#else
+# if defined(KMP_USE_UTF8)
+#  include "lib/unicode.h"
+#  if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT)
+#   include "charset/unicat.h"
+#  endif
+# else
+/* Byte input is the default, so it must not depend on KMP_USE_ASCII being defined */
+#  if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER)
+#   include "lib/chartype.h"
+#  endif
+# endif
+/*
+ * Default reader for a zero-terminated input: UTF-8 characters with
+ * KMP_USE_UTF8, single bytes otherwise. Optional filters:
+ *   KMP_ONLYALPHA  non-alphabetic characters become the control character
+ *   KMP_TOLOWER    converts to lowercase
+ *   KMP_UNACCENT   removes accents (UTF-8 only)
+ * The terminating zero is never filtered.
+ */
+static inline int
+P(get_char) (struct P(context) *ctx UNUSED, P(source_t) *src, P(char_t) *c)
+{
+# ifdef KMP_USE_UTF8
+  uns cc;
+  *src = (byte *)utf8_get(*src, &cc);
+#  ifdef KMP_ONLYALPHA
+  if (unlikely(!cc)) {}
+  else if (!Ualpha(cc))
+    cc = P(control_char)();
+  else
+#  endif
+    {
+#  ifdef KMP_TOLOWER
+      cc = Utolower(cc);
+#  endif
+#  ifdef KMP_UNACCENT
+      cc = Uunaccent(cc);
+#  endif
+    }
+# else
+  uns cc = *(*src)++;
+#  ifdef KMP_ONLYALPHA
+  if (unlikely(!cc)) {}
+  else if (!Calpha(cc))
+    cc = P(control_char)();
+  else
+#  endif
+    {
+      /*
+       * The braces are essential: without them the `else' above would
+       * capture the store to *c below whenever KMP_TOLOWER is not defined.
+       */
+#  ifdef KMP_TOLOWER
+      cc = Clocase(cc);
+#  endif
+    }
+# endif
+  *c = cc;
+  return !!cc;
+}
+#endif
+
+/*
+ * Adds one string read from `src' to the trie.
+ *
+ * Returns NULL when the input is empty, otherwise the state in which the
+ * string ends. For a new string the state's len is set to the number of
+ * characters read and KMP_ADD_NEW is run. For a string which is already
+ * present, KMP_ADD_DUP is run and the existing state is returned unchanged
+ * (or, with KMP_NO_DUPS, an ASSERT fails).
+ */
+static struct P(state) *
+P(add) (struct P(context) *ctx, P(source_t) src
+# ifdef KMP_ADD_EXTRA_ARGS
+ , KMP_ADD_EXTRA_ARGS
+# endif
+)
+{
+# ifdef KMP_ADD_EXTRA_VAR
+ KMP_ADD_EXTRA_VAR v;
+# endif
+# ifdef KMP_ADD_INIT
+ { KMP_ADD_INIT(ctx, src, v); }
+# endif
+
+ P(char_t) c;
+ /* Empty strings are not stored */
+ if (unlikely(!P(get_char)(ctx, &src, &c)))
+ return NULL;
+ struct P(state) *p = &ctx->null, *s;
+ uns len = 0;
+ /* Walk the existing part of the trie as far as it matches */
+ do
+ {
+ s = P(hash_find)(&ctx->hash, p, c);
+ if (!s)
+ /* First missing edge: everything from here on is new, so only create states */
+ for (;;)
+ {
+ s = P(hash_new)(&ctx->hash, p, c);
+ len++;
+ if (unlikely(!(P(get_char)(ctx, &src, &c))))
+ goto enter_new;
+ p = s;
+ }
+ p = s;
+ len++;
+ }
+ while (P(get_char)(ctx, &src, &c));
+ /* The whole string followed existing edges; a nonzero len means it was added before */
+# ifdef KMP_NO_DUPS
+ ASSERT(!s->len);
+# else
+ if (s->len)
+ {
+# ifdef KMP_ADD_DUP
+ { KMP_ADD_DUP(ctx, src, v, s); }
+# endif
+ return s;
+ }
+# endif
+enter_new:
+ /* Mark the state as the end of a string */
+ s->len = len;
+# ifdef KMP_ADD_NEW
+ { KMP_ADD_NEW(ctx, src, v, s); }
+# endif
+ return s;
+}
+
+/* Resets the whole context to zeros and initializes its hash table of states. */
+static void
+P(init) (struct P(context) *ctx)
+{
+  memset(ctx, 0, sizeof(struct P(context)));
+
+  struct P(hash_table) *table = &ctx->hash;
+  P(hash_init)(table);
+}
+
+#ifdef KMP_WANT_CLEANUP
+/* Cleans up the hash table of states; generated only when KMP_WANT_CLEANUP is set. */
+static inline void
+P(cleanup) (struct P(context) *ctx)
+{
+  struct P(hash_table) *table = &ctx->hash;
+  P(hash_cleanup)(table);
+}
+#endif
+
+/* Returns non-zero when the hash table holds no state, i.e. nothing has been added. */
+static inline int
+P(empty) (struct P(context) *ctx)
+{
+  return ctx->hash.hash_count == 0;
+}
+
+/*
+ * Turns the trie produced by add() into the search automaton by computing
+ * the final `back' and `next' pointers of every state. Does nothing when
+ * no state exists.
+ *
+ * States are visited breadth-first, using the lists of sons which
+ * hash_init_key() threaded through `back' (list head) and `next' (links).
+ * KMP_BUILD_STATE, if defined, is run for every visited state.
+ */
+static void
+P(build) (struct P(context) *ctx)
+{
+ if (P(empty)(ctx))
+ return;
+ uns read = 0, write = 0;
+ /* Queue of states to process, one slot per state. NOTE(review): lives on the stack and grows with the number of states */
+ struct P(state) *fifo[ctx->hash.hash_count];
+ /* Start with the sons of the null state */
+ for (struct P(state) *s = ctx->null.back; s; s = s->next)
+ fifo[write++] = s;
+ /* From now on a NULL back pointer terminates every walk along back edges */
+ ctx->null.back = NULL;
+ while (read != write)
+ {
+ struct P(state) *s = fifo[read++], *t;
+ /* Enqueue the sons first, the list is destroyed by the assignments below */
+ for (t = s->back; t; t = t->next)
+ fifo[write++] = t;
+ /* Follow the parent's back edges until a state with an edge labeled s->c is found */
+ for (t = s->from->back; 1; t = t->back)
+ {
+ if (!t)
+ {
+ /* Nothing found: fall back to the null state */
+ s->back = &ctx->null;
+ s->next = NULL;
+ break;
+ }
+ s->back = P(hash_find)(&ctx->hash, t, s->c);
+ if (s->back)
+ {
+ /* next skips states in which no added string ends */
+ s->next = s->back->len ? s->back : s->back->next;
+ break;
+ }
+ }
+#ifdef KMP_BUILD_STATE
+ { KMP_BUILD_STATE(ctx, s); }
+#endif
+ }
+}
+
+/*
+ * Forget all parameters so that the generator can be included again with
+ * a different configuration. KMP_PREFIX has to survive until the optional
+ * search generator below has been included, because KMPS_PREFIX and
+ * KMPS_KMP_PREFIX expand to it.
+ */
+#undef P
+#undef KMP_CHAR
+#undef KMP_SOURCE
+#undef KMP_GET_CHAR
+#undef KMP_NODE
+#undef KMP_USE_ASCII
+#undef KMP_USE_UTF8
+#undef KMP_TOLOWER
+#undef KMP_UNACCENT
+#undef KMP_ONLYALPHA
+#undef KMP_CONTROL_CHAR
+#undef KMP_ADD_EXTRA_ARGS
+#undef KMP_ADD_EXTRA_VAR
+#undef KMP_ADD_INIT
+#undef KMP_ADD_NEW
+#undef KMP_ADD_DUP
+#undef KMP_NO_DUPS
+#undef KMP_BUILD_STATE
+#undef KMP_USE_POOL
+/* Must not leak either, otherwise a later instantiation would get cleanup() unasked */
+#undef KMP_WANT_CLEANUP
+
+#ifdef KMP_WANT_SEARCH
+# undef KMP_WANT_SEARCH
+# define KMPS_PREFIX(x) KMP_PREFIX(x)
+# define KMPS_KMP_PREFIX(x) KMP_PREFIX(x)
+# include "lib/kmp-search.h"
+#endif
+
+#undef KMP_PREFIX
--- /dev/null
+/*
+ * Knuth-Morris-Pratt's Substring Search for N given strings
+ *
+ * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
+ * (c) 2006, Pavel Charvat <pchar@ucw.cz>
+ *
+ * (In fact, the algorithm is usually referred to as Aho-Corasick,
+ * but that's just an extension of KMP to multiple strings.)
+ */
+
+/*
+ * This is not a normal header file, it's a generator of KMP algorithm.
+ * Each time you include it with parameters set in the corresponding
+ * preprocessor macros, it generates KMP structures and functions
+ * with the parameters given.
+ *
+ * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names
+ * defined by the KMP search generator).
+ * [*] KMPS_KMP_PREFIX(x) prefix used for lib/kmp.h;
+ * more variants of kmp-search can be used for single lib/kmp.h
+ *
+ * KMPS_SOURCE user-defined search input (together with KMPS_GET_CHAR);
+ * if unset, the one from lib/kmp.h is used
+ * KMPS_GET_CHAR(ctx,src,s)
+ *
+ * KMPS_ADD_CONTROLS adds control characters to start and the end
+ * KMPS_MERGE_CONTROLS merges adjacent control characters to a single one
+ *
+ * KMPS_EXTRA_ARGS extra arguments to the search routine
+ * KMPS_EXTRA_VAR extra user-defined structure in search structures
+ * KMPS_INIT(ctx,src,s)
+ * KMPS_EXIT(ctx,src,s)
+ * KMPS_FOUND(ctx,src,s)
+ * KMPS_FOUND_CHAIN(ctx,src,s)
+ * KMPS_STEP(ctx,src,s)
+ * KMPS_T
+ *
+ * KMPS_WANT_BEST
+ */
+
+/* P() names the symbols generated here, KP() refers to symbols of the underlying KMP instance */
+#define P(x) KMPS_PREFIX(x)
+#define KP(x) KMPS_KMP_PREFIX(x)
+
+/* Type of the searched input; defaults to the source type of the KMP instance */
+#ifdef KMPS_SOURCE
+typedef KMPS_SOURCE P(search_source_t);
+#else
+typedef KP(source_t) P(search_source_t);
+#endif
+
+/* Default reader: get_char() of the KMP instance, storing the character to s.c */
+#ifndef KMPS_GET_CHAR
+#define KMPS_GET_CHAR(ctx,src,s) ({ KP(get_char)(ctx, &src, &s.c); })
+#endif
+
+/*
+ * Working state of one search() call. The whole structure is handed to the
+ * user hooks (KMPS_INIT, KMPS_STEP, KMPS_FOUND, ...) as their `s' argument.
+ */
+struct P(search) {
+ struct KP(state) *s; /* current state */
+ struct KP(state) *out; /* output state: the match being reported, assigned only when some KMPS_FOUND* hook or KMPS_WANT_BEST is defined */
+# ifdef KMPS_WANT_BEST
+ struct KP(state) *best; /* state of the longest match seen so far, the null state if none */
+# endif
+ KP(char_t) c; /* last character */
+# ifdef KMPS_EXTRA_VAR
+ KMPS_EXTRA_VAR v; /* user-defined */
+# endif
+# ifdef KMPS_ADD_CONTROLS
+ uns eof; /* set once the trailing control character has been injected */
+# endif
+};
+
+/*
+ * Runs the automaton over `src'.
+ *
+ * For every character read, the current state is advanced (falling back
+ * along `back' edges when there is no matching edge) and the user hooks
+ * are invoked: KMPS_STEP after every step, KMPS_FOUND_CHAIN once per step
+ * with at least one match, KMPS_FOUND for every match along the `next'
+ * chain. KMPS_INIT runs before the first step and KMPS_EXIT at the end.
+ *
+ * The function returns void unless KMPS_T is defined; in that case no
+ * return statement is generated here, so presumably KMPS_EXIT is expected
+ * to return the value -- NOTE(review): confirm against callers.
+ */
+#ifdef KMPS_T
+static KMPS_T
+#else
+static void
+#endif
+P(search) (struct KP(context) *ctx, P(search_source_t) src
+# ifdef KMPS_EXTRA_ARGS
+ , KMPS_EXTRA_ARGS
+# endif
+)
+{
+ struct P(search) s;
+ s.s = &ctx->null;
+# ifdef KMPS_WANT_BEST
+ s.best = &ctx->null;
+# endif
+# ifdef KMPS_ADD_CONTROLS
+ /* The first step is performed on an artificial leading control character */
+ s.c = KP(control_char)();
+ s.eof = 0;
+# else
+ s.c = 0;
+# endif
+# ifdef KMPS_INIT
+ { KMPS_INIT(ctx, src, s); }
+# endif
+# ifndef KMPS_ADD_CONTROLS
+ /* No leading control character: enter the loop at the point where input is read */
+ goto start_read;
+#endif
+ for (;;)
+ {
+ /* Find the nearest state along the back edges which has an edge labeled s.c */
+ for (struct KP(state) *t = s.s; t && !(s.s = KP(hash_find)(&ctx->hash, t, s.c)); t = t->back);
+ /* No such state: restart from the null state */
+ s.s = s.s ? : &ctx->null;
+
+# ifdef KMPS_STEP
+ { KMPS_STEP(ctx, src, s); }
+# endif
+
+# if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST)
+ /* First match ending at this position, if any */
+ s.out = s.s->len ? s.s : s.s->next;
+ if (s.out)
+ {
+# ifdef KMPS_WANT_BEST
+ if (s.out->len > s.best->len)
+ s.best = s.out;
+# endif
+ #ifdef KMPS_FOUND_CHAIN
+ { KMPS_FOUND_CHAIN(ctx, src, s); }
+# endif
+# ifdef KMPS_FOUND
+ /* Report every match on the chain; the assignment in the condition is intentional */
+ do
+ { KMPS_FOUND(ctx, src, s); }
+ while (s.out = s.out->next);
+# endif
+ }
+# endif
+
+# ifdef KMPS_ADD_CONTROLS
+ /* The trailing control character has just been processed */
+ if (unlikely(s.eof))
+ break;
+# endif
+
+# ifndef KMPS_ADD_CONTROLS
+start_read: ;
+# endif
+# ifdef KMPS_MERGE_CONTROLS
+ KP(char_t) last_c = s.c;
+# endif
+
+ /* Read the next character; with KMPS_MERGE_CONTROLS keep reading while a control character follows a control character */
+ do
+ {
+ if (unlikely(!KMPS_GET_CHAR(ctx, src, s)))
+ {
+# ifdef KMPS_ADD_CONTROLS
+ /* End of input: inject a trailing control character unless s.c already holds one */
+ if (s.c != KP(control_char)())
+ {
+ s.c = KP(control_char)();
+ s.eof = 1;
+ break;
+ }
+# endif
+ goto exit;
+ }
+ }
+ while (0
+# ifdef KMPS_MERGE_CONTROLS
+ || (last_c == KP(control_char)() && s.c == KP(control_char)())
+# endif
+ );
+ }
+exit: ;
+# ifdef KMPS_EXIT
+ { KMPS_EXIT(ctx, src, s); }
+# endif
+}
+
+/*
+ * Forget the helper macros and all parameters, so that another search
+ * variant (or another generator using P()) can be instantiated afterwards.
+ */
+#undef P
+#undef KP
+#undef KMPS_PREFIX
+#undef KMPS_KMP_PREFIX
+#undef KMPS_SOURCE
+#undef KMPS_GET_CHAR
+#undef KMPS_ADD_CONTROLS
+#undef KMPS_MERGE_CONTROLS
+#undef KMPS_EXTRA_ARGS
+#undef KMPS_EXTRA_VAR
+#undef KMPS_INIT
+#undef KMPS_EXIT
+#undef KMPS_FOUND
+#undef KMPS_FOUND_CHAIN
+#undef KMPS_STEP
+#undef KMPS_T
+#undef KMPS_WANT_BEST
--- /dev/null
+#include "lib/lib.h"
+#include "lib/mempool.h"
+#include <string.h>
+
+/* Debugging output of the tests; change the condition to 1 to log via log(L_DEBUG, ...) */
+#if 0
+#define TRACE(x...) do{log(L_DEBUG, x);}while(0)
+#else
+#define TRACE(x...) do{}while(0)
+#endif
+
+/*
+ * kmp1: default configuration (no character filters, no user data).
+ * The search tracks the best match and returns its length.
+ */
+#define KMP_PREFIX(x) GLUE_(kmp1,x)
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_WANT_BEST
+#define KMPS_T uns
+#define KMPS_EXIT(ctx,src,s) do{ return s.best->len; }while(0)
+#include "lib/kmp-new.h"
+
+/*
+ * Basic test of the kmp1 instance: adds three overlapping words and checks
+ * that the length reported by the search is 3.
+ */
+static void
+test1(void)
+{
+  struct kmp1_context kmp;
+
+  log(L_INFO, "Running test1");
+
+  /* Build the automaton */
+  kmp1_init(&kmp);
+  kmp1_add(&kmp, "ahoj");
+  kmp1_add(&kmp, "hoj");
+  kmp1_add(&kmp, "aho");
+  kmp1_build(&kmp);
+
+  /* Search and verify */
+  UNUSED uns best = kmp1_search(&kmp, "asjlahslhalahosjkjhojsas");
+  TRACE("Best match has %d characters", best);
+  ASSERT(best == 3);
+
+  kmp1_cleanup(&kmp);
+}
+
+/*
+ * kmp2: UTF-8 input converted to lowercase, non-alphabetic characters
+ * replaced by the control character. Every state remembers the added
+ * string and its id; the search adds and merges control characters and
+ * traces all matches.
+ */
+#define KMP_PREFIX(x) GLUE_(kmp2,x)
+#define KMP_USE_UTF8
+#define KMP_TOLOWER
+#define KMP_ONLYALPHA
+#define KMP_NODE struct { byte *str; uns id; }
+#define KMP_ADD_EXTRA_ARGS uns id
+#define KMP_ADD_EXTRA_VAR byte *
+/* Remember the start of the string, add() advances src while reading */
+#define KMP_ADD_INIT(ctx,src,var) do{ var = src; }while(0)
+#define KMP_ADD_NEW(ctx,src,var,state) do{ TRACE("Inserting string %s with id %d", var, id); \
+ state->n.str = var; state->n.id = id; }while(0)
+#define KMP_ADD_DUP(ctx,src,var,state) do{ TRACE("String %s already inserted", var); }while(0)
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_ADD_CONTROLS
+#define KMPS_MERGE_CONTROLS
+#define KMPS_WANT_BEST
+#define KMPS_FOUND(ctx,src,s) do{ TRACE("String %s with id %d found", s.out->n.str, s.out->n.id); }while(0)
+#define KMPS_STEP(ctx,src,s) do{ TRACE("Got to state %p after reading %d", s.s, s.c); }while(0)
+#define KMPS_EXIT(ctx,src,s) do{ if (s.best->len) TRACE("Best match is %s", s.best->n.str); } while(0)
+#include "lib/kmp-new.h"
+
+/*
+ * Exercises the kmp2 instance (UTF-8, lowercase, alphabetic only) with
+ * several words including duplicates, then searches an accented sentence.
+ * The results are only traced, nothing is asserted.
+ */
+static void
+test2(void)
+{
+  /* Words to add, in this order; duplicates are intentional */
+  static const struct { char *str; uns id; } words[] = {
+    { "ahoj", 1 },
+    { "ahoj", 2 },
+    { "hoj", 3 },
+    { "aho", 4 },
+    { "aba", 5 },
+    { "aba", 5 },
+    { "pěl", 5 },
+  };
+  struct kmp2_context kmp;
+
+  log(L_INFO, "Running test2");
+  kmp2_init(&kmp);
+  for (uns i = 0; i < sizeof(words) / sizeof(words[0]); i++)
+    kmp2_add(&kmp, words[i].str, words[i].id);
+  kmp2_build(&kmp);
+  kmp2_search(&kmp, "Šíleně žluťoučký kůň úpěl ďábelské ódy labababaks sdahojdhsaladsjhla");
+  kmp2_cleanup(&kmp);
+}
+
+/*
+ * kmp3: every state stores the index of the added string. A duplicate
+ * string is truncated to an empty one in place (KMP_ADD_DUP). The search
+ * decrements the per-string counter and the total for each match found,
+ * asserting that the counter was still positive.
+ */
+#define KMP_PREFIX(x) GLUE_(kmp3,x)
+#define KMP_NODE uns
+#define KMP_ADD_EXTRA_ARGS uns index
+#define KMP_ADD_EXTRA_VAR byte *
+/* Remember the start of the string, add() advances src while reading */
+#define KMP_ADD_INIT(ctx,src,v) do{ v = src; }while(0)
+#define KMP_ADD_NEW(ctx,src,v,s) do{ s->n = index; }while(0)
+#define KMP_ADD_DUP(ctx,src,v,s) do{ *v = 0; }while(0)
+#define KMP_WANT_CLEANUP
+#define KMP_WANT_SEARCH
+#define KMPS_EXTRA_ARGS uns *cnt, uns *sum
+#define KMPS_FOUND(ctx,src,s) do{ ASSERT(cnt[s.out->n]); cnt[s.out->n]--; sum[0]--; }while(0)
+#include "lib/kmp-new.h"
+
+/*
+ * Randomized test of the kmp3 instance. In each of 100 rounds a random
+ * set of short words over {a,b,c} is added, then 10 random texts over
+ * {a,b,c,d} are searched. The occurrences of every word are first counted
+ * naively; the search hook decrements the counters for each match it
+ * reports, so the total must end up at zero.
+ */
+static void
+test3(void)
+{
+  log(L_INFO, "Running test3");
+  struct mempool *pool = mp_new(1024);
+  for (uns round = 0; round < 100; round++)
+    {
+      mp_flush(pool);
+      uns words = random_max(100);
+      byte *word[words];
+      struct kmp3_context kmp;
+      kmp3_init(&kmp);
+
+      /* Generate the words and add them; duplicates get emptied by KMP_ADD_DUP */
+      for (uns w = 0; w < words; w++)
+	{
+	  uns wlen = random_max(10);
+	  byte *str = mp_alloc(pool, wlen + 1);
+	  for (uns pos = 0; pos < wlen; pos++)
+	    str[pos] = 'a' + random_max(3);
+	  str[wlen] = 0;
+	  word[w] = str;
+	  kmp3_add(&kmp, str, w);
+	}
+      kmp3_build(&kmp);
+
+      for (uns run = 0; run < 10; run++)
+	{
+	  /* Random text to search in */
+	  uns tlen = random_max(100);
+	  byte text[tlen + 1];
+	  for (uns pos = 0; pos < tlen; pos++)
+	    text[pos] = 'a' + random_max(4);
+	  text[tlen] = 0;
+
+	  /* Naive count of the occurrences of every non-empty word */
+	  uns expected[words], total = 0;
+	  for (uns w = 0; w < words; w++)
+	    {
+	      expected[w] = 0;
+	      if (!*word[w])
+		continue;
+	      for (uns pos = 0; pos < tlen; pos++)
+		if (!strncmp(text + pos, word[w], strlen(word[w])))
+		  {
+		    expected[w]++;
+		    total++;
+		  }
+	    }
+
+	  /* The search consumes the counters; nothing may be left over */
+	  kmp3_search(&kmp, text, expected, &total);
+	  ASSERT(total == 0);
+	}
+      kmp3_cleanup(&kmp);
+    }
+  mp_delete(pool);
+}
+
+/* Runs all tests in order; a failure is signalled by an ASSERT inside the tests. */
+int
+main(void)
+{
+  static void (* const tests[])(void) = { test1, test2, test3 };
+
+  for (uns i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
+    tests[i]();
+  return 0;
+}