2 * Knuth-Morris-Pratt's Substring Search for N given strings
4 * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
5 * (c) 2006, Pavel Charvat <pchar@ucw.cz>
7 * (In fact, the algorithm is usually referred to as Aho-McCorasick,
8 * but that's just an extension of KMP to multiple strings.)
12 * This is not a normal header file, it's a generator of KMP algorithm.
13 * Each time you include it with parameters set in the corresponding
14 * preprocessor macros, it generates KMP structures and functions
15 * with the parameters given.
18 * [*] KMP_PREFIX(x) macro to add a name prefix (used on all global names
19 * defined by the KMP generator).
21 * KMP_CHAR alphabet type, the default is u16
23 * KMP_SOURCE user-defined source; KMP_GET_CHAR must
24 * return next character from the input or zero at the end;
25 * if not defined, zero-terminated array of bytes is used as the input
26 * KMP_GET_CHAR(ctx,src,c)
28 * KMP_NODE user-defined data stored in each added string
30 * Parameters to default get_char():
31 * KMP_USE_ASCII reads single bytes from the input (default)
32 * KMP_USE_UTF8 reads UTF-8 characters from the input (valid UTF-8 needed)
33 * KMP_TOLOWER converts all to lowercase
34 * KMP_UNACCENT removes accents
35 * KMP_ONLYALPHA converts nonalphas to KMP_CONTROL_CHAR
36 * KMP_CONTROL_CHAR special control character (default is ':')
38 * Parameters to add():
39 * KMP_ADD_EXTRA_ARGS extra arguments
40 * KMP_ADD_EXTRA_VAR structure with extra local varriables
41 * KMP_ADD_INIT(ctx,src,v)
42 * KMP_ADD_NEW(ctx,src,v,s)
43 * KMP_ADD_DUP(ctx,src,v,s)
44 * KMP_NO_DUPS no support for duplicates
46 * Parameters to build():
47 * KMP_BUILD_STATE(ctx,s) called for all states (including null) in order of non-decreasing tree depth
49 * KMP_WANT_CLEANUP cleanup()
50 * KMP_WANT_SEARCH includes lib/kmp-search.h with the same prefix;
51 * there can be multiple search variants for a single KMP structure
53 * KMP_USE_POOL allocates on a given pool
57 #error Missing KMP_PREFIX
60 #include "lib/mempool.h"
63 #define P(x) KMP_PREFIX(x)
66 typedef KMP_CHAR P(char_t);
68 typedef u16 P(char_t);
74 typedef KMP_NODE P(node_t);
76 typedef struct {} P(node_t);
80 struct P(state) *from; /* state with previous character */
81 struct P(state) *back; /* backwards edge to the largest shorter state */
82 struct P(state) *next; /* largest shorter match */
83 P(len_t) len; /* largest match, zero otherwise */
84 P(char_t) c; /* last character */
85 P(node_t) n; /* user-defined data */
89 static inline P(char_t)
90 P(control_char) (void)
92 #ifdef KMP_CONTROL_CHAR
93 return KMP_CONTROL_CHAR;
99 /* User-defined source */
100 struct P(hash_table);
103 P(hash_hash) (struct P(hash_table) *t UNUSED, struct P(state) *f, P(char_t) c)
105 return (((uns)c) << 16) + (uns)(addr_int_t)f;
109 P(hash_eq) (struct P(hash_table) *t UNUSED, struct P(state) *f1, P(char_t) c1, struct P(state) *f2, P(char_t) c2)
111 return f1 == f2 && c1 == c2;
115 P(hash_init_key) (struct P(hash_table) *t UNUSED, struct P(state) *s, struct P(state) *f, P(char_t) c)
117 memset(s, 0, sizeof(*s));
120 s->next = f->back; /* the pointers hold the link-list of sons... change in build() */
125 #define HASH_PREFIX(x) KMP_PREFIX(GLUE(hash_,x))
126 #define HASH_NODE struct KMP_PREFIX(state)
127 #define HASH_KEY_COMPLEX(x) x from, x c
128 #define HASH_KEY_DECL struct KMP_PREFIX(state) *from, KMP_PREFIX(char_t) c
129 #define HASH_WANT_NEW
130 #define HASH_WANT_FIND
131 #ifdef KMP_WANT_CLEANUP
132 #define HASH_WANT_CLEANUP
134 #define HASH_GIVE_HASHFN
136 #define HASH_GIVE_INIT_KEY
138 #define HASH_USE_POOL KMP_USE_POOL
140 #define HASH_AUTO_POOL 4096
142 #define HASH_CONSERVE_SPACE
143 #define HASH_TABLE_DYNAMIC
144 #include "lib/hashtable.h"
145 #define P(x) KMP_PREFIX(x)
148 struct P(hash_table) hash; /* hash table*/
149 struct P(state) null; /* null state */
153 typedef KMP_SOURCE P(source_t);
155 typedef byte *P(source_t);
160 P(get_char) (struct P(context) *ctx, P(source_t) *src, P(char_t) *c)
162 return KMP_GET_CHAR(ctx, *src, *c);
165 # if defined(KMP_USE_UTF8)
166 # include "lib/unicode.h"
167 # if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER) || defined(KMP_UNACCENT)
168 # include "charset/unicat.h"
170 # elif defined(KMP_USE_ASCII)
171 # if defined(KMP_ONLYALPHA) || defined(KMP_TOLOWER)
172 # include "lib/chartype.h"
176 P(get_char) (struct P(context) *ctx UNUSED, P(source_t) *src, P(char_t) *c)
180 *src = (byte *)utf8_get(*src, &cc);
181 # ifdef KMP_ONLYALPHA
182 if (unlikely(!cc)) {}
183 else if (!Ualpha(cc))
184 cc = P(control_char)();
197 # ifdef KMP_ONLYALPHA
198 if (unlikely(!cc)) {}
199 else if (!Calpha(cc))
200 cc = P(control_char)();
212 static struct P(state) *
213 P(add) (struct P(context) *ctx, P(source_t) src
214 # ifdef KMP_ADD_EXTRA_ARGS
219 # ifdef KMP_ADD_EXTRA_VAR
223 { KMP_ADD_INIT(ctx, src, v); }
227 if (unlikely(!P(get_char)(ctx, &src, &c)))
229 struct P(state) *p = &ctx->null, *s;
233 s = P(hash_find)(&ctx->hash, p, c);
237 s = P(hash_new)(&ctx->hash, p, c);
239 if (unlikely(!(P(get_char)(ctx, &src, &c))))
246 while (P(get_char)(ctx, &src, &c));
253 { KMP_ADD_DUP(ctx, src, v, s); }
261 { KMP_ADD_NEW(ctx, src, v, s); }
267 P(init) (struct P(context) *ctx)
269 memset(ctx, 0, sizeof(*ctx));
270 P(hash_init)(&ctx->hash);
273 #ifdef KMP_WANT_CLEANUP
275 P(cleanup) (struct P(context) *ctx)
277 P(hash_cleanup)(&ctx->hash);
282 P(empty) (struct P(context) *ctx)
284 return !ctx->hash.hash_count;
288 P(build) (struct P(context) *ctx)
292 uns read = 0, write = 0;
293 struct P(state) *fifo[ctx->hash.hash_count], *null = &ctx->null;
294 for (struct P(state) *s = null->back; s; s = s->next)
297 # ifdef KMP_BUILD_STATE
298 { KMP_BUILD_STATE(ctx, null); }
300 while (read != write)
302 struct P(state) *s = fifo[read++], *t;
303 for (t = s->back; t; t = t->next)
305 for (t = s->from->back; 1; t = t->back)
313 s->back = P(hash_find)(&ctx->hash, t, s->c);
316 s->next = s->back->len ? s->back : s->back->next;
320 # ifdef KMP_BUILD_STATE
321 { KMP_BUILD_STATE(ctx, s); }
336 #undef KMP_CONTROL_CHAR
337 #undef KMP_ADD_EXTRA_ARGS
338 #undef KMP_ADD_EXTRA_VAR
343 #undef KMP_BUILD_STATE
346 #ifdef KMP_WANT_SEARCH
347 # undef KMP_WANT_SEARCH
348 # define KMPS_PREFIX(x) KMP_PREFIX(x)
349 # define KMPS_KMP_PREFIX(x) KMP_PREFIX(x)
350 # include "lib/kmp-search.h"