2 * Knuth-Morris-Pratt's Substring Search for N given strings
4 * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
5 * (c) 2006, Pavel Charvat <pchar@ucw.cz>
7 * (In fact, the algorithm is usually referred to as Aho-McCorasick,
8 * but that's just an extension of KMP to multiple strings.)
12 * This is not a normal header file, it's a generator of KMP algorithm.
13 * Each time you include it with parameters set in the corresponding
14 * preprocessor macros, it generates KMP structures and functions
15 * with the parameters given. See ucw/kmp.h before reading this description.
19 * struct search structure with both the internal and the user-defined variables
20 * used during the search and accessible from all macros
22 * void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output)
24 * void run(kmp,src) the same, but automatically allocates search structre from the stack
27 * Parameters to the generator (these marked with [*] are mandatory):
29 * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names
30 * defined by the KMP search generator)
31 * [*] KMPS_KMP_PREFIX(x) prefix used for ucw/kmp.h
33 * KMPS_SOURCE user-defined text source (together with KMPS_GET_CHAR);
34 * if unset, the one from ucw/kmp.h is taken
35 * KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c
37 * KMPS_ADD_CONTROLS add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string
38 * KMPS_MERGE_CONTROLS merge adjacent control characters to a single one
40 * KMPS_VARS user-defined variables in struct search (in .u substructure to avoid collisions)
42 * KMPS_INIT(kmp,src,search) statement executed at the beginning of search()
43 * KMPS_EXIT(kmp,src,search) ... at the end
44 * KMPS_STEP(kmp,src,search) ... after each step (read of next character + current state update)
45 * of the algorithm, but before KMPS_FOUND[_CHAIN]
46 * KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match
47 * (stored in search->out - NOT necessary search->s!);
48 * all matches form a NULL-terminated link list (search->out, search->out->next, ...)
49 * in order of decreasing length
50 * KMPS_FOUND(kmp,src,search) ... called for every match (in search->out)
51 * KMPS_WANT_BEST algorithm computes globally longest match, which is available
52 * in search->best in KMPS_EXIT; if there is no match, it points to the null state
55 #define P(x) KMPS_PREFIX(x)
56 #define KP(x) KMPS_KMP_PREFIX(x)
59 typedef KMPS_SOURCE P(search_source_t);
61 typedef KP(source_t) P(search_source_t);
65 #define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c))
69 struct KP(state) *s; /* current state */
70 struct KP(state) *out; /* output state */
71 # ifdef KMPS_WANT_BEST
72 struct KP(state) *best; /* longest match */
74 KP(char_t) c; /* last character */
75 # ifdef KMPS_ADD_CONTROLS
81 } u; /* user-defined */
86 P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src)
89 # ifdef KMPS_WANT_BEST
92 # ifdef KMPS_ADD_CONTROLS
99 { KMPS_INIT(kmp, src, s); }
101 # ifndef KMPS_ADD_CONTROLS
106 for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back);
107 s->s = s->s ? : &kmp->null;
110 { KMPS_STEP(kmp, src, s); }
113 # if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST)
114 s->out = s->s->len ? s->s : s->s->next;
117 # ifdef KMPS_WANT_BEST
118 if (s->out->len > s->best->len)
121 # ifdef KMPS_FOUND_CHAIN
122 { KMPS_FOUND_CHAIN(kmp, src, s); }
126 { KMPS_FOUND(kmp, src, s); }
127 while (s->out = s->out->next);
132 # ifdef KMPS_ADD_CONTROLS
137 # ifndef KMPS_ADD_CONTROLS
140 # ifdef KMPS_MERGE_CONTROLS
141 KP(char_t) last_c = s->c;
146 if (!KMPS_GET_CHAR(kmp, src, s))
148 # ifdef KMPS_ADD_CONTROLS
149 if (!KP(is_control)(kmp, s->c))
151 s->c = KP(control)();
160 # ifdef KMPS_MERGE_CONTROLS
161 || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c))
167 { KMPS_EXIT(kmp, src, s); }
172 P(run) (struct KP(struct) *kmp, P(search_source_t) src)
174 struct P(search) search;
175 P(search)(kmp, &search, src);
180 #undef KMPS_KMP_PREFIX
183 #undef KMPS_ADD_CONTROLS
184 #undef KMPS_MERGE_CONTROLS
189 #undef KMPS_FOUND_CHAIN
190 #undef KMPS_WANT_BEST