2 * Knuth-Morris-Pratt's Substring Search for N given strings
4 * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
8 #include "lib/bitops.h"
9 #include "lib/mempool.h"
10 #include "lib/lists.h"
11 #include "lib/unicode.h"
13 #define KMP_GET_CHAR(pos, c, flags) ASSERT(0)
20 #define TRACE(level, mask...) if (0) fprintf(stderr, mask)
23 kmp_new(struct mempool *mp, int words_len, uns modify_flags)
25 struct kmp *kmp = mp_alloc_zero(mp, sizeof(struct kmp));
27 kmp->modify_flags = modify_flags;
28 kmp->words_len = words_len;
32 kmp->g.sons = mp_alloc_zero(mp, size * sizeof(struct list));
33 init_list(kmp->g.sons + 0);
35 size = words_len * bit_fls(words_len);
38 kmp->g.hash_size = size;
39 kmp->g.chain = mp_alloc_zero(mp, size * sizeof(struct kmp_transition *));
40 kmp->f = mp_alloc_zero(mp, words_len * sizeof(kmp_state_t));
41 kmp->out = mp_alloc_zero(mp, words_len * sizeof(struct kmp_output *));
46 * The only merge operation is that son includes output of his father (and also
47 * his father,...), so we can merge the link-lists.
50 merge_output(struct kmp_output **target, struct kmp_output *src)
53 target = &(*target)->next;
57 static struct kmp_output *
58 new_output(struct kmp *kmp, uns id, uns len)
60 struct kmp_output *out = mp_alloc(kmp->mp, sizeof(struct kmp_output));
68 kmp_enter_raw_string(struct kmp *kmp, kmp_char_t *str, uns id)
70 struct kmp_transition tr = { .next=NULL, .from=0 }, **prev;
71 struct kmp_output *new_out;
75 TRACE(20, "kmp.c: Entering string");
83 prev = transition_search(&kmp->g, &tr);
86 tr.from = (*prev)->to;
92 *prev = mp_alloc_zero(kmp->mp, sizeof(struct kmp_transition));
93 tr.to = kmp->g.count++;
95 add_tail(kmp->g.sons + tr.from, &(*prev)->n);
96 init_list(kmp->g.sons + tr.to);
101 prev = transition_search(&kmp->g, &tr);
104 if (kmp->out[tr.from])
105 TRACE(5, "kmp.c: string is inserted more than once");
106 new_out = new_output(kmp, id, len-1);
107 merge_output(kmp->out + tr.from, new_out);
111 construct_f_out(struct kmp *kmp)
115 struct kmp_transition *son;
117 fifo = alloca(kmp->words_len * sizeof(kmp_state_t));
120 WALK_LIST(son, kmp->g.sons[0])
122 ASSERT(son->from == 0);
124 fifo[write++] = son->to;
126 while (read != write)
130 WALK_LIST(son, kmp->g.sons[r])
132 struct kmp_transition tr, **prev;
133 ASSERT(son->from == r);
141 prev = transition_search(&kmp->g, &tr);
142 if (*prev || !tr.from)
146 kmp->f[s] = *prev ? (*prev)->to : 0;
147 merge_output(kmp->out + s, kmp->out[ kmp->f[s] ]);
153 kmp_build(struct kmp *kmp)
155 ASSERT(kmp->g.count <= kmp->words_len);
156 construct_f_out(kmp);
157 if (kmp->words_len > 1)
158 TRACE(0, "Built KMP with modify flags %d for total words len %d, it has %d nodes", kmp->modify_flags, kmp->words_len, kmp->g.count);