2 * Knuth-Morris-Pratt's Substring Search for N given strings
4 * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
8 #include "lib/bitops.h"
9 #include "lib/mempool.h"
10 #include "lib/lists.h"
11 #include "lib/unicode.h"
13 #define KMP_GET_CHAR KMP_GET_RAW
20 #define TRACE(level, mask...) if (0) fprintf(stderr, mask)
23 kmp_new(struct mempool *mp, int words_len, uns modify_flags)
25 struct kmp *kmp = mp_alloc_zero(mp, sizeof(struct kmp));
27 kmp->modify_flags = modify_flags;
28 kmp->words_len = words_len;
32 kmp->g.sons = mp_alloc_zero(mp, size * sizeof(struct list));
33 init_list(kmp->g.sons + 0);
35 size = words_len * bit_fls(words_len);
38 kmp->g.hash_size = size;
39 kmp->g.chain = mp_alloc_zero(mp, size * sizeof(struct kmp_transition *));
40 kmp->f = mp_alloc_zero(mp, words_len * sizeof(kmp_state_t));
41 kmp->out = mp_alloc_zero(mp, words_len * sizeof(struct kmp_output *));
46 * The only merge operation is that son includes output of his father (and also
47 * his father,...), so we can merge the link-lists.
50 merge_output(struct kmp_output **target, struct kmp_output *src)
53 target = &(*target)->next;
57 static struct kmp_output *
58 new_output(struct kmp *kmp, uns id, uns len)
60 struct kmp_output *out = mp_alloc(kmp->mp, sizeof(struct kmp_output));
68 kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id)
70 struct kmp_transition tr = { .next=NULL, .from=0 }, **prev;
71 struct kmp_output *new_out;
72 const byte *orig_str = str;
76 TRACE(20, "kmp.c: Entering string %s", str);
77 kmp_get_char(&str, &c, 0);
84 prev = transition_search(&kmp->g, &tr);
87 tr.from = (*prev)->to;
88 kmp_get_char(&str, &c, 0);
93 *prev = mp_alloc_zero(kmp->mp, sizeof(struct kmp_transition));
94 tr.to = kmp->g.count++;
96 add_tail(kmp->g.sons + tr.from, &(*prev)->n);
97 init_list(kmp->g.sons + tr.to);
98 kmp_get_char(&str, &c, 0);
102 prev = transition_search(&kmp->g, &tr);
105 if (kmp->out[tr.from])
106 TRACE(5, "kmp.c: string %s is inserted more than once", orig_str);
107 new_out = new_output(kmp, id, len-1);
108 merge_output(kmp->out + tr.from, new_out);
112 construct_f_out(struct kmp *kmp)
116 struct kmp_transition *son;
118 fifo = alloca(kmp->words_len * sizeof(kmp_state_t));
121 WALK_LIST(son, kmp->g.sons[0])
123 ASSERT(son->from == 0);
125 fifo[write++] = son->to;
127 while (read != write)
131 WALK_LIST(son, kmp->g.sons[r])
133 struct kmp_transition tr, **prev;
134 ASSERT(son->from == r);
142 prev = transition_search(&kmp->g, &tr);
143 if (*prev || !tr.from)
147 kmp->f[s] = *prev ? (*prev)->to : 0;
148 merge_output(kmp->out + s, kmp->out[ kmp->f[s] ]);
154 kmp_build(struct kmp *kmp)
156 ASSERT(kmp->g.count <= kmp->words_len);
157 construct_f_out(kmp);
158 if (kmp->words_len > 1)
159 TRACE(0, "Built KMP with modify flags %d for total words len %d, it has %d nodes", kmp->modify_flags, kmp->words_len, kmp->g.count);