2 * Knuth-Morris-Pratt's Substring Search for N given strings
4 * (c) 1999--2005, Robert Spalek <robert@ucw.cz>
8 #include "lib/mempool.h"
10 #include "sherlock/tagged-text.h"
11 #include "lib/unicode.h"
13 #define KMP_GET_CHAR KMP_GET_RAW
21 #define TRACE(level, mask...) if (0) fprintf(stderr, mask)
24 kmp_new(struct mempool *mp, int words_len, uns modify_flags)
26 struct kmp *kmp = mp_alloc_zero(mp, sizeof(struct kmp));
28 kmp->modify_flags = modify_flags;
29 kmp->words_len = words_len;
33 kmp->g.sons = mp_alloc_zero(mp, size * sizeof(struct list));
34 init_list(kmp->g.sons + 0);
36 size = words_len * fls(words_len);
39 kmp->g.hash_size = size;
40 kmp->g.chain = mp_alloc_zero(mp, size * sizeof(struct kmp_transition *));
41 kmp->f = mp_alloc_zero(mp, words_len * sizeof(kmp_state_t));
42 kmp->out = mp_alloc_zero(mp, words_len * sizeof(struct kmp_output *));
47 * The only merge operation is that son includes output of his father (and also
48 * his father,...), so we can merge the link-lists.
51 merge_output(struct kmp_output **target, struct kmp_output *src)
54 target = &(*target)->next;
58 static struct kmp_output *
59 new_output(struct kmp *kmp, uns id, uns len)
61 struct kmp_output *out = mp_alloc(kmp->mp, sizeof(struct kmp_output));
69 kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id)
71 struct kmp_transition tr = { .next=NULL, .from=0 }, **prev;
72 struct kmp_output *new_out;
73 const byte *orig_str = str;
77 TRACE(20, "kmp.c: Entering string %s", str);
78 kmp_get_char(&str, &c, 0);
85 prev = transition_search(&kmp->g, &tr);
88 tr.from = (*prev)->to;
89 kmp_get_char(&str, &c, 0);
94 *prev = mp_alloc_zero(kmp->mp, sizeof(struct kmp_transition));
95 tr.to = kmp->g.count++;
97 add_tail(kmp->g.sons + tr.from, &(*prev)->n);
98 init_list(kmp->g.sons + tr.to);
99 kmp_get_char(&str, &c, 0);
103 prev = transition_search(&kmp->g, &tr);
106 if (kmp->out[tr.from])
107 TRACE(5, "kmp.c: string %s is inserted more than once", orig_str);
108 new_out = new_output(kmp, id, len-1);
109 merge_output(kmp->out + tr.from, new_out);
113 construct_f_out(struct kmp *kmp)
117 struct kmp_transition *son;
119 fifo = alloca(kmp->words_len * sizeof(kmp_state_t));
122 WALK_LIST(son, kmp->g.sons[0])
124 ASSERT(son->from == 0);
126 fifo[write++] = son->to;
128 while (read != write)
132 WALK_LIST(son, kmp->g.sons[r])
134 struct kmp_transition tr, **prev;
135 ASSERT(son->from == r);
143 prev = transition_search(&kmp->g, &tr);
144 if (*prev || !tr.from)
148 kmp->f[s] = *prev ? (*prev)->to : 0;
149 merge_output(kmp->out + s, kmp->out[ kmp->f[s] ]);
155 kmp_build(struct kmp *kmp)
157 ASSERT(kmp->g.count <= kmp->words_len);
158 construct_f_out(kmp);
159 if (kmp->words_len > 1)
160 TRACE(0, "Built KMP with modify flags %d for total words len %d, it has %d nodes", kmp->modify_flags, kmp->words_len, kmp->g.count);