2 * UCW Library -- Fast Pattern Matcher for Short Wildcard Patterns (only `?' and `*' supported)
4 * Traditional NFA -> DFA method with on-the-fly DFA construction.
6 * (c) 1999 Martin Mares <mj@ucw.cz>
8 * This software may be freely distributed and used according to the terms
9 * of the GNU Lesser General Public License.
13 #include <ucw/mempool.h>
14 #include <ucw/wildmatch.h>
19 #define MAX_STATES 32 /* Must be <= 32, state 0 is reserved, state 1 is initial */
20 #define MAX_CACHED 256 /* Maximum number of cached DFA states */
21 #define HASH_SIZE 512 /* Number of entries in DFA hash table (at least MAX_CACHED+MAX_STATES) */
25 char ch; /* 0 for non-matching state */
26 byte final; /* Accepting state */
27 u32 match_states; /* States to go to when input character == ch */
28 u32 default_states; /* States to go to whatever the input is */
32 uintptr_t edge[256]; /* Outgoing DFA edges. Bit 0 is set for incomplete edges which
33 * contain just state set and clear for complete ones which point
34 * to other states. NULL means `no match'.
36 u32 nfa_set; /* A set of NFA states this DFA state represents */
37 int final; /* This is an accepting state */
38 struct dfa_state *next; /* Next in the chain of free states */
42 struct nfa_state nfa[MAX_STATES];
43 struct dfa_state *hash[HASH_SIZE];
44 struct dfa_state *dfa_start;
46 uint dfa_cache_counter;
48 struct dfa_state *free_states;
56 return set % HASH_SIZE;
59 static struct dfa_state *
60 wp_new_state(struct wildpatt *w, u32 set)
62 uint h = wp_hash(set);
67 while (d = w->hash[h])
69 if (d->nfa_set == set)
71 h = (h + HASH_SKIP) % HASH_SIZE;
73 if (d = w->free_states)
74 w->free_states = d->next;
76 d = mp_alloc(w->pool, sizeof(*d));
81 for(bit=1; bit <= w->nfa_states; bit++)
84 struct nfa_state *n = &w->nfa[bit];
86 d->edge[(unsigned char)n->ch] |= n->match_states | 1;
88 def_set |= n->default_states;
95 d->edge[i] |= def_set;
97 w->dfa_cache_counter++;
102 wp_compile(const char *p, struct mempool *pool)
107 if (strlen(p) >= MAX_STATES) /* Too long */
109 w = mp_alloc_zero(pool, sizeof(*w));
113 struct nfa_state *n = w->nfa + i;
115 n->default_states |= 1 << (++i);/* Default edge to a new state */
117 n->default_states |= 1 << i; /* Default edge to the same state */
120 n->ch = *p; /* Edge to new state labelled with 'c' */
121 n->match_states = 1 << (++i);
126 w->dfa_start = wp_new_state(w, 1 << 1);
131 wp_prune_cache(struct wildpatt *w)
134 * I was unable to trigger cache overflow on my large set of
135 * test cases, so I decided to handle it in an extremely dumb
139 for(i=0; i<HASH_SIZE; i++)
140 if (w->hash[i] && w->hash[i]->nfa_set != (1 << 1))
142 struct dfa_state *d = w->hash[i];
144 d->next = w->free_states;
147 w->dfa_cache_counter = 1; /* Only the initial state remains */
151 wp_match(struct wildpatt *w, const char *s)
155 if (w->dfa_cache_counter >= MAX_CACHED)
160 uintptr_t next = d->edge[(unsigned char)*s];
163 /* Need to lookup/create the destination state */
164 struct dfa_state *new = wp_new_state(w, next & ~1);
165 d->edge[(unsigned char)*s] = (uintptr_t) new;
171 d = (struct dfa_state *) next;
178 wp_min_size(const char *p)
191 wp_dump(struct wildpatt *w)
196 for(i=1; i<=w->nfa_states; i++)
198 struct nfa_state *n = w->nfa + i;
199 printf("%2d: %d %02x %08x %08x\n", i, n->final, n->ch, n->match_states, n->default_states);
202 for(i=0; i<HASH_SIZE; i++)
204 printf("%3d: %08x\n", i, w->hash[i]->nfa_set);
205 printf("%d DFA states cached.\n", w->dfa_cache_counter);
208 int main(int argc, char **argv)
213 if (argc != 2) return 1;
214 w = wp_compile(argv[1], mp_new(65536));
217 puts("Compile error");
221 while (fgets(buf, sizeof(buf)-1, stdin))
223 char *c = strchr(buf, '\n');
227 printf("%d\n", wp_match(w, buf));
229 if (wp_match(w, buf))