X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Fkmp-search.h;h=b702acc3effb786af8aa174c3e36e4e28e814fd5;hb=39d5e9acc6d93c07cbb408fe27144c8a485499ac;hp=3452821d552b2bb3e3a52947264a8a3dd921b49f;hpb=e6b837d6612b501ac69ec5c2cc701417c03cff81;p=libucw.git diff --git a/lib/kmp-search.h b/lib/kmp-search.h index 3452821d..b702acc3 100644 --- a/lib/kmp-search.h +++ b/lib/kmp-search.h @@ -12,30 +12,44 @@ * This is not a normal header file, it's a generator of KMP algorithm. * Each time you include it with parameters set in the corresponding * preprocessor macros, it generates KMP structures and functions - * with the parameters given. + * with the parameters given. See lib/kmp.h before reading this description. + * + * This file defines: + * + * struct search structure with both the internal and the user-defined variables + * used during the search and accessible from all macros + * + * void search(kmp,search,src) executes the search; search structure is allocated by the caller (possible input/output) + * + * void run(kmp,src) the same, but automatically allocates search structre from the stack + * + * + * Parameters to the generator (these marked with [*] are mandatory): * * [*] KMPS_PREFIX(x) macro to add a name prefix (used on all global names - * defined by the KMP search generator). - * [*] KMPS_KMP_PREFIX(x) prefix used for lib/kmp.h; - * more variants of kmp-search can be used for single lib/kmp.h + * defined by the KMP search generator) + * [*] KMPS_KMP_PREFIX(x) prefix used for lib/kmp.h * - * KMPS_SOURCE user-defined search input (together with KMPS_GET_CHAR); - * if unset, the one from lib/kmp.h is used - * KMPS_GET_CHAR(ctx,src,s) + * KMPS_SOURCE user-defined text source (together with KMPS_GET_CHAR); + * if unset, the one from lib/kmp.h is taken + * KMPS_GET_CHAR(kmp,src,search) analogy to KMP_GET_CHAR, but it must store the next character to search->c * - * KMPS_ADD_CONTROLS adds control characters to start and the end - * KMPS_MERGE_CONTROLS merges adjacent control characters to a single one + * KMPS_ADD_CONTROLS add control characters (see KMP_CONTROL_CHAR in kmp.h) at both ends of the input string + * KMPS_MERGE_CONTROLS merge adjacent control characters to a single one * - * KMPS_EXTRA_ARGS extra arguments to the search routine - * KMPS_EXTRA_VAR extra user-defined structure in search structures - * KMPS_INIT(ctx,src,s) - * KMPS_EXIT(ctx,src,s) - * KMPS_FOUND(ctx,src,s) - * KMPS_FOUND_CHAIN(ctx,src,s) - * KMPS_STEP(ctx,src,s) - * KMPS_T + * KMPS_VARS user-defined variables in struct search (in .u substructure to avoid collisions) * - * KMPS_WANT_BEST + * KMPS_INIT(kmp,src,search) statement executed at the beginning of search() + * KMPS_EXIT(kmp,src,search) ... at the end + * KMPS_STEP(kmp,src,search) ... after each step (read of next character + current state update) + * of the algorithm, but before KMPS_FOUND[_CHAIN] + * KMPS_FOUND_CHAIN(kmp,src,search) ... for each state representing locally longest match + * (stored in search->out - NOT necessary search->s!); + * all matches form a NULL-terminated link list (search->out, search->out->next, ...) + * in order of decreasing length + * KMPS_FOUND(kmp,src,search) ... called for every match (in search->out) + * KMPS_WANT_BEST algorithm computes globally longest match, which is available + * in search->best in KMPS_EXIT; if there is no match, it points to the null state */ #define P(x) KMPS_PREFIX(x) @@ -48,101 +62,94 @@ typedef KP(source_t) P(search_source_t); #endif #ifndef KMPS_GET_CHAR -#define KMPS_GET_CHAR(ctx,src,s) ({ KP(get_char)(ctx, &src, &s.c); }) +#define KMPS_GET_CHAR(kmp,src,s) (KP(get_char)(kmp, &src, &s->c)) #endif struct P(search) { struct KP(state) *s; /* current state */ struct KP(state) *out; /* output state */ # ifdef KMPS_WANT_BEST - struct KP(state) *best; /* largest match */ + struct KP(state) *best; /* longest match */ # endif KP(char_t) c; /* last character */ -# ifdef KMPS_EXTRA_VAR - KMPS_EXTRA_VAR v; /* user-defined */ -# endif # ifdef KMPS_ADD_CONTROLS uns eof; # endif +# ifdef KMPS_VARS + struct { + KMPS_VARS + } u; /* user-defined */ +# endif }; -#ifdef KMPS_T -static KMPS_T -#else static void -#endif -P(search) (struct KP(context) *ctx, P(search_source_t) src -# ifdef KMPS_EXTRA_ARGS - , KMPS_EXTRA_ARGS -# endif -) +P(search) (struct KP(struct) *kmp, struct P(search) *s, P(search_source_t) src) { - struct P(search) s; - s.s = &ctx->null; + s->s = &kmp->null; # ifdef KMPS_WANT_BEST - s.best = &ctx->null; + s->best = &kmp->null; # endif -# ifdef KMPS_ADD_CONTROLS - s.c = KP(control_char)(); - s.eof = 0; +# ifdef KMPS_ADD_CONTROLS + s->c = KP(control)(); + s->eof = 0; # else - s.c = 0; -# endif + s->c = 0; +# endif # ifdef KMPS_INIT - { KMPS_INIT(ctx, src, s); } + { KMPS_INIT(kmp, src, s); } # endif -# ifndef KMPS_ADD_CONTROLS +# ifndef KMPS_ADD_CONTROLS goto start_read; -#endif +# endif for (;;) { - for (struct KP(state) *t = s.s; t && !(s.s = KP(hash_find)(&ctx->hash, t, s.c)); t = t->back); - s.s = s.s ? : &ctx->null; + for (struct KP(state) *t = s->s; t && !(s->s = KP(hash_find)(&kmp->hash, t, s->c)); t = t->back); + s->s = s->s ? : &kmp->null; # ifdef KMPS_STEP - { KMPS_STEP(ctx, src, s); } + { KMPS_STEP(kmp, src, s); } # endif # if defined(KMPS_FOUND) || defined(KMPS_FOUND_CHAIN) || defined(KMPS_WANT_BEST) - s.out = s.s->len ? s.s : s.s->next; - if (s.out) + s->out = s->s->len ? s->s : s->s->next; + if (s->out) { -# ifdef KMPS_WANT_BEST - if (s.out->len > s.best->len) - s.best = s.out; -# endif - #ifdef KMPS_FOUND_CHAIN - { KMPS_FOUND_CHAIN(ctx, src, s); } +# ifdef KMPS_WANT_BEST + if (s->out->len > s->best->len) + s->best = s->out; +# endif +# ifdef KMPS_FOUND_CHAIN + { KMPS_FOUND_CHAIN(kmp, src, s); } # endif # ifdef KMPS_FOUND do - { KMPS_FOUND(ctx, src, s); } - while (s.out = s.out->next); -# endif + { KMPS_FOUND(kmp, src, s); } + while (s->out = s->out->next); +# endif } -# endif +# endif -# ifdef KMPS_ADD_CONTROLS - if (s.eof) +# ifdef KMPS_ADD_CONTROLS + if (s->eof) break; -# endif +# endif -# ifndef KMPS_ADD_CONTROLS +# ifndef KMPS_ADD_CONTROLS start_read: ; -# endif +# endif # ifdef KMPS_MERGE_CONTROLS - KP(char_t) last_c = s.c; + KP(char_t) last_c = s->c; # endif do { - if (!KMPS_GET_CHAR(ctx, src, s)) + if (!KMPS_GET_CHAR(kmp, src, s)) { # ifdef KMPS_ADD_CONTROLS - if (s.c != KP(control_char)()) + if (!KP(is_control)(kmp, s->c)) { - s.c = KP(control_char)(); - s.eof = 1; + s->c = KP(control)(); + s->eof = 1; break; } # endif @@ -151,16 +158,23 @@ start_read: ; } while (0 # ifdef KMPS_MERGE_CONTROLS - || (last_c == KP(control_char)() && s.c == KP(control_char)()) + || (KP(is_control)(kmp, last_c) && KP(is_control)(kmp, s->c)) # endif ); } exit: ; # ifdef KMPS_EXIT - { KMPS_EXIT(ctx, src, s); } + { KMPS_EXIT(kmp, src, s); } # endif } +static inline void +P(run) (struct KP(struct) *kmp, P(search_source_t) src) +{ + struct P(search) search; + P(search)(kmp, &search, src); +} + #undef P #undef KMPS_PREFIX #undef KMPS_KMP_PREFIX @@ -168,12 +182,10 @@ exit: ; #undef KMPS_GET_CHAR #undef KMPS_ADD_CONTROLS #undef KMPS_MERGE_CONTROLS -#undef KMPS_EXTRA_ARGS -#undef KMPS_EXTRA_VAR +#undef KMPS_VARS #undef KMPS_INIT #undef KMPS_EXIT #undef KMPS_FOUND #undef KMPS_FOUND_CHAIN -#undef KMPS_STEP -#undef KMPS_T #undef KMPS_WANT_BEST +#undef KMPS_STEP