From: Pavel Charvat Date: Tue, 4 Apr 2006 10:48:59 +0000 (+0200) Subject: kmp_enter_raw_string uses zero-terminated array of kmp_char_t characters X-Git-Tag: holmes-import~650^2~17 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=8505e70cd3f7e28371cb203c8f56398b752ca274;p=libucw.git kmp_enter_raw_string uses zero-terminated array of kmp_char_t characters instead of bytes --- diff --git a/lib/kmp.c b/lib/kmp.c index c2e2dd10..e30eb62c 100644 --- a/lib/kmp.c +++ b/lib/kmp.c @@ -10,7 +10,7 @@ #include "lib/lists.h" #include "lib/unicode.h" -#define KMP_GET_CHAR KMP_GET_RAW +#define KMP_GET_CHAR(pos, c, flags) do{}while(0) #include "lib/kmp.h" #include @@ -65,16 +65,15 @@ new_output(struct kmp *kmp, uns id, uns len) } void -kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id) +kmp_enter_raw_string(struct kmp *kmp, kmp_char_t *str, uns id) { struct kmp_transition tr = { .next=NULL, .from=0 }, **prev; struct kmp_output *new_out; - const byte *orig_str = str; uns len = 0; kmp_char_t c = 'a'; - TRACE(20, "kmp.c: Entering string %s", str); - kmp_get_char(&str, &c, 0); + TRACE(20, "kmp.c: Entering string"); + c = *str++; len++; if (!c) return; @@ -85,7 +84,7 @@ kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id) if (!*prev) break; tr.from = (*prev)->to; - kmp_get_char(&str, &c, 0); + c = *str++; len++; } while (c) @@ -95,7 +94,7 @@ kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id) **prev = tr; add_tail(kmp->g.sons + tr.from, &(*prev)->n); init_list(kmp->g.sons + tr.to); - kmp_get_char(&str, &c, 0); + c = *str++; len++; tr.from = tr.to; tr.c = c; @@ -103,7 +102,7 @@ kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id) ASSERT(!*prev); } if (kmp->out[tr.from]) - TRACE(5, "kmp.c: string %s is inserted more than once", orig_str); + TRACE(5, "kmp.c: string is inserted more than once"); new_out = new_output(kmp, id, len-1); merge_output(kmp->out + tr.from, new_out); } diff --git a/lib/kmp.h b/lib/kmp.h index 7d392bf9..7264b954 100644 --- a/lib/kmp.h +++ b/lib/kmp.h @@ -33,7 +33,7 @@ /* Pre-defined input functions */ -#define KMP_GET_RAW(pos, c, flags) do { uns cc; pos = utf8_get(pos, &cc); c=cc; } while(0) +#define KMP_GET_UTF8(pos, c, flags) do { uns cc; pos = utf8_get(pos, &cc); c = cc; } while(0) #define KMP_GET_ASCII(pos, c, flags) do { \ c = *pos++; \ @@ -86,11 +86,11 @@ struct kmp_result { /* kmp.c */ struct kmp *kmp_new(struct mempool *mp, int words_len, uns modify_flags); -void kmp_enter_raw_string(struct kmp *kmp, const byte *str, uns id); +void kmp_enter_raw_string(struct kmp *kmp, kmp_char_t *str, uns id); void kmp_build(struct kmp *kmp); static inline void -kmp_get_char(const byte **str, kmp_char_t *c, uns modify_flags UNUSED) +kmp_get_char(const byte **str UNUSED, kmp_char_t *c, uns modify_flags UNUSED) { while (1) { @@ -112,12 +112,11 @@ kmp_enter_string(struct kmp *kmp, const byte *str, uns id) * to a conversion wrapper (this function) and the rest, which resides in kmp.c * and uses KMP_GET_RAW to read its input. */ - byte buf[3*strlen(str)+1], *str2 = buf; - kmp_char_t c = 0; + kmp_char_t buf[strlen(str)+1], *str2 = buf, c = 0; do { kmp_get_char(&str, &c, kmp->modify_flags); - str2 = utf8_put(str2, c); + *str2++ = c; } while (c); kmp_enter_raw_string(kmp, buf, id);