1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str, int len,
23 RE_TRANSLATE_TYPE trans, int icase);
25 static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
27 #endif /* RE_ENABLE_I18N */
28 static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
29 const re_node_set *nodes,
31 static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
33 static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
34 const re_node_set *nodes,
36 static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
37 const re_node_set *nodes,
40 static unsigned int inline calc_state_hash (const re_node_set *nodes,
41 unsigned int context);
43 /* Functions for string operation. */
45 /* This function allocate the buffers. It is necessary to call
46 re_string_reconstruct before using the object. */
49 re_string_allocate (pstr, str, len, init_len, trans, icase)
52 int len, init_len, icase;
53 RE_TRANSLATE_TYPE trans;
56 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
57 re_string_construct_common (str, len, pstr, trans, icase);
58 pstr->stop = pstr->len;
60 ret = re_string_realloc_buffers (pstr, init_buf_len);
61 if (BE (ret != REG_NOERROR, 0))
64 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
65 : (unsigned char *) str);
66 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
67 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
68 || MB_CUR_MAX > 1) ? pstr->valid_len : len;
72 /* This function allocate the buffers, and initialize them. */
75 re_string_construct (pstr, str, len, trans, icase)
79 RE_TRANSLATE_TYPE trans;
82 re_string_construct_common (str, len, pstr, trans, icase);
83 pstr->stop = pstr->len;
84 /* Set 0 so that this function can initialize whole buffers. */
89 ret = re_string_realloc_buffers (pstr, len + 1);
90 if (BE (ret != REG_NOERROR, 0))
93 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
94 : (unsigned char *) str);
95 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
101 build_wcs_upper_buffer (pstr);
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr);
108 #ifdef RE_ENABLE_I18N
110 build_wcs_buffer (pstr);
112 #endif /* RE_ENABLE_I18N */
115 re_string_translate_buffer (pstr);
117 pstr->valid_len = len;
121 /* Initialized whole buffers, then valid_len == bufs_len. */
122 pstr->valid_len = pstr->bufs_len;
126 /* Helper functions for re_string_allocate, and re_string_construct. */
129 re_string_realloc_buffers (pstr, new_buf_len)
133 #ifdef RE_ENABLE_I18N
136 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
137 if (BE (new_array == NULL, 0))
139 pstr->wcs = new_array;
141 #endif /* RE_ENABLE_I18N */
142 if (MBS_ALLOCATED (pstr))
144 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
146 if (BE (new_array == NULL, 0))
148 pstr->mbs = new_array;
150 if (MBS_CASE_ALLOCATED (pstr))
152 unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
154 if (BE (new_array == NULL, 0))
156 pstr->mbs_case = new_array;
157 if (!MBS_ALLOCATED (pstr))
158 pstr->mbs = pstr->mbs_case;
160 pstr->bufs_len = new_buf_len;
166 re_string_construct_common (str, len, pstr, trans, icase)
170 RE_TRANSLATE_TYPE trans;
173 memset (pstr, '\0', sizeof (re_string_t));
174 pstr->raw_mbs = (const unsigned char *) str;
177 pstr->icase = icase ? 1 : 0;
180 #ifdef RE_ENABLE_I18N
182 /* Build wide character buffer PSTR->WCS.
183 If the byte sequence of the string are:
184 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
185 Then wide character buffer will be:
186 <wc1> , WEOF , <wc2> , WEOF , <wc3>
187 We use WEOF for padding, they indicate that the position isn't
188 a first byte of a multibyte character.
190 Note that this function assumes PSTR->VALID_LEN elements are already
191 built and starts from PSTR->VALID_LEN. */
194 build_wcs_buffer (pstr)
198 int byte_idx, end_idx, mbclen, remain_len;
199 /* Build the buffers from pstr->valid_len to either pstr->len or
201 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
202 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
205 remain_len = end_idx - byte_idx;
206 prev_st = pstr->cur_state;
207 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
208 + byte_idx), remain_len, &pstr->cur_state);
209 if (BE (mbclen == (size_t) -2, 0))
211 /* The buffer doesn't have enough space, finish to build. */
212 pstr->cur_state = prev_st;
215 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
217 /* We treat these cases as a singlebyte character. */
219 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
220 pstr->cur_state = prev_st;
223 /* Apply the translateion if we need. */
224 if (pstr->trans != NULL && mbclen == 1)
226 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
227 pstr->mbs_case[byte_idx] = ch;
229 /* Write wide character and padding. */
230 pstr->wcs[byte_idx++] = wc;
231 /* Write paddings. */
232 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
233 pstr->wcs[byte_idx++] = WEOF;
235 pstr->valid_len = byte_idx;
238 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
239 but for REG_ICASE. */
242 build_wcs_upper_buffer (pstr)
246 int byte_idx, end_idx, mbclen, remain_len;
247 /* Build the buffers from pstr->valid_len to either pstr->len or
249 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
250 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
253 remain_len = end_idx - byte_idx;
254 prev_st = pstr->cur_state;
255 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
256 + byte_idx), remain_len, &pstr->cur_state);
257 if (BE (mbclen == (size_t) -2, 0))
259 /* The buffer doesn't have enough space, finish to build. */
260 pstr->cur_state = prev_st;
263 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
265 /* In case of a singlebyte character. */
266 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
267 /* Apply the translateion if we need. */
268 if (pstr->trans != NULL && mbclen == 1)
270 ch = pstr->trans[ch];
271 pstr->mbs_case[byte_idx] = ch;
273 pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
274 pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
275 if (BE (mbclen == (size_t) -1, 0))
276 pstr->cur_state = prev_st;
278 else /* mbclen > 1 */
281 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
283 memcpy (pstr->mbs + byte_idx,
284 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
285 pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
286 /* Write paddings. */
287 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
288 pstr->wcs[byte_idx++] = WEOF;
291 pstr->valid_len = byte_idx;
294 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
298 re_string_skip_chars (pstr, new_raw_idx, last_wc)
304 int rawbuf_idx, mbclen;
307 /* Skip the characters which are not necessary to check. */
308 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
309 rawbuf_idx < new_raw_idx;)
312 remain_len = pstr->len - rawbuf_idx;
313 prev_st = pstr->cur_state;
314 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
315 remain_len, &pstr->cur_state);
316 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
318 /* We treat these cases as a singlebyte character. */
320 pstr->cur_state = prev_st;
322 /* Then proceed the next character. */
323 rawbuf_idx += mbclen;
325 *last_wc = (wint_t) wc;
328 #endif /* RE_ENABLE_I18N */
330 /* Build the buffer PSTR->MBS, and apply the translation if we need.
331 This function is used in case of REG_ICASE. */
334 build_upper_buffer (pstr)
337 int char_idx, end_idx;
338 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
340 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
342 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
343 if (pstr->trans != NULL)
345 ch = pstr->trans[ch];
346 pstr->mbs_case[char_idx] = ch;
349 pstr->mbs[char_idx] = toupper (ch);
351 pstr->mbs[char_idx] = ch;
353 pstr->valid_len = char_idx;
356 /* Apply TRANS to the buffer in PSTR. */
359 re_string_translate_buffer (pstr)
362 int buf_idx, end_idx;
363 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
365 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
367 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
368 pstr->mbs_case[buf_idx] = pstr->trans[ch];
371 pstr->valid_len = buf_idx;
374 /* This function re-construct the buffers.
375 Concretely, convert to wide character in case of MB_CUR_MAX > 1,
376 convert to upper case in case of REG_ICASE, apply translation. */
379 re_string_reconstruct (pstr, idx, eflags, newline)
381 int idx, eflags, newline;
383 int offset = idx - pstr->raw_mbs_idx;
387 #ifdef RE_ENABLE_I18N
389 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
390 #endif /* RE_ENABLE_I18N */
391 pstr->len += pstr->raw_mbs_idx;
392 pstr->stop += pstr->raw_mbs_idx;
393 pstr->valid_len = pstr->raw_mbs_idx = 0;
394 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
395 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
396 if (!MBS_CASE_ALLOCATED (pstr))
397 pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
398 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
399 pstr->mbs = (unsigned char *) pstr->raw_mbs;
405 /* Are the characters which are already checked remain? */
406 if (offset < pstr->valid_len)
408 /* Yes, move them to the front of the buffer. */
409 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
411 #ifdef RE_ENABLE_I18N
413 memmove (pstr->wcs, pstr->wcs + offset,
414 (pstr->valid_len - offset) * sizeof (wint_t));
415 #endif /* RE_ENABLE_I18N */
416 if (MBS_ALLOCATED (pstr))
417 memmove (pstr->mbs, pstr->mbs + offset,
418 pstr->valid_len - offset);
419 if (MBS_CASE_ALLOCATED (pstr))
420 memmove (pstr->mbs_case, pstr->mbs_case + offset,
421 pstr->valid_len - offset);
422 pstr->valid_len -= offset;
424 assert (pstr->valid_len > 0);
429 /* No, skip all characters until IDX. */
431 #ifdef RE_ENABLE_I18N
436 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
437 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
438 pstr->wcs[wcs_idx] = WEOF;
439 if (pstr->trans && wc <= 0xff)
440 wc = pstr->trans[wc];
441 pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
442 : ((newline && IS_WIDE_NEWLINE (wc))
443 ? CONTEXT_NEWLINE : 0));
446 #endif /* RE_ENABLE_I18N */
448 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
451 pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
452 : ((newline && IS_NEWLINE (c))
453 ? CONTEXT_NEWLINE : 0));
456 if (!MBS_CASE_ALLOCATED (pstr))
458 pstr->mbs_case += offset;
459 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
460 if (!MBS_ALLOCATED (pstr))
464 pstr->raw_mbs_idx = idx;
466 pstr->stop -= offset;
468 /* Then build the buffers. */
469 #ifdef RE_ENABLE_I18N
473 build_wcs_upper_buffer (pstr);
475 build_wcs_buffer (pstr);
478 #endif /* RE_ENABLE_I18N */
481 build_upper_buffer (pstr);
482 else if (pstr->trans != NULL)
483 re_string_translate_buffer (pstr);
491 re_string_destruct (pstr)
494 #ifdef RE_ENABLE_I18N
496 #endif /* RE_ENABLE_I18N */
497 if (MBS_ALLOCATED (pstr))
499 if (MBS_CASE_ALLOCATED (pstr))
500 re_free (pstr->mbs_case);
503 /* Return the context at IDX in INPUT. */
506 re_string_context_at (input, idx, eflags, newline_anchor)
507 const re_string_t *input;
508 int idx, eflags, newline_anchor;
511 if (idx < 0 || idx == input->len)
514 /* In this case, we use the value stored in input->tip_context,
515 since we can't know the character in input->mbs[-1] here. */
516 return input->tip_context;
517 else /* (idx == input->len) */
518 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
519 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
521 #ifdef RE_ENABLE_I18N
526 while(input->wcs[wc_idx] == WEOF)
529 /* It must not happen. */
530 assert (wc_idx >= 0);
534 return input->tip_context;
536 wc = input->wcs[wc_idx];
537 if (IS_WIDE_WORD_CHAR (wc))
539 return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
544 c = re_string_byte_at (input, idx);
545 if (IS_WORD_CHAR (c))
547 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
551 /* Functions for set operation. */
554 re_node_set_alloc (set, size)
560 set->elems = re_malloc (int, size);
561 if (BE (set->elems == NULL, 0))
567 re_node_set_init_1 (set, elem)
573 set->elems = re_malloc (int, 1);
574 if (BE (set->elems == NULL, 0))
576 set->alloc = set->nelem = 0;
579 set->elems[0] = elem;
584 re_node_set_init_2 (set, elem1, elem2)
589 set->elems = re_malloc (int, 2);
590 if (BE (set->elems == NULL, 0))
595 set->elems[0] = elem1;
602 set->elems[0] = elem1;
603 set->elems[1] = elem2;
607 set->elems[0] = elem2;
608 set->elems[1] = elem1;
615 re_node_set_init_copy (dest, src)
617 const re_node_set *src;
619 dest->nelem = src->nelem;
622 dest->alloc = dest->nelem;
623 dest->elems = re_malloc (int, dest->alloc);
624 if (BE (dest->elems == NULL, 0))
626 dest->alloc = dest->nelem = 0;
629 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
632 re_node_set_init_empty (dest);
636 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
637 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
638 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
641 re_node_set_add_intersect (dest, src1, src2)
643 const re_node_set *src1, *src2;
646 if (src1->nelem > 0 && src2->nelem > 0)
648 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
650 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
651 dest->elems = re_realloc (dest->elems, int, dest->alloc);
652 if (BE (dest->elems == NULL, 0))
659 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
661 if (src1->elems[i1] > src2->elems[i2])
666 if (src1->elems[i1] == src2->elems[i2])
668 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
670 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
674 memmove (dest->elems + id + 1, dest->elems + id,
675 sizeof (int) * (dest->nelem - id));
676 dest->elems[id++] = src2->elems[i2++];
685 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
686 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
689 re_node_set_init_union (dest, src1, src2)
691 const re_node_set *src1, *src2;
694 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
696 dest->alloc = src1->nelem + src2->nelem;
697 dest->elems = re_malloc (int, dest->alloc);
698 if (BE (dest->elems == NULL, 0))
703 if (src1 != NULL && src1->nelem > 0)
704 return re_node_set_init_copy (dest, src1);
705 else if (src2 != NULL && src2->nelem > 0)
706 return re_node_set_init_copy (dest, src2);
708 re_node_set_init_empty (dest);
711 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
713 if (src1->elems[i1] > src2->elems[i2])
715 dest->elems[id++] = src2->elems[i2++];
718 if (src1->elems[i1] == src2->elems[i2])
720 dest->elems[id++] = src1->elems[i1++];
722 if (i1 < src1->nelem)
724 memcpy (dest->elems + id, src1->elems + i1,
725 (src1->nelem - i1) * sizeof (int));
726 id += src1->nelem - i1;
728 else if (i2 < src2->nelem)
730 memcpy (dest->elems + id, src2->elems + i2,
731 (src2->nelem - i2) * sizeof (int));
732 id += src2->nelem - i2;
738 /* Calculate the union set of the sets DEST and SRC. And store it to
739 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
742 re_node_set_merge (dest, src)
744 const re_node_set *src;
747 if (src == NULL || src->nelem == 0)
749 if (dest->alloc < src->nelem + dest->nelem)
752 dest->alloc = 2 * (src->nelem + dest->alloc);
753 new_buffer = re_realloc (dest->elems, int, dest->alloc);
754 if (BE (new_buffer == NULL, 0))
756 dest->elems = new_buffer;
759 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
761 int cp_from, ncp, mid, right, src_elem = src->elems[si];
762 /* Binary search the spot we will add the new element. */
766 mid = (di + right) / 2;
767 if (dest->elems[mid] < src_elem)
772 if (di >= dest->nelem)
775 if (dest->elems[di] == src_elem)
777 /* Skip since, DEST already has the element. */
783 /* Skip the src elements which are less than dest->elems[di]. */
785 while (si < src->nelem && src->elems[si] < dest->elems[di])
787 /* Copy these src elements. */
789 memmove (dest->elems + di + ncp, dest->elems + di,
790 sizeof (int) * (dest->nelem - di));
791 memcpy (dest->elems + di, src->elems + cp_from,
793 /* Update counters. */
798 /* Copy remaining src elements. */
801 memcpy (dest->elems + di, src->elems + si,
802 sizeof (int) * (src->nelem - si));
803 dest->nelem += src->nelem - si;
808 /* Insert the new element ELEM to the re_node_set* SET.
809 return 0 if SET already has ELEM,
810 return -1 if an error is occured, return 1 otherwise. */
813 re_node_set_insert (set, elem)
818 /* In case of the set is empty. */
819 if (set->elems == NULL || set->alloc == 0)
821 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
827 /* Binary search the spot we will add the new element. */
832 mid = (idx + right) / 2;
833 if (set->elems[mid] < elem)
839 /* Realloc if we need. */
840 if (set->alloc < set->nelem + 1)
843 set->alloc = set->alloc * 2;
844 new_array = re_malloc (int, set->alloc);
845 if (BE (new_array == NULL, 0))
847 /* Copy the elements they are followed by the new element. */
849 memcpy (new_array, set->elems, sizeof (int) * (idx));
850 /* Copy the elements which follows the new element. */
851 if (set->nelem - idx > 0)
852 memcpy (new_array + idx + 1, set->elems + idx,
853 sizeof (int) * (set->nelem - idx));
854 re_free (set->elems);
855 set->elems = new_array;
859 /* Move the elements which follows the new element. */
860 if (set->nelem - idx > 0)
861 memmove (set->elems + idx + 1, set->elems + idx,
862 sizeof (int) * (set->nelem - idx));
864 /* Insert the new element. */
865 set->elems[idx] = elem;
870 /* Compare two node sets SET1 and SET2.
871 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
874 re_node_set_compare (set1, set2)
875 const re_node_set *set1, *set2;
878 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
880 for (i = 0 ; i < set1->nelem ; i++)
881 if (set1->elems[i] != set2->elems[i])
886 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
889 re_node_set_contains (set, elem)
890 const re_node_set *set;
897 /* Binary search the element. */
899 right = set->nelem - 1;
902 mid = (idx + right) / 2;
903 if (set->elems[mid] < elem)
908 return set->elems[idx] == elem ? idx + 1 : 0;
912 re_node_set_remove_at (set, idx)
916 if (idx < 0 || idx >= set->nelem)
918 if (idx < set->nelem - 1)
919 memmove (set->elems + idx, set->elems + idx + 1,
920 sizeof (int) * (set->nelem - idx - 1));
925 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
926 Or return -1, if an error will be occured. */
929 re_dfa_add_node (dfa, token, mode)
934 if (dfa->nodes_len >= dfa->nodes_alloc)
936 re_token_t *new_array;
937 dfa->nodes_alloc *= 2;
938 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
939 if (BE (new_array == NULL, 0))
942 dfa->nodes = new_array;
945 int *new_nexts, *new_indices;
946 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
948 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
949 new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
950 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
951 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
953 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
955 if (BE (new_nexts == NULL || new_indices == NULL
956 || new_edests == NULL || new_eclosures == NULL
957 || new_inveclosures == NULL, 0))
959 dfa->nexts = new_nexts;
960 dfa->org_indices = new_indices;
961 dfa->edests = new_edests;
962 dfa->eclosures = new_eclosures;
963 dfa->inveclosures = new_inveclosures;
966 dfa->nodes[dfa->nodes_len] = token;
967 dfa->nodes[dfa->nodes_len].duplicated = 0;
968 dfa->nodes[dfa->nodes_len].constraint = 0;
969 return dfa->nodes_len++;
972 static unsigned int inline
973 calc_state_hash (nodes, context)
974 const re_node_set *nodes;
975 unsigned int context;
977 unsigned int hash = nodes->nelem + context;
979 for (i = 0 ; i < nodes->nelem ; i++)
980 hash += nodes->elems[i];
984 /* Search for the state whose node_set is equivalent to NODES.
985 Return the pointer to the state, if we found it in the DFA.
986 Otherwise create the new one and return it. In case of an error
987 return NULL and set the error code in ERR.
988 Note: - We assume NULL as the invalid state, then it is possible that
989 return value is NULL and ERR is REG_NOERROR.
990 - We never return non-NULL value in case of any errors, it is for
993 static re_dfastate_t*
994 re_acquire_state (err, dfa, nodes)
997 const re_node_set *nodes;
1000 re_dfastate_t *new_state;
1001 struct re_state_table_entry *spot;
1003 if (BE (nodes->nelem == 0, 0))
1008 hash = calc_state_hash (nodes, 0);
1009 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1011 for (i = 0 ; i < spot->num ; i++)
1013 re_dfastate_t *state = spot->array[i];
1014 if (hash != state->hash)
1016 if (re_node_set_compare (&state->nodes, nodes))
1020 /* There are no appropriate state in the dfa, create the new one. */
1021 new_state = create_ci_newstate (dfa, nodes, hash);
1022 if (BE (new_state != NULL, 1))
1031 /* Search for the state whose node_set is equivalent to NODES and
1032 whose context is equivalent to CONTEXT.
1033 Return the pointer to the state, if we found it in the DFA.
1034 Otherwise create the new one and return it. In case of an error
1035 return NULL and set the error code in ERR.
1036 Note: - We assume NULL as the invalid state, then it is possible that
1037 return value is NULL and ERR is REG_NOERROR.
1038 - We never return non-NULL value in case of any errors, it is for
1041 static re_dfastate_t*
1042 re_acquire_state_context (err, dfa, nodes, context)
1045 const re_node_set *nodes;
1046 unsigned int context;
1049 re_dfastate_t *new_state;
1050 struct re_state_table_entry *spot;
1052 if (nodes->nelem == 0)
1057 hash = calc_state_hash (nodes, context);
1058 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1060 for (i = 0 ; i < spot->num ; i++)
1062 re_dfastate_t *state = spot->array[i];
1063 if (hash != state->hash)
1065 if (re_node_set_compare (state->entrance_nodes, nodes)
1066 && state->context == context)
1069 /* There are no appropriate state in `dfa', create the new one. */
1070 new_state = create_cd_newstate (dfa, nodes, context, hash);
1071 if (BE (new_state != NULL, 1))
1080 /* Allocate memory for DFA state and initialize common properties.
1081 Return the new state if succeeded, otherwise return NULL. */
1083 static re_dfastate_t *
1084 create_newstate_common (dfa, nodes, hash)
1086 const re_node_set *nodes;
1089 re_dfastate_t *newstate;
1091 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1092 if (BE (newstate == NULL, 0))
1094 err = re_node_set_init_copy (&newstate->nodes, nodes);
1095 if (BE (err != REG_NOERROR, 0))
1100 newstate->trtable = NULL;
1101 newstate->trtable_search = NULL;
1102 newstate->hash = hash;
1106 /* Store the new state NEWSTATE whose hash value is HASH in appropriate
1107 position. Return value indicate the error code if failed. */
1109 static reg_errcode_t
1110 register_state (dfa, newstate, hash)
1112 re_dfastate_t *newstate;
1115 struct re_state_table_entry *spot;
1116 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1118 if (spot->alloc <= spot->num)
1120 re_dfastate_t **new_array;
1121 spot->alloc = 2 * spot->num + 2;
1122 new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
1123 if (BE (new_array == NULL, 0))
1125 spot->array = new_array;
1127 spot->array[spot->num++] = newstate;
1131 /* Create the new state which is independ of contexts.
1132 Return the new state if succeeded, otherwise return NULL. */
1134 static re_dfastate_t *
1135 create_ci_newstate (dfa, nodes, hash)
1137 const re_node_set *nodes;
1142 re_dfastate_t *newstate;
1143 newstate = create_newstate_common (dfa, nodes, hash);
1144 if (BE (newstate == NULL, 0))
1146 newstate->entrance_nodes = &newstate->nodes;
1148 for (i = 0 ; i < nodes->nelem ; i++)
1150 re_token_t *node = dfa->nodes + nodes->elems[i];
1151 re_token_type_t type = node->type;
1152 if (type == CHARACTER && !node->constraint)
1155 /* If the state has the halt node, the state is a halt state. */
1156 else if (type == END_OF_RE)
1158 #ifdef RE_ENABLE_I18N
1159 else if (type == COMPLEX_BRACKET
1160 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1161 newstate->accept_mb = 1;
1162 #endif /* RE_ENABLE_I18N */
1163 else if (type == OP_BACK_REF)
1164 newstate->has_backref = 1;
1165 else if (type == ANCHOR || node->constraint)
1166 newstate->has_constraint = 1;
1168 err = register_state (dfa, newstate, hash);
1169 if (BE (err != REG_NOERROR, 0))
1171 free_state (newstate);
1177 /* Create the new state which is depend on the context CONTEXT.
1178 Return the new state if succeeded, otherwise return NULL. */
1180 static re_dfastate_t *
1181 create_cd_newstate (dfa, nodes, context, hash)
1183 const re_node_set *nodes;
1184 unsigned int context, hash;
1186 int i, nctx_nodes = 0;
1188 re_dfastate_t *newstate;
1190 newstate = create_newstate_common (dfa, nodes, hash);
1191 if (BE (newstate == NULL, 0))
1193 newstate->context = context;
1194 newstate->entrance_nodes = &newstate->nodes;
1196 for (i = 0 ; i < nodes->nelem ; i++)
1198 unsigned int constraint = 0;
1199 re_token_t *node = dfa->nodes + nodes->elems[i];
1200 re_token_type_t type = node->type;
1201 if (node->constraint)
1202 constraint = node->constraint;
1204 if (type == CHARACTER && !constraint)
1206 /* If the state has the halt node, the state is a halt state. */
1207 else if (type == END_OF_RE)
1209 #ifdef RE_ENABLE_I18N
1210 else if (type == COMPLEX_BRACKET
1211 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1212 newstate->accept_mb = 1;
1213 #endif /* RE_ENABLE_I18N */
1214 else if (type == OP_BACK_REF)
1215 newstate->has_backref = 1;
1216 else if (type == ANCHOR)
1217 constraint = node->opr.ctx_type;
1221 if (newstate->entrance_nodes == &newstate->nodes)
1223 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1224 if (BE (newstate->entrance_nodes == NULL, 0))
1226 free_state (newstate);
1229 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1231 newstate->has_constraint = 1;
1234 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1236 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1241 err = register_state (dfa, newstate, hash);
1242 if (BE (err != REG_NOERROR, 0))
1244 free_state (newstate);
1252 re_dfastate_t *state;
1254 if (state->entrance_nodes != &state->nodes)
1256 re_node_set_free (state->entrance_nodes);
1257 re_free (state->entrance_nodes);
1259 re_node_set_free (&state->nodes);
1260 re_free (state->trtable);
1261 re_free (state->trtable_search);