From: Paul Eggert Date: Sun, 30 Dec 2012 07:31:08 +0000 (-0800) Subject: regex: implement rational ranges X-Git-Tag: v0.1~292 X-Git-Url: http://erislabs.org.uk/gitweb/?a=commitdiff_plain;h=6410c7a6ea65cceb588aadb497715279d4ec5daa;p=gnulib.git regex: implement rational ranges Reported by Aharon Robbins in . * lib/regcomp.c (build_range_exp) [!_LIBC]: * lib/regexec.c (check_node_accept_bytes) [!_LIBC]: Implement rational ranges. --- diff --git a/ChangeLog b/ChangeLog index 0824b0835..8c61adb68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2012-12-29 Paul Eggert + regex: implement rational ranges + Reported by Aharon Robbins in + . + * lib/regcomp.c (build_range_exp) [!_LIBC]: + * lib/regexec.c (check_node_accept_bytes) [!_LIBC]: + Implement rational ranges. + regex: avoid redefining __wctype Reported by Aharon Robbins in . diff --git a/lib/regcomp.c b/lib/regcomp.c index 398df66ce..5cc2c67b3 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -2712,7 +2712,6 @@ build_range_exp (const reg_syntax_t syntax, wchar_t wc; wint_t start_wc; wint_t end_wc; - wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] @@ -2726,11 +2725,7 @@ build_range_exp (const reg_syntax_t syntax, ? __btowc (end_ch) : end_elem->opr.wch); if (start_wc == WEOF || end_wc == WEOF) return REG_ECOLLATE; - cmp_buf[0] = start_wc; - cmp_buf[4] = end_wc; - - if (BE ((syntax & RE_NO_EMPTY_RANGES) - && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0)) + else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) return REG_ERANGE; /* Got valid collation sequence values, add them as a new entry. @@ -2771,9 +2766,7 @@ build_range_exp (const reg_syntax_t syntax, /* Build the table for single byte characters. */ for (wc = 0; wc < SBC_MAX; ++wc) { - cmp_buf[2] = wc; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (start_wc <= wc && wc <= end_wc) bitset_set (sbcset, wc); } } @@ -2970,6 +2963,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, 0)) return REG_ERANGE; + /* FIXME: Implement rational ranges here, too. */ start_collseq = lookup_collation_sequence_value (start_elem); end_collseq = lookup_collation_sequence_value (end_elem); /* Check start/end collation sequence values. */ diff --git a/lib/regexec.c b/lib/regexec.c index 4e4b295b7..22e8dd67c 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -3936,6 +3936,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, in_collseq = find_collation_sequence_value (pin, elem_len); } /* match with range expression? */ + /* FIXME: Implement rational ranges here, too. */ for (i = 0; i < cset->nranges; ++i) if (cset->range_starts[i] <= in_collseq && in_collseq <= cset->range_ends[i]) @@ -3987,18 +3988,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && defined __STRICT_ANSI__) - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif for (i = 0; i < cset->nranges; ++i) { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) { match_len = char_len; goto check_node_accept_bytes_match;