From: Paul Eggert Date: Thu, 15 Feb 2007 00:16:55 +0000 (+0000) Subject: Fix regex code so it doesn't rely on strcasecmp. X-Git-Tag: cvs-readonly~1065 X-Git-Url: http://erislabs.org.uk/gitweb/?a=commitdiff_plain;h=dea6f7089df0538e8bc7971dfbdd84e523a79d2a;p=gnulib.git Fix regex code so it doesn't rely on strcasecmp. * lib/regex_internal.h: Include <langinfo.h> only if _LIBC is defined. Otherwise, include gnulib's langinfo.h. * lib/regcomp.c (init_dfa): Don't use strcasecmp, as it can have undesirable behavior in non-C locales. Instead, rely on locale_charset. * m4/regex.m4 (gl_PREREQ_REGEX): Don't require AM_LANGINFO_CODESET. * modules/regex (FILES): Remove m4/codeset.m4. (Depends-on): Add localcharset. Remove strcase. --- diff --git a/ChangeLog b/ChangeLog index 7b1ab0729..058132f69 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2007-02-14 Paul Eggert + + Fix regex code so it doesn't rely on strcasecmp. + * lib/regex_internal.h: Include <langinfo.h> only if _LIBC is defined. + Otherwise, include gnulib's langinfo.h. + * lib/regcomp.c (init_dfa): Don't use strcasecmp, as it can have + undesirable behavior in non-C locales. Instead, rely on locale_charset. + * m4/regex.m4 (gl_PREREQ_REGEX): Don't require AM_LANGINFO_CODESET. + * modules/regex (FILES): Remove m4/codeset.m4. + (Depends-on): Add localcharset. Remove strcase. + 2007-02-13 Ralf Wildenhues * m4/unlinkdir.m4 (gl_UNLINKDIR): Fix m4 quoting bug. 
diff --git a/lib/regcomp.c b/lib/regcomp.c index 0209bb130..fe4d243d5 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -829,9 +829,6 @@ static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len) { __re_size_t table_size; -#ifndef _LIBC - char *codeset_name; -#endif #ifdef RE_ENABLE_I18N size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); #else @@ -875,22 +872,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) != 0); #else -# ifdef HAVE_LANGINFO_CODESET - codeset_name = nl_langinfo (CODESET); -# else - codeset_name = getenv ("LC_ALL"); - if (codeset_name == NULL || codeset_name[0] == '\0') - codeset_name = getenv ("LC_CTYPE"); - if (codeset_name == NULL || codeset_name[0] == '\0') - codeset_name = getenv ("LANG"); - if (codeset_name == NULL) - codeset_name = ""; - else if (strchr (codeset_name, '.') != NULL) - codeset_name = strchr (codeset_name, '.') + 1; -# endif - - if (strcasecmp (codeset_name, "UTF-8") == 0 - || strcasecmp (codeset_name, "UTF8") == 0) + if (strcmp (locale_charset (), "UTF-8") == 0) dfa->is_utf8 = 1; /* We check exhaustively in the loop below if this charset is a diff --git a/lib/regex_internal.h b/lib/regex_internal.h index a19c9a7fa..9bbc6ac3b 100644 --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -27,8 +27,10 @@ #include #include -#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +#ifdef _LIBC # include <langinfo.h> +#else +# include "localcharset.h" #endif #if defined HAVE_LOCALE_H || defined _LIBC # include <locale.h> diff --git a/m4/regex.m4 b/m4/regex.m4 index f694bac84..4b421bd17 100644 --- a/m4/regex.m4 +++ b/m4/regex.m4 @@ -1,4 +1,4 @@ -#serial 44 +#serial 45 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007 Free Software Foundation, Inc. 
@@ -203,7 +203,6 @@ AC_DEFUN([gl_PREREQ_REGEX], [ AC_REQUIRE([AC_GNU_SOURCE]) AC_REQUIRE([AC_C_RESTRICT]) - AC_REQUIRE([AM_LANGINFO_CODESET]) AC_CHECK_FUNCS_ONCE([iswctype mbrtowc wcrtomb wcscoll]) AC_CHECK_DECLS([isblank], [], [], [#include <ctype.h>]) ]) diff --git a/modules/regex b/modules/regex index 3f94248d4..a1a681160 100644 --- a/modules/regex +++ b/modules/regex @@ -8,17 +8,16 @@ lib/regex_internal.c lib/regex_internal.h lib/regexec.c lib/regcomp.c -m4/codeset.m4 m4/regex.m4 Depends-on: alloca extensions gettext-h +localcharset malloc stdbool stdint -strcase ssize_t wchar wctype