From: Bruno Haible Date: Mon, 5 Feb 2007 01:57:07 +0000 (+0000) Subject: New module 'mbscasecmp'. strcasecmp is not expected to work with multibyte X-Git-Tag: cvs-readonly~1127 X-Git-Url: http://erislabs.org.uk/gitweb/?a=commitdiff_plain;h=623e3b9edbc0d95dc7d5c7058e819f7f3293339c;p=gnulib.git New module 'mbscasecmp'. strcasecmp is not expected to work with multibyte strings. --- diff --git a/ChangeLog b/ChangeLog index 7edacf88e..03a18d959 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,22 @@ 2007-02-04 Bruno Haible + New module mbscasecmp, reduced goal of strcasecmp. + * modules/mbscasecmp: New file. + * lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c. + (mbscasecmp): Renamed from strcasecmp. + * lib/strcasecmp.c: Don't include mbuiter.h. + (strcasecmp): Remove support for multibyte locales. + * lib/string_.h (strcasecmp): Don't rename. Declare only if missing. + Change the conditional link warning. + (mbscasecmp): New declaration. + * m4/mbscasecmp.m4: New file. + * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize + GNULIB_MBSCASECMP. + * modules/string (string.h): Also substitute GNULIB_MBSCASECMP. + * MODULES.html.sh (Internationalization functions): Add mbscasecmp. + +2007-02-04 Bruno Haible + New module mbsstr. Remove module strstr. * modules/mbsstr: New file. * modules/strstr: Remove file. diff --git a/MODULES.html.sh b/MODULES.html.sh index b11f90fa7..1de85543d 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -2163,6 +2163,7 @@ func_all_modules () func_module mbschr func_module mbsrchr func_module mbsstr + func_module mbscasecmp func_module mbswidth func_module memcasecmp func_module memcoll diff --git a/lib/mbscasecmp.c b/lib/mbscasecmp.c index f63e64e3b..8a2f434b1 100644 --- a/lib/mbscasecmp.c +++ b/lib/mbscasecmp.c @@ -31,13 +31,13 @@ #define TOLOWER(Ch) (isupper (Ch) ? 
tolower (Ch) : (Ch)) -/* Compare strings S1 and S2, ignoring case, returning less than, equal to or - greater than zero if S1 is lexicographically less than, equal to or greater - than S2. +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. Note: This function may, in multibyte locales, return 0 for strings of different lengths! */ int -strcasecmp (const char *s1, const char *s2) +mbscasecmp (const char *s1, const char *s2) { if (s1 == s2) return 0; diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c index f63e64e3b..c605fb0f5 100644 --- a/lib/strcasecmp.c +++ b/lib/strcasecmp.c @@ -1,7 +1,5 @@ /* Case-insensitive string comparison function. Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc. - Written by Bruno Haible , 2005, - based on earlier glibc code. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,79 +23,41 @@ #include #include -#if HAVE_MBRTOWC -# include "mbuiter.h" -#endif - #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! */ + Note: This function does not work with multibyte strings! */ + int strcasecmp (const char *s1, const char *s2) { - if (s1 == s2) + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) return 0; - /* Be careful not to look at the entire extent of s1 or s2 until needed. - This is useful because when two strings differ, the difference is - most often already in the very few first characters. 
*/ -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) + do { - mbui_iterator_t iter1; - mbui_iterator_t iter2; + c1 = TOLOWER (*p1); + c2 = TOLOWER (*p2); - mbui_init (iter1, s1); - mbui_init (iter2, s2); + if (c1 == '\0') + break; - while (mbui_avail (iter1) && mbui_avail (iter2)) - { - int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2)); - - if (cmp != 0) - return cmp; - - mbui_advance (iter1); - mbui_advance (iter2); - } - if (mbui_avail (iter1)) - /* s2 terminated before s1. */ - return 1; - if (mbui_avail (iter2)) - /* s1 terminated before s2. */ - return -1; - return 0; + ++p1; + ++p2; } - else -#endif - { - const unsigned char *p1 = (const unsigned char *) s1; - const unsigned char *p2 = (const unsigned char *) s2; - unsigned char c1, c2; - - do - { - c1 = TOLOWER (*p1); - c2 = TOLOWER (*p2); - - if (c1 == '\0') - break; + while (c1 == c2); - ++p1; - ++p2; - } - while (c1 == c2); - - if (UCHAR_MAX <= INT_MAX) - return c1 - c2; - else - /* On machines where 'char' and 'int' are types of the same size, the - difference of two 'unsigned char' values - including the sign bit - - doesn't fit in an 'int'. */ - return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); - } + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); } diff --git a/lib/string_.h b/lib/string_.h index 6b99fb2d4..59a14ca24 100644 --- a/lib/string_.h +++ b/lib/string_.h @@ -115,20 +115,17 @@ extern char *stpncpy (char *restrict __dst, char const *restrict __src, /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! - No known system has a strcasecmp() function that works correctly in - multibyte locales. 
Therefore use our version always, if the - strcase module is available. */ -#if @GNULIB_STRCASE@ -# if @REPLACE_STRCASECMP@ -# define strcasecmp rpl_strcasecmp -extern int strcasecmp (char const *__s1, char const *__s2); -# endif -#elif defined GNULIB_POSIXCHECK + Note: This function does not work in multibyte locales. */ +#if ! @HAVE_STRCASECMP@ +extern int strcasecmp (char const *s1, char const *s2); +#endif +#if defined GNULIB_POSIXCHECK +/* strcasecmp() does not work with multibyte strings: + POSIX says that it operates on "strings", and "string" in POSIX is defined + as a sequence of bytes, not of characters. */ # undef strcasecmp # define strcasecmp(a,b) \ - (GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \ + (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \ strcasecmp (a, b)) #endif @@ -337,6 +334,16 @@ extern char * mbsrchr (const char *string, int c); extern char * mbsstr (const char *haystack, const char *needle); #endif +#if @GNULIB_MBSCASECMP@ +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. + Note: This function may, in multibyte locales, return 0 for strings of + different lengths! + Unlike strcasecmp(), this function works correctly in multibyte locales. 
*/ +extern int mbscasecmp (const char *s1, const char *s2); +#endif + #ifdef __cplusplus } diff --git a/m4/string_h.m4 b/m4/string_h.m4 index 152a6ad71..8dfe95f0f 100644 --- a/m4/string_h.m4 +++ b/m4/string_h.m4 @@ -71,4 +71,5 @@ AC_DEFUN([gl_STRING_MODULE_INDICATOR_DEFAULTS], GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR]) GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) + GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) ]) diff --git a/modules/string b/modules/string index 3eac82973..f48369f34 100644 --- a/modules/string +++ b/modules/string @@ -24,6 +24,7 @@ string.h: string_.h -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \ -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ + -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \