From: Bruno Haible <bruno@clisp.org>
Date: Mon, 5 Feb 2007 01:57:07 +0000 (+0000)
Subject: New module 'mbscasecmp'. strcasecmp is not expected to work with multibyte strings.
X-Git-Tag: cvs-readonly~1127
X-Git-Url: http://erislabs.org.uk/gitweb/?a=commitdiff_plain;h=623e3b9edbc0d95dc7d5c7058e819f7f3293339c;p=gnulib.git

New module 'mbscasecmp'. strcasecmp is not expected to work with multibyte
strings.
---

diff --git a/ChangeLog b/ChangeLog
index 7edacf88e..03a18d959 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2007-02-04  Bruno Haible  <bruno@clisp.org>
 
+	New module mbscasecmp, reduced goal of strcasecmp.
+	* modules/mbscasecmp: New file.
+	* lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c.
+	(mbscasecmp): Renamed from strcasecmp.
+	* lib/strcasecmp.c: Don't include mbuiter.h.
+	(strcasecmp): Remove support for multibyte locales.
+	* lib/string_.h (strcasecmp): Don't rename. Declare only if missing.
+	Change the conditional link warning.
+	(mbscasecmp): New declaration.
+	* m4/mbscasecmp.m4: New file.
+	* m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
+	GNULIB_MBSCASECMP.
+	* modules/string (string.h): Also substitute GNULIB_MBSCASECMP.
+	* MODULES.html.sh (Internationalization functions): Add mbscasecmp.
+
+2007-02-04  Bruno Haible  <bruno@clisp.org>
+
 	New module mbsstr. Remove module strstr.
 	* modules/mbsstr: New file.
 	* modules/strstr: Remove file.
diff --git a/MODULES.html.sh b/MODULES.html.sh
index b11f90fa7..1de85543d 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2163,6 +2163,7 @@ func_all_modules ()
   func_module mbschr
   func_module mbsrchr
   func_module mbsstr
+  func_module mbscasecmp
   func_module mbswidth
   func_module memcasecmp
   func_module memcoll
diff --git a/lib/mbscasecmp.c b/lib/mbscasecmp.c
index f63e64e3b..8a2f434b1 100644
--- a/lib/mbscasecmp.c
+++ b/lib/mbscasecmp.c
@@ -31,13 +31,13 @@
 
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
-/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
-   greater than zero if S1 is lexicographically less than, equal to or greater
-   than S2.
+/* Compare the character strings S1 and S2, ignoring case, returning less than,
+   equal to or greater than zero if S1 is lexicographically less than, equal to
+   or greater than S2.
    Note: This function may, in multibyte locales, return 0 for strings of
    different lengths!  */
 int
-strcasecmp (const char *s1, const char *s2)
+mbscasecmp (const char *s1, const char *s2)
 {
   if (s1 == s2)
     return 0;
diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c
index f63e64e3b..c605fb0f5 100644
--- a/lib/strcasecmp.c
+++ b/lib/strcasecmp.c
@@ -1,7 +1,5 @@
 /* Case-insensitive string comparison function.
    Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc.
-   Written by Bruno Haible <bruno@clisp.org>, 2005,
-   based on earlier glibc code.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -25,79 +23,41 @@
 #include <ctype.h>
 #include <limits.h>
 
-#if HAVE_MBRTOWC
-# include "mbuiter.h"
-#endif
-
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
 /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
    greater than zero if S1 is lexicographically less than, equal to or greater
    than S2.
-   Note: This function may, in multibyte locales, return 0 for strings of
-   different lengths!  */
+   Note: This function does not work with multibyte strings!  */
+
 int
 strcasecmp (const char *s1, const char *s2)
 {
-  if (s1 == s2)
+  const unsigned char *p1 = (const unsigned char *) s1;
+  const unsigned char *p2 = (const unsigned char *) s2;
+  unsigned char c1, c2;
+
+  if (p1 == p2)
     return 0;
 
-  /* Be careful not to look at the entire extent of s1 or s2 until needed.
-     This is useful because when two strings differ, the difference is
-     most often already in the very few first characters.  */
-#if HAVE_MBRTOWC
-  if (MB_CUR_MAX > 1)
+  do
     {
-      mbui_iterator_t iter1;
-      mbui_iterator_t iter2;
+      c1 = TOLOWER (*p1);
+      c2 = TOLOWER (*p2);
 
-      mbui_init (iter1, s1);
-      mbui_init (iter2, s2);
+      if (c1 == '\0')
+	break;
 
-      while (mbui_avail (iter1) && mbui_avail (iter2))
-	{
-	  int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2));
-
-	  if (cmp != 0)
-	    return cmp;
-
-	  mbui_advance (iter1);
-	  mbui_advance (iter2);
-	}
-      if (mbui_avail (iter1))
-	/* s2 terminated before s1.  */
-	return 1;
-      if (mbui_avail (iter2))
-	/* s1 terminated before s2.  */
-	return -1;
-      return 0;
+      ++p1;
+      ++p2;
     }
-  else
-#endif
-    {
-      const unsigned char *p1 = (const unsigned char *) s1;
-      const unsigned char *p2 = (const unsigned char *) s2;
-      unsigned char c1, c2;
-
-      do
-	{
-	  c1 = TOLOWER (*p1);
-	  c2 = TOLOWER (*p2);
-
-	  if (c1 == '\0')
-	    break;
+  while (c1 == c2);
 
-	  ++p1;
-	  ++p2;
-	}
-      while (c1 == c2);
-
-      if (UCHAR_MAX <= INT_MAX)
-	return c1 - c2;
-      else
-	/* On machines where 'char' and 'int' are types of the same size, the
-	   difference of two 'unsigned char' values - including the sign bit -
-	   doesn't fit in an 'int'.  */
-	return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
-    }
+  if (UCHAR_MAX <= INT_MAX)
+    return c1 - c2;
+  else
+    /* On machines where 'char' and 'int' are types of the same size, the
+       difference of two 'unsigned char' values - including the sign bit -
+       doesn't fit in an 'int'.  */
+    return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
 }
diff --git a/lib/string_.h b/lib/string_.h
index 6b99fb2d4..59a14ca24 100644
--- a/lib/string_.h
+++ b/lib/string_.h
@@ -115,20 +115,17 @@ extern char *stpncpy (char *restrict __dst, char const *restrict __src,
 /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
    greater than zero if S1 is lexicographically less than, equal to or greater
    than S2.
-   Note: This function may, in multibyte locales, return 0 for strings of
-   different lengths!
-   No known system has a strcasecmp() function that works correctly in
-   multibyte locales.  Therefore use our version always, if the
-   strcase module is available.  */
-#if @GNULIB_STRCASE@
-# if @REPLACE_STRCASECMP@
-#  define strcasecmp rpl_strcasecmp
-extern int strcasecmp (char const *__s1, char const *__s2);
-# endif
-#elif defined GNULIB_POSIXCHECK
+   Note: This function does not work with multibyte strings.  */
+#if ! @HAVE_STRCASECMP@
+extern int strcasecmp (char const *s1, char const *s2);
+#endif
+#if defined GNULIB_POSIXCHECK
+/* strcasecmp() does not work with multibyte strings:
+   POSIX says that it operates on "strings", and "string" in POSIX is defined
+   as a sequence of bytes, not of characters.  */
 # undef strcasecmp
 # define strcasecmp(a,b) \
-    (GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \
+    (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \
      strcasecmp (a, b))
 #endif
 
@@ -337,6 +334,16 @@ extern char * mbsrchr (const char *string, int c);
 extern char * mbsstr (const char *haystack, const char *needle);
 #endif
 
+#if @GNULIB_MBSCASECMP@
+/* Compare the character strings S1 and S2, ignoring case, returning less than,
+   equal to or greater than zero if S1 is lexicographically less than, equal to
+   or greater than S2.
+   Note: This function may, in multibyte locales, return 0 for strings of
+   different lengths!
+   Unlike strcasecmp(), this function works correctly in multibyte locales.  */
+extern int mbscasecmp (const char *s1, const char *s2);
+#endif
+
 
 #ifdef __cplusplus
 }
diff --git a/m4/string_h.m4 b/m4/string_h.m4
index 152a6ad71..8dfe95f0f 100644
--- a/m4/string_h.m4
+++ b/m4/string_h.m4
@@ -71,4 +71,5 @@ AC_DEFUN([gl_STRING_MODULE_INDICATOR_DEFAULTS],
   GNULIB_MBSCHR=0;      AC_SUBST([GNULIB_MBSCHR])
   GNULIB_MBSRCHR=0;     AC_SUBST([GNULIB_MBSRCHR])
   GNULIB_MBSSTR=0;      AC_SUBST([GNULIB_MBSSTR])
+  GNULIB_MBSCASECMP=0;  AC_SUBST([GNULIB_MBSCASECMP])
 ])
diff --git a/modules/string b/modules/string
index 3eac82973..f48369f34 100644
--- a/modules/string
+++ b/modules/string
@@ -24,6 +24,7 @@ string.h: string_.h
 	      -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \
 	      -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
 	      -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
+	      -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
 	      -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
 	      -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
 	      -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \