From a804c94d45f35507c1d7e2c34cdf53ec814eaede Mon Sep 17 00:00:00 2001 From: Bruno Haible <bruno@clisp.org> Date: Sun, 8 Mar 2009 16:34:34 +0100 Subject: [PATCH] New module 'unicase/u8-casecmp'. --- ChangeLog | 5 +++ lib/unicase/u-casecmp.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++ lib/unicase/u8-casecmp.c | 34 +++++++++++++++++++++ modules/unicase/u8-casecmp | 28 +++++++++++++++++ 4 files changed, 143 insertions(+) create mode 100644 lib/unicase/u-casecmp.h create mode 100644 lib/unicase/u8-casecmp.c create mode 100644 modules/unicase/u8-casecmp diff --git a/ChangeLog b/ChangeLog index a749b0a16..f1b65a25f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2009-03-08 Bruno Haible <bruno@clisp.org> + New module 'unicase/u8-casecmp'. + * lib/unicase/u8-casecmp.c: New file. + * lib/unicase/u-casecmp.h: New file. + * modules/unicase/u8-casecmp: New file. + Tests for module 'unicase/u32-casefold'. * modules/unicase/u32-casefold-tests: New file. * tests/unicase/test-u32-casefold.c: New file. diff --git a/lib/unicase/u-casecmp.h b/lib/unicase/u-casecmp.h new file mode 100644 index 000000000..50325b095 --- /dev/null +++ b/lib/unicase/u-casecmp.h @@ -0,0 +1,76 @@ +/* Case and normalization insensitive comparison of Unicode strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +/* Compare S1 (N1 units) and S2 (N2 units) after passing each through U_CASEFOLD with ISO639_LANGUAGE and, when NF is not NULL, the decomposing form of NF. On success store -1, 0 or 1 in *RESULTP and return 0. If either U_CASEFOLD call returns NULL, return -1 without writing *RESULTP. */ +int +FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp) +{ + UNIT buf1[2048 / sizeof (UNIT)]; + UNIT buf2[2048 / sizeof (UNIT)]; + UNIT *norms1; + size_t norms1_length; + UNIT *norms2; + size_t norms2_length; + int cmp; + + /* Optimization: There is no need to do canonical composition of each string. + Decomposition is enough. */ + if (nf != NULL) + nf = uninorm_decomposing_form (nf); + + /* Case-fold and normalize S1. norms1_length is set to the capacity of buf1 in units before the call and is used as the length of norms1 afterwards. */ + norms1_length = sizeof (buf1) / sizeof (UNIT); + norms1 = U_CASEFOLD (s1, n1, iso639_language, nf, buf1, &norms1_length); + if (norms1 == NULL) + /* errno is set here. */ + return -1; + + /* Case-fold and normalize S2, in the same way with buf2. */ + norms2_length = sizeof (buf2) / sizeof (UNIT); + norms2 = U_CASEFOLD (s2, n2, iso639_language, nf, buf2, &norms2_length); + if (norms2 == NULL) + { + if (norms1 != buf1) + { + int saved_errno = errno; /* Keep the errno of the failed call across free(). */ + free (norms1); + errno = saved_errno; + } + return -1; + } + + /* Compare the normalized strings: first over the common length, then, if that part is equal, by length. A nonzero U_CMP result is reduced to -1 or 1. */ + cmp = U_CMP (norms1, norms2, MIN (norms1_length, norms2_length)); + if (cmp == 0) + { + if (norms1_length < norms2_length) + cmp = -1; + else if (norms1_length > norms2_length) + cmp = 1; + } + else if (cmp > 0) + cmp = 1; + else if (cmp < 0) + cmp = -1; + /* Release each result unless it is the corresponding local buffer. */ + if (norms2 != buf2) + free (norms2); + if (norms1 != buf1) + free (norms1); + *resultp = cmp; + return 0; +} diff --git a/lib/unicase/u8-casecmp.c b/lib/unicase/u8-casecmp.c new file mode 100644 index 000000000..3b303420a --- /dev/null +++ b/lib/unicase/u8-casecmp.c @@ -0,0 +1,34 @@ +/* Case and normalization insensitive comparison of UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase.h" + +#include <errno.h> +#include <stdlib.h> + +#include "minmax.h" +#include "uninorm.h" +#include "unistr.h" +/* Instantiate the template in u-casecmp.h as u8_casecmp, with uint8_t units. */ +#define FUNC u8_casecmp +#define UNIT uint8_t +#define U_CASEFOLD u8_casefold +#define U_CMP u8_cmp +#include "u-casecmp.h" diff --git a/modules/unicase/u8-casecmp b/modules/unicase/u8-casecmp new file mode 100644 index 000000000..521ccdc5a --- /dev/null +++ b/modules/unicase/u8-casecmp @@ -0,0 +1,28 @@ +Description: +Case and normalization insensitive comparison of UTF-8 strings. + +Files: +lib/unicase/u8-casecmp.c +lib/unicase/u-casecmp.h + +Depends-on: +unicase/base +unicase/u8-casefold +uninorm/decomposing-form +unistr/u8-cmp +minmax + +configure.ac: + +Makefile.am: +lib_SOURCES += unicase/u8-casecmp.c + +Include: +"unicase.h" + +License: +LGPL + +Maintainer: +Bruno Haible + -- 2.11.0