From: Bruno Haible Date: Fri, 6 Mar 2009 18:21:57 +0000 (+0100) Subject: New module 'uninorm/u8-normcmp'. X-Git-Tag: v0.1~6214 X-Git-Url: http://erislabs.org.uk/gitweb/?a=commitdiff_plain;h=68f2b34ff4db6fc25ff2e6122c6a9d1ca5b3edd7;p=gnulib.git New module 'uninorm/u8-normcmp'. --- diff --git a/ChangeLog b/ChangeLog index cd19322ab..2160b9f32 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2009-03-06 Bruno Haible + New module 'uninorm/u8-normcmp'. + * lib/uninorm.h (u8_normcmp, u16_normcmp, u32_normcmp): New + declarations. + * lib/uninorm/u8-normcmp.c: New file. + * lib/uninorm/u-normcmp.h: New file. + * modules/uninorm/u8-normcmp: New file. + +2009-03-06 Bruno Haible + * lib/w32spawn.h (dup_noinherit): Add cast, to avoid gcc warning. Reported by Eric Blake. diff --git a/lib/uninorm.h b/lib/uninorm.h index 07ecacc1f..54497469c 100644 --- a/lib/uninorm.h +++ b/lib/uninorm.h @@ -145,6 +145,21 @@ extern uint32_t * uint32_t *resultbuf, size_t *lengthp); +/* Compare S1 and S2, ignoring normalization. + NF must be either UNINORM_NFD or UNINORM_NFKD. + If successful, set *RESULT to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and + return 0. Upon failure, return the error number. */ +extern int + u8_normcmp (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2, + uninorm_t nf, int *result); +extern int + u16_normcmp (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2, + uninorm_t nf, int *result); +extern int + u32_normcmp (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2, + uninorm_t nf, int *result); + + /* Normalization of a stream of Unicode characters. A "stream of Unicode characters" is essentially a function that accepts an diff --git a/lib/uninorm/u-normcmp.h b/lib/uninorm/u-normcmp.h new file mode 100644 index 000000000..288bcab86 --- /dev/null +++ b/lib/uninorm/u-normcmp.h @@ -0,0 +1,60 @@ +/* Normalization insensitive comparison of UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. 
+ Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +int +FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2, + uninorm_t nf, int *result) +{ + UNIT *norms1; + size_t norms1_length; + UNIT *norms2; + size_t norms2_length; + int cmp; + + /* Normalize S1. */ + norms1 = U_NORMALIZE (nf, s1, n1, NULL, &norms1_length); + if (norms1 == NULL) + return errno; + + /* Normalize S2. */ + norms2 = U_NORMALIZE (nf, s2, n2, NULL, &norms2_length); + if (norms2 == NULL) + { + int saved_errno = errno; + free (norms1); + return saved_errno; + } + + /* Compare the normalized strings. */ + cmp = U_CMP (norms1, norms2, MIN (norms1_length, norms2_length)); + if (cmp == 0) + { + if (norms1_length < norms2_length) + cmp = -1; + else if (norms1_length > norms2_length) + cmp = 1; + } + else if (cmp > 0) + cmp = 1; + else if (cmp < 0) + cmp = -1; + + free (norms2); + free (norms1); + *result = cmp; + return 0; +} diff --git a/lib/uninorm/u8-normcmp.c b/lib/uninorm/u8-normcmp.c new file mode 100644 index 000000000..aa8dc8e07 --- /dev/null +++ b/lib/uninorm/u8-normcmp.c @@ -0,0 +1,33 @@ +/* Normalization insensitive comparison of UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. 
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "uninorm.h" + +#include <errno.h> +#include <stdlib.h> + +#include "minmax.h" +#include "unistr.h" + +#define FUNC u8_normcmp +#define UNIT uint8_t +#define U_NORMALIZE u8_normalize +#define U_CMP u8_cmp +#include "u-normcmp.h" diff --git a/modules/uninorm/u8-normcmp b/modules/uninorm/u8-normcmp new file mode 100644 index 000000000..f1e45186a --- /dev/null +++ b/modules/uninorm/u8-normcmp @@ -0,0 +1,26 @@ +Description: +Normalization insensitive comparison of UTF-8 strings. + +Files: +lib/uninorm/u8-normcmp.c +lib/uninorm/u-normcmp.h + +Depends-on: +uninorm/u8-normalize +unistr/u8-cmp +minmax + +configure.ac: + +Makefile.am: +lib_SOURCES += uninorm/u8-normcmp.c + +Include: +"uninorm.h" + +License: +LGPL + +Maintainer: +Bruno Haible +