From decec2d8e8f0d9741cd5f3edce58c3f9eabcef77 Mon Sep 17 00:00:00 2001 From: Bruno Haible <bruno@clisp.org> Date: Sun, 8 Mar 2009 16:40:04 +0100 Subject: [PATCH] New module 'unicase/u8-casexfrm'. --- ChangeLog | 5 +++ lib/unicase/u-casexfrm.h | 90 +++++++++++++++++++++++++++++++++++++++++++++ lib/unicase/u8-casexfrm.c | 35 ++++++++++++++++++ modules/unicase/u8-casexfrm | 29 +++++++++++++++ 4 files changed, 159 insertions(+) create mode 100644 lib/unicase/u-casexfrm.h create mode 100644 lib/unicase/u8-casexfrm.c create mode 100644 modules/unicase/u8-casexfrm diff --git a/ChangeLog b/ChangeLog index b66f395f7..225968700 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2009-03-08 Bruno Haible <bruno@clisp.org> + New module 'unicase/u8-casexfrm'. + * lib/unicase/u8-casexfrm.c: New file. + * lib/unicase/u-casexfrm.h: New file. + * modules/unicase/u8-casexfrm: New file. + Tests for module 'unicase/u32-casecmp'. * modules/unicase/u32-casecmp-tests: New file. * tests/unicase/test-u32-casecmp.c: New file. diff --git a/lib/unicase/u-casexfrm.h b/lib/unicase/u-casexfrm.h new file mode 100644 index 000000000..a7298a970 --- /dev/null +++ b/lib/unicase/u-casexfrm.h @@ -0,0 +1,90 @@ +/* Locale dependent transformation for case insensitive comparison of Unicode + strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ +/* FUNC: casefold and normalize the N units at S with U_CASEFOLD, convert the result to the locale_charset () encoding with U_CONV_TO_ENCODING, and hand it to memxfrm together with RESULTBUF and LENGTHP. Returns whatever memxfrm returns; returns NULL if any step fails, with errno kept intact across the cleanup free () calls (ENOMEM when the realloc below fails). */ +char * +FUNC (const UNIT *s, size_t n, const char *iso639_language, uninorm_t nf, + char *resultbuf, size_t *lengthp) +{ + UNIT foldedsbuf[2048 / sizeof (UNIT)]; /* Stack scratch space offered to U_CASEFOLD. */ + UNIT *foldeds; + size_t foldeds_length; + char convsbuf[2048]; /* Stack scratch space offered to U_CONV_TO_ENCODING. */ + char *convs; + size_t convs_length; + int ret; + char *result; + + /* Casefold and normalize the Unicode string. foldeds_length goes in as the capacity of foldedsbuf, counted in units. */ + foldeds_length = sizeof (foldedsbuf) / sizeof (UNIT); + foldeds = U_CASEFOLD (s, n, iso639_language, nf, foldedsbuf, &foldeds_length); + if (foldeds == NULL) + /* errno is set here. */ + return NULL; + + /* Convert it to locale encoding. The capacity passed in is one less than sizeof (convsbuf), which holds one byte of the stack buffer in reserve. */ + convs = convsbuf; + convs_length = sizeof (convsbuf) - 1; + ret = U_CONV_TO_ENCODING (locale_charset (), + iconveh_error, + foldeds, foldeds_length, + NULL, + &convs, &convs_length); + if (ret < 0) + { /* Conversion failed: release the folded string, if it is not the stack buffer, while keeping the errno that is reported to the caller. */ + if (foldeds != foldedsbuf) + { + int saved_errno = errno; + free (foldeds); + errno = saved_errno; + } + return NULL; + } + + if (foldeds != foldedsbuf) + free (foldeds); + + /* Ensure one more byte is available. The stack buffer already has it because of the "- 1" above; a result stored elsewhere is resized to convs_length + 1 bytes. NOTE(review): presumably memxfrm needs convs[convs_length] to be accessible -- confirm against memxfrm.h. */ + if (convs != convsbuf) + { + char *memory = (char *) realloc (convs, convs_length + 1); + if (memory == NULL) + { + free (convs); /* realloc failed, so the old block is still allocated. */ + errno = ENOMEM; + return NULL; + } + convs = memory; + } + + /* Apply locale dependent transformations for comparison. */ + result = memxfrm (convs, convs_length, resultbuf, lengthp); + if (result == NULL) + { /* Same errno-preserving cleanup as above, this time for the converted string. */ + if (convs != convsbuf) + { + int saved_errno = errno; + free (convs); + errno = saved_errno; + } + return NULL; + } + + if (convs != convsbuf) + free (convs); + return result; +} diff --git a/lib/unicase/u8-casexfrm.c b/lib/unicase/u8-casexfrm.c new file mode 100644 index 000000000..7e1d767ca --- /dev/null +++ b/lib/unicase/u8-casexfrm.c @@ -0,0 +1,35 @@ +/* Locale dependent transformation for case insensitive comparison of UTF-8 + strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2009.
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unicase.h" + +#include <errno.h> +#include <stdlib.h> + +#include "localcharset.h" +#include "uniconv.h" +#include "memxfrm.h" + +#define FUNC u8_casexfrm +#define UNIT uint8_t +#define U_CASEFOLD u8_casefold +#define U_CONV_TO_ENCODING u8_conv_to_encoding +#include "u-casexfrm.h" diff --git a/modules/unicase/u8-casexfrm b/modules/unicase/u8-casexfrm new file mode 100644 index 000000000..474a179cc --- /dev/null +++ b/modules/unicase/u8-casexfrm @@ -0,0 +1,29 @@ +Description: +Locale dependent transformation for case insensitive comparison of UTF-8 +strings. + +Files: +lib/unicase/u8-casexfrm.c +lib/unicase/u-casexfrm.h + +Depends-on: +unicase/base +unicase/u8-casefold +uniconv/u8-conv-to-enc +localcharset +memxfrm + +configure.ac: + +Makefile.am: +lib_SOURCES += unicase/u8-casexfrm.c + +Include: +"unicase.h" + +License: +LGPL + +Maintainer: +Bruno Haible + -- 2.11.0