From 64c35d399b87031707089d8c7438af86cacb2536 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 8 Mar 2009 14:00:35 +0100 Subject: [PATCH] Declare new API. --- ChangeLog | 19 +++++ lib/unicase.h | 227 ++++++++++++++++++++++++++++++++++++++++++++------- modules/unicase/base | 2 + 3 files changed, 220 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 941b7f47c..ee2842ad0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,24 @@ 2009-03-08 Bruno Haible + * lib/unicase.h: Include stdbool.h, uninorm.h. + (u8_toupper, u16_toupper, u32_toupper, u8_tolower, u16_tolower, + u32_tolower, u8_totitle, u16_totitle, u32_totitle): Add nf argument. + (u8_casefold, u16_casefold, u32_casefold): Add iso639_language and nf + arguments. + (u8_casecmp, u16_casecmp, u32_casecmp): Add iso639_language, nf, + resultp arguments. + (u8_casexfrm, u16_casexfrm, u32_casexfrm): New declarations. + (u8_casecoll, u16_casecoll, u32_casecoll): Add iso639_language, nf, + resultp arguments. + (u8_is_uppercase, u16_is_uppercase, u32_is_uppercase, u8_is_lowercase, + u16_is_lowercase, u32_is_lowercase, u8_is_titlecase, u16_is_titlecase, + u32_is_titlecase, u8_is_casefolded, u16_is_casefolded, + u32_is_casefolded, u8_is_cased, u16_is_cased, u32_is_cased): New + declarations. + * modules/unicase/base (Depends-on): Add uninorm/base, stdbool. + +2009-03-08 Bruno Haible + * lib/uninorm.h (u8_normcmp, u16_normcmp, u32_normcmp, u8_normcoll, u16_normcoll, u32_normcoll): Rename argument 'result' to 'resultp'. * lib/uninorm/u-normcmp.h (FUNC): Likewise. diff --git a/lib/unicase.h b/lib/unicase.h index ebd59529d..63fd86232 100644 --- a/lib/unicase.h +++ b/lib/unicase.h @@ -19,9 +19,14 @@ #include "unitypes.h" +#include <stdbool.h> + /* Get size_t. */ #include <stddef.h> +/* Get uninorm_t. */ +#include "uninorm.h" + #ifdef __cplusplus extern "C" { #endif @@ -59,57 +64,223 @@ extern ucs4_t extern const char * uc_locale_language (void); -/* Return the uppercase mapping of a string. 
*/ +/* Conventions: + + All functions prefixed with u8_ operate on UTF-8 encoded strings. + Their unit is an uint8_t (1 byte). + + All functions prefixed with u16_ operate on UTF-16 encoded strings. + Their unit is an uint16_t (a 2-byte word). + + All functions prefixed with u32_ operate on UCS-4 encoded strings. + Their unit is an uint32_t (a 4-byte word). + + All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly + n units. + + Functions returning a string result take a (resultbuf, lengthp) argument + pair. If resultbuf is not NULL and the result fits into *lengthp units, + it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly + allocated string is returned. In both cases, *lengthp is set to the + length (number of units) of the returned string. In case of error, + NULL is returned and errno is set. */ + +/* Return the uppercase mapping of a string. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. */ extern uint8_t * - u8_toupper (const uint8_t *s, size_t n, const char *iso639_language, uint8_t *resultbuf, size_t *lengthp); + u8_toupper (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * - u16_toupper (const uint16_t *s, size_t n, const char *iso639_language, uint16_t *resultbuf, size_t *lengthp); + u16_toupper (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * - u32_toupper (const uint32_t *s, size_t n, const char *iso639_language, uint32_t *resultbuf, size_t *lengthp); + u32_toupper (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); -/* Return the lowercase mapping of a string. */ +/* Return the lowercase mapping of a string. + The nf argument identifies the normalization form to apply after the + case-mapping. 
It can also be NULL, for no normalization. */ extern uint8_t * - u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, uint8_t *resultbuf, size_t *lengthp); + u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * - u16_tolower (const uint16_t *s, size_t n, const char *iso639_language, uint16_t *resultbuf, size_t *lengthp); + u16_tolower (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * - u32_tolower (const uint32_t *s, size_t n, const char *iso639_language, uint32_t *resultbuf, size_t *lengthp); + u32_tolower (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); -/* Return the titlecase mapping of a string. */ +/* Return the titlecase mapping of a string. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. */ extern uint8_t * - u8_totitle (const uint8_t *s, size_t n, const char *iso639_language, uint8_t *resultbuf, size_t *lengthp); + u8_totitle (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * - u16_totitle (const uint16_t *s, size_t n, const char *iso639_language, uint16_t *resultbuf, size_t *lengthp); + u16_totitle (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * - u32_totitle (const uint32_t *s, size_t n, const char *iso639_language, uint32_t *resultbuf, size_t *lengthp); + u32_totitle (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); -/* Return the case folded string. */ +/* Return the case folded string. + The nf argument identifies the normalization form to apply after the + case-mapping. 
It can also be NULL, for no normalization. */ extern uint8_t * - u8_casefold (const uint8_t *s, size_t n, uint8_t *resultbuf, size_t *lengthp); + u8_casefold (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint8_t *resultbuf, size_t *lengthp); extern uint16_t * - u16_casefold (const uint16_t *s, size_t n, uint16_t *resultbuf, size_t *lengthp); + u16_casefold (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint16_t *resultbuf, size_t *lengthp); extern uint32_t * - u32_casefold (const uint32_t *s, size_t n, uint32_t *resultbuf, size_t *lengthp); + u32_casefold (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, + uint32_t *resultbuf, size_t *lengthp); -/* Compare S1 and S2, ignoring case. - Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. */ +/* Compare S1 and S2, ignoring differences in case and normalization. + The nf argument identifies the normalization form to apply after the + case-mapping. It can also be NULL, for no normalization. + If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and + return 0. Upon failure, return -1 with errno set. */ extern int - u8_casecmp (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2); + u8_casecmp (const uint8_t *s1, size_t n1, + const uint8_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int - u16_casecmp (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2); + u16_casecmp (const uint16_t *s1, size_t n1, + const uint16_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); extern int - u32_casecmp (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2); + u32_casecmp (const uint32_t *s1, size_t n1, + const uint32_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); -/* Compare S1 and S2 using the collation rules of the current locale, - ignoring case. - Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. 
- Upon failure, set errno and return any value. */ +/* Convert the string S of length N to a string in locale encoding, in such a + way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with memcmp2() is + equivalent to comparing S1 and S2 with uN_casecoll(). + NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */ +extern char * + u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language, + uninorm_t nf, char *resultbuf, size_t *lengthp); +extern char * + u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language, + uninorm_t nf, char *resultbuf, size_t *lengthp); +extern char * + u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language, + uninorm_t nf, char *resultbuf, size_t *lengthp); + +/* Compare S1 and S2, ignoring differences in case and normalization, using the + collation rules of the current locale. + The nf argument identifies the normalization form to apply after the + case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also + be NULL, for no normalization. + If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and + return 0. Upon failure, return -1 with errno set. */ +extern int + u8_casecoll (const uint8_t *s1, size_t n1, + const uint8_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + u16_casecoll (const uint16_t *s1, size_t n1, + const uint16_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); +extern int + u32_casecoll (const uint32_t *s1, size_t n1, + const uint32_t *s2, size_t n2, + const char *iso639_language, uninorm_t nf, int *resultp); + + +/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false + otherwise, and return 0. Upon failure, return -1 with errno set. 
*/ extern int - u8_casecoll (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2); + u8_is_uppercase (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); extern int - u16_casecoll (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2); + u16_is_uppercase (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_uppercase (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false + otherwise, and return 0. Upon failure, return -1 with errno set. */ extern int - u32_casecoll (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2); + u8_is_lowercase (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_lowercase (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_lowercase (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false + otherwise, and return 0. Upon failure, return -1 with errno set. */ +extern int + u8_is_titlecase (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_titlecase (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_titlecase (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to + false otherwise, and return 0. Upon failure, return -1 with errno set. 
*/ +extern int + u8_is_casefolded (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_casefolded (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_casefolded (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + +/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to + either upper case or lower case or title case is not a no-op. + Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping, + under the lower case mapping, and under the title case mapping; in other + words, when NFD(S) consists entirely of caseless characters. + Return 0 if successful. Upon failure, return -1 with errno set. */ +extern int + u8_is_cased (const uint8_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u16_is_cased (const uint16_t *s, size_t n, + const char *iso639_language, + bool *resultp); +extern int + u32_is_cased (const uint32_t *s, size_t n, + const char *iso639_language, + bool *resultp); + /* ========================================================================= */ diff --git a/modules/unicase/base b/modules/unicase/base index 2160c2d07..ddd2fe6f4 100644 --- a/modules/unicase/base +++ b/modules/unicase/base @@ -6,6 +6,8 @@ lib/unicase.h Depends-on: unitypes +uninorm/base +stdbool configure.ac: -- 2.11.0