From c319d937dd79856ef77368026070028d492046f6 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 8 Jan 2007 20:35:06 +0000 Subject: [PATCH] Move out the body of the conversion function to a compilation unit of its own. Use unitypes.h. --- ChangeLog | 30 +++++++++ lib/ucs4-utf16.h | 58 +++++++--------- lib/ucs4-utf8.h | 78 +++++++--------------- lib/unistr/ucs4-utf16.c | 60 +++++++++++++++++ lib/unistr/ucs4-utf8.c | 71 ++++++++++++++++++++ lib/unistr/utf16-ucs4.c | 53 +++++++++++++++ lib/unistr/utf8-ucs4.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++ lib/utf16-ucs4.h | 68 +++++++------------ lib/utf8-ucs4.h | 172 +++++++----------------------------------------- modules/ucs4-utf16 | 4 +- modules/ucs4-utf8 | 4 +- modules/utf16-ucs4 | 5 +- modules/utf8-ucs4 | 5 +- 13 files changed, 489 insertions(+), 285 deletions(-) create mode 100644 lib/unistr/ucs4-utf16.c create mode 100644 lib/unistr/ucs4-utf8.c create mode 100644 lib/unistr/utf16-ucs4.c create mode 100644 lib/unistr/utf8-ucs4.c diff --git a/ChangeLog b/ChangeLog index 3de97ee3b..447f99183 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,35 @@ 2007-01-08 Bruno Haible + * modules/utf8-ucs4 (Files, lib_SOURCES): Add unistr/utf8-ucs4.c. + (Depends-on): Add unitypes. + * lib/utf8-ucs4.h: Add double-inclusion guard. Include unitypes.h. + (u8_mbtouc_aux): Move out to separate file. + (u8_mbtouc): Use ucs4_t, uint8_t types. + * lib/unistr/utf8-ucs4.c: New file. + + * modules/utf16-ucs4 (Files, lib_SOURCES): Add unistr/utf16-ucs4.c. + (Depends-on): Add unitypes. + * lib/utf16-ucs4.h: Add double-inclusion guard. Include unitypes.h. + (u16_mbtouc_aux): Move out to separate file. + (u16_mbtouc): Use ucs4_t, uint16_t types. + * lib/unistr/utf16-ucs4.c: New file. + + * modules/ucs4-utf8 (Files, lib_SOURCES): Add unistr/ucs4-utf8.c. + (Depends-on): Add unitypes. + * lib/ucs4-utf8.h: Add double-inclusion guard. Include unitypes.h. + (u8_uctomb_aux): Move out to separate file. 
+ (u8_uctomb): Use ucs4_t, uint8_t types. + * lib/unistr/ucs4-utf8.c: New file. + + * modules/ucs4-utf16 (Files, lib_SOURCES): Add unistr/ucs4-utf16.c. + (Depends-on): Add unitypes. + * lib/ucs4-utf16.h: Add double-inclusion guard. Include unitypes.h. + (u16_uctomb_aux): Move out to separate file. + (u16_uctomb): Use ucs4_t, uint16_t types. + * lib/unistr/ucs4-utf16.c: New file. + +2007-01-08 Bruno Haible + Avoid variable names that conflict with those from libtool. * m4/lib-link.m4 (AC_LIB_RPATH, AC_LIB_LINKFLAGS_BODY, AC_LIB_LINKFLAGS_FROM_LIBS): Rename libext to acl_libext, diff --git a/lib/ucs4-utf16.h b/lib/ucs4-utf16.h index 9c5cae9e4..8450a99f5 100644 --- a/lib/ucs4-utf16.h +++ b/lib/ucs4-utf16.h @@ -1,51 +1,37 @@ /* Conversion UCS-4 to UTF-16. - Copyright (C) 2002 Free Software Foundation, Inc. - Written by Bruno Haible , 2002. + Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
-You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ +#ifndef _UCS4_UTF16_H +#define _UCS4_UTF16_H #include +#include "unitypes.h" + +extern int u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n); /* Return the length (number of units) of the UTF-16 representation of uc, after storing it at S. Return -1 upon failure, -2 if the number of available units, N, is too small. */ -static int -u16_uctomb_aux (unsigned short *s, unsigned int uc, int n) -{ - if (uc >= 0x10000) - { - if (uc < 0x110000) - { - if (n >= 2) - { - s[0] = 0xd800 + ((uc - 0x10000) >> 10); - s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff); - return 2; - } - } - else - return -1; - } - return -2; -} - static inline int -u16_uctomb (unsigned short *s, unsigned int uc, int n) +u16_uctomb (uint16_t *s, ucs4_t uc, int n) { - if (uc < 0x10000 && n > 0) + if (uc < 0xd800 && n > 0) { s[0] = uc; return 1; @@ -53,3 +39,5 @@ u16_uctomb (unsigned short *s, unsigned int uc, int n) else return u16_uctomb_aux (s, uc, n); } + +#endif /* _UCS4_UTF16_H */ diff --git a/lib/ucs4-utf8.h b/lib/ucs4-utf8.h index b7295978d..e8c3e7f24 100644 --- a/lib/ucs4-utf8.h +++ b/lib/ucs4-utf8.h @@ -1,71 +1,35 @@ /* Conversion UCS-4 to UTF-8. - Copyright (C) 2002 Free Software Foundation, Inc. - Written by Bruno Haible , 2002. + Copyright (C) 2002, 2005-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. 
+ This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ +#ifndef _UCS4_UTF8_H +#define _UCS4_UTF8_H #include +#include "unitypes.h" + +extern int u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n); /* Return the length (number of units) of the UTF-8 representation of uc, after storing it at S. Return -1 upon failure, -2 if the number of available units, N, is too small. */ -static int -u8_uctomb_aux (unsigned char *s, unsigned int uc, int n) -{ - int count; - - if (uc < 0x80) - count = 1; - else if (uc < 0x800) - count = 2; - else if (uc < 0x10000) - count = 3; -#if 0 - else if (uc < 0x200000) - count = 4; - else if (uc < 0x4000000) - count = 5; - else if (uc <= 0x7fffffff) - count = 6; -#else - else if (uc < 0x110000) - count = 4; -#endif - else - return -1; - - if (n < count) - return -2; - - switch (count) /* note: code falls through cases! 
*/ - { -#if 0 - case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; - case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; -#endif - case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; - case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; - case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; - case 1: s[0] = uc; - } - return count; -} - static inline int -u8_uctomb (unsigned char *s, unsigned int uc, int n) +u8_uctomb (uint8_t *s, ucs4_t uc, int n) { if (uc < 0x80 && n > 0) { @@ -75,3 +39,5 @@ u8_uctomb (unsigned char *s, unsigned int uc, int n) else return u8_uctomb_aux (s, uc, n); } + +#endif /* _UCS4_UTF8_H */ diff --git a/lib/unistr/ucs4-utf16.c b/lib/unistr/ucs4-utf16.c new file mode 100644 index 000000000..c79c3f5ac --- /dev/null +++ b/lib/unistr/ucs4-utf16.c @@ -0,0 +1,60 @@ +/* Conversion UCS-4 to UTF-16. + Copyright (C) 2002, 2006 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include + +/* Specification. */ +#include "ucs4-utf16.h" + +int +u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n) +{ + if (uc < 0xd800) + { + /* The case n >= 1 is already handled by the caller. 
*/ + } + else if (uc < 0x10000) + { + if (uc >= 0xe000) + { + if (n >= 1) + { + s[0] = uc; + return 1; + } + } + else + return -1; + } + else + { + if (uc < 0x110000) + { + if (n >= 2) + { + s[0] = 0xd800 + ((uc - 0x10000) >> 10); + s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff); + return 2; + } + } + else + return -1; + } + return -2; +} diff --git a/lib/unistr/ucs4-utf8.c b/lib/unistr/ucs4-utf8.c new file mode 100644 index 000000000..82034382c --- /dev/null +++ b/lib/unistr/ucs4-utf8.c @@ -0,0 +1,71 @@ +/* Conversion UCS-4 to UTF-8. + Copyright (C) 2002, 2006 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include + +/* Specification. */ +#include "ucs4-utf8.h" + +int +u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n) +{ + int count; + + if (uc < 0x80) + /* The case n >= 1 is already handled by the caller. */ + return -2; + else if (uc < 0x800) + count = 2; + else if (uc < 0x10000) + { + if (uc < 0xd800 || uc >= 0xe000) + count = 3; + else + return -1; + } +#if 0 + else if (uc < 0x200000) + count = 4; + else if (uc < 0x4000000) + count = 5; + else if (uc <= 0x7fffffff) + count = 6; +#else + else if (uc < 0x110000) + count = 4; +#endif + else + return -1; + + if (n < count) + return -2; + + switch (count) /* note: code falls through cases! 
*/ + { +#if 0 + case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; + case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; +#endif + case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; + /*case 1:*/ s[0] = uc; + } + return count; +} diff --git a/lib/unistr/utf16-ucs4.c b/lib/unistr/utf16-ucs4.c new file mode 100644 index 000000000..bf04bca62 --- /dev/null +++ b/lib/unistr/utf16-ucs4.c @@ -0,0 +1,53 @@ +/* Conversion UTF-16 to UCS-4. + Copyright (C) 2001-2002, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include + +/* Specification. 
*/ +#include "utf16-ucs4.h" + +int +u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + +#if CONFIG_UNICODE_SAFETY + if (c < 0xdc00) +#endif + { + if (n >= 2) + { +#if CONFIG_UNICODE_SAFETY + if (s[1] >= 0xdc00 && s[1] < 0xe000) +#endif + { + *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + } + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} diff --git a/lib/unistr/utf8-ucs4.c b/lib/unistr/utf8-ucs4.c new file mode 100644 index 000000000..76236cd9e --- /dev/null +++ b/lib/unistr/utf8-ucs4.c @@ -0,0 +1,166 @@ +/* Conversion UTF-8 to UCS-4. + Copyright (C) 2001-2002, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#include + +/* Specification. 
*/ +#include "utf8-ucs4.h" + +int +u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40) +#endif + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf0) + { + if (n >= 3) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) +#endif + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf8) + { + if (n >= 4) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) +#endif + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 5) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) +#endif + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte 
character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xfe) + { + if (n >= 6) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (s[5] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) +#endif + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} diff --git a/lib/utf16-ucs4.h b/lib/utf16-ucs4.h index e182c28da..37e289d0d 100644 --- a/lib/utf16-ucs4.h +++ b/lib/utf16-ucs4.h @@ -1,57 +1,37 @@ /* Conversion UTF-16 to UCS-4. - Copyright (C) 2001-2002 Free Software Foundation, Inc. - Written by Bruno Haible , 2001. + Copyright (C) 2001-2002, 2005-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
+ This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ +#ifndef _UTF16_UCS4_H +#define _UTF16_UCS4_H #include <stddef.h> +#include "unitypes.h" -/* Return the length (number of units) of the first character in S, putting - its 'ucs4_t' representation in *PUC. */ -static int -u16_mbtouc_aux (unsigned int *puc, const unsigned short *s, size_t n) -{ - unsigned short c = *s; +extern int u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n); - if (c < 0xdc00) - { - if (n >= 2) - { - if (s[1] >= 0xdc00 && s[1] < 0xe000) - { - *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00); - return 2; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return n; - } - } - /* invalid multibyte character */ - *puc = 0xfffd; - return 1; -} +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. + The number of available units, N, must be > 0.
*/ static inline int -u16_mbtouc (unsigned int *puc, const unsigned short *s, size_t n) +u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) { - unsigned short c = *s; + uint16_t c = *s; if (c < 0xd800 || c >= 0xe000) { @@ -61,3 +41,5 @@ u16_mbtouc (unsigned int *puc, const unsigned short *s, size_t n) else return u16_mbtouc_aux (puc, s, n); } + +#endif /* _UTF16_UCS4_H */ diff --git a/lib/utf8-ucs4.h b/lib/utf8-ucs4.h index e97b5546a..feaf83661 100644 --- a/lib/utf8-ucs4.h +++ b/lib/utf8-ucs4.h @@ -1,161 +1,37 @@ /* Conversion UTF-8 to UCS-4. - Copyright (C) 2001-2002 Free Software Foundation, Inc. - Written by Bruno Haible , 2001. + Copyright (C) 2001-2002, 2005-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ +#ifndef _UTF8_UCS4_H +#define _UTF8_UCS4_H #include <stddef.h> +#include "unitypes.h" -/* Return the length (number of units) of the first character in S, putting - its 'ucs4_t' representation in *PUC. */ -static int -u8_mbtouc_aux (unsigned int *puc, const unsigned char *s, size_t n) -{ - unsigned char c = *s; +extern int u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n); - if (c >= 0xc2) - { - if (c < 0xe0) - { - if (n >= 2) - { - if ((s[1] ^ 0x80) < 0x40) - { - *puc = ((unsigned int) (c & 0x1f) << 6) - | (unsigned int) (s[1] ^ 0x80); - return 2; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return n; - } - } - else if (c < 0xf0) - { - if (n >= 3) - { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (c >= 0xe1 || s[1] >= 0xa0)) - { - *puc = ((unsigned int) (c & 0x0f) << 12) - | ((unsigned int) (s[1] ^ 0x80) << 6) - | (unsigned int) (s[2] ^ 0x80); - return 3; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return n; - } - } - else if (c < 0xf8) - { - if (n >= 4) - { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 - && (c >= 0xf1 || s[1] >= 0x90) -#if 1 - && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) -#endif - ) - { - *puc = ((unsigned int) (c & 0x07) << 18) - | ((unsigned int) (s[1] ^ 0x80) << 12) - | ((unsigned int) (s[2] ^ 0x80) << 6) - | (unsigned int) (s[3] ^ 0x80); - return 4; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return n; - } - } -#if 0 - else if (c < 0xfc) - { - if (n >= 5) - { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (c >= 0xf9 || s[1]
>= 0x88)) - { - *puc = ((unsigned int) (c & 0x03) << 24) - | ((unsigned int) (s[1] ^ 0x80) << 18) - | ((unsigned int) (s[2] ^ 0x80) << 12) - | ((unsigned int) (s[3] ^ 0x80) << 6) - | (unsigned int) (s[4] ^ 0x80); - return 5; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return n; - } - } - else if (c < 0xfe) - { - if (n >= 6) - { - if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 - && (s[5] ^ 0x80) < 0x40 - && (c >= 0xfd || s[1] >= 0x84)) - { - *puc = ((unsigned int) (c & 0x01) << 30) - | ((unsigned int) (s[1] ^ 0x80) << 24) - | ((unsigned int) (s[2] ^ 0x80) << 18) - | ((unsigned int) (s[3] ^ 0x80) << 12) - | ((unsigned int) (s[4] ^ 0x80) << 6) - | (unsigned int) (s[5] ^ 0x80); - return 6; - } - /* invalid multibyte character */ - } - else - { - /* incomplete multibyte character */ - *puc = 0xfffd; - return n; - } - } -#endif - } - /* invalid multibyte character */ - *puc = 0xfffd; - return 1; -} +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. + The number of available units, N, must be > 0. */ static inline int -u8_mbtouc (unsigned int *puc, const unsigned char *s, size_t n) +u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) { - unsigned char c = *s; + uint8_t c = *s; if (c < 0x80) { @@ -165,3 +41,5 @@ u8_mbtouc (unsigned int *puc, const unsigned char *s, size_t n) else return u8_mbtouc_aux (puc, s, n); } + +#endif /* _UTF8_UCS4_H */ diff --git a/modules/ucs4-utf16 b/modules/ucs4-utf16 index c4d1b06af..54ca0ea67 100644 --- a/modules/ucs4-utf16 +++ b/modules/ucs4-utf16 @@ -3,15 +3,17 @@ Conversion UCS-4 to UTF-16. 
Files: lib/ucs4-utf16.h +lib/unistr/ucs4-utf16.c m4/ucs4-utf.m4 Depends-on: +unitypes configure.ac: gl_UCS4_UTF Makefile.am: -lib_SOURCES += ucs4-utf16.h +lib_SOURCES += ucs4-utf16.h unistr/ucs4-utf16.c Include: diff --git a/modules/ucs4-utf8 b/modules/ucs4-utf8 index be9c3ded0..7048494a6 100644 --- a/modules/ucs4-utf8 +++ b/modules/ucs4-utf8 @@ -3,15 +3,17 @@ Conversion UCS-4 to UTF-8. Files: lib/ucs4-utf8.h +lib/unistr/ucs4-utf8.c m4/ucs4-utf.m4 Depends-on: +unitypes configure.ac: gl_UCS4_UTF Makefile.am: -lib_SOURCES += ucs4-utf8.h +lib_SOURCES += ucs4-utf8.h unistr/ucs4-utf8.c Include: diff --git a/modules/utf16-ucs4 b/modules/utf16-ucs4 index de40bd6b8..442183f94 100644 --- a/modules/utf16-ucs4 +++ b/modules/utf16-ucs4 @@ -3,17 +3,20 @@ Conversion UTF-16 to UCS-4. Files: lib/utf16-ucs4.h +lib/unistr/utf16-ucs4.c m4/utf-ucs4.m4 Depends-on: +unitypes configure.ac: gl_UTF_UCS4 Makefile.am: -lib_SOURCES += utf16-ucs4.h +lib_SOURCES += utf16-ucs4.h unistr/utf16-ucs4.c Include: +"utf16-ucs4.h" License: LGPL diff --git a/modules/utf8-ucs4 b/modules/utf8-ucs4 index 0c127a762..6a2182826 100644 --- a/modules/utf8-ucs4 +++ b/modules/utf8-ucs4 @@ -3,17 +3,20 @@ Conversion UTF-8 to UCS-4. Files: lib/utf8-ucs4.h +lib/unistr/utf8-ucs4.c m4/utf-ucs4.m4 Depends-on: +unitypes configure.ac: gl_UTF_UCS4 Makefile.am: -lib_SOURCES += utf8-ucs4.h +lib_SOURCES += utf8-ucs4.h unistr/utf8-ucs4.c Include: +"utf8-ucs4.h" License: LGPL -- 2.11.0