From 999c736ebb021bc282b697f630021196bacf4baa Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Fri, 25 Mar 2011 23:14:10 +0100 Subject: [PATCH] New module 'unictype/category-longname'. * lib/unictype.in.h (uc_general_category_long_name): New declaration. * lib/unictype/categ_longname.c: New file. * modules/unictype/category-longname: New file. * modules/unictype/category-all (Depends-on): Add it. --- ChangeLog | 8 +++ lib/unictype.in.h | 4 ++ lib/unictype/categ_longname.c | 106 +++++++++++++++++++++++++++++++++++++ modules/unictype/category-all | 1 + modules/unictype/category-longname | 25 +++++++++ 5 files changed, 144 insertions(+) create mode 100644 lib/unictype/categ_longname.c create mode 100644 modules/unictype/category-longname diff --git a/ChangeLog b/ChangeLog index a1da946fc..b33f027a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2011-03-25 Bruno Haible + New module 'unictype/category-longname'. + * lib/unictype.in.h (uc_general_category_long_name): New declaration. + * lib/unictype/categ_longname.c: New file. + * modules/unictype/category-longname: New file. + * modules/unictype/category-all (Depends-on): Add it. + +2011-03-25 Bruno Haible + Tests for module 'unictype/category-LC'. * modules/unictype/category-LC-tests: New file. * tests/unictype/test-categ_LC.c: New file, automatically generated. diff --git a/lib/unictype.in.h b/lib/unictype.in.h index b25a46198..ebaa616ab 100644 --- a/lib/unictype.in.h +++ b/lib/unictype.in.h @@ -207,6 +207,10 @@ extern uc_general_category_t extern const char * uc_general_category_name (uc_general_category_t category); +/* Return the long name of a general category. */ +extern const char * + uc_general_category_long_name (uc_general_category_t category); + /* Return the general category given by name, e.g. "Lu". 
*/ extern uc_general_category_t uc_general_category_byname (const char *category_name); diff --git a/lib/unictype/categ_longname.c b/lib/unictype/categ_longname.c new file mode 100644 index 000000000..58a653b8b --- /dev/null +++ b/lib/unictype/categ_longname.c @@ -0,0 +1,106 @@ +/* Categories of Unicode characters. + Copyright (C) 2002, 2006-2007, 2011 Free Software Foundation, Inc. + Written by Bruno Haible , 2011. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include +/* NOTE(review): the header name after the #include above is missing in this copy of the patch; presumably <config.h> was dropped during extraction. Confirm against the original commit. */ +/* Specification. */ +#include "unictype.h" +/* Long names of the 30 single-bit general categories, indexed by the bit number of the category bitmask (the function below reads u_category_long_name[bit]). The longest entry, Connector Punctuation, has 21 characters, so each 22-byte row also holds its terminating NUL. */ +static const char u_category_long_name[30][22] = +{ + "Uppercase Letter", + "Lowercase Letter", + "Titlecase Letter", + "Modifier Letter", + "Other Letter", + "Nonspacing Mark", + "Spacing Mark", + "Enclosing Mark", + "Decimal Number", + "Letter Number", + "Other Number", + "Connector Punctuation", + "Dash Punctuation", + "Open Punctuation", + "Close Punctuation", + "Initial Punctuation", + "Final Punctuation", + "Other Punctuation", + "Math Symbol", + "Currency Symbol", + "Modifier Symbol", + "Other Symbol", + "Space Separator", + "Line Separator", + "Paragraph Separator", + "Control", + "Format", + "Surrogate", + "Private Use", + "Unassigned" +}; +/* Return the long name of CATEGORY. If category.bitmask has exactly one bit set and that bit is in the range 0 to 29, the result is the matching u_category_long_name entry. If several bits are set, the result is one of the fixed group names (Letter, Cased Letter, Mark, Number, Punctuation, Symbol, Separator, Other) when the bitmask is exactly equal to the corresponding UC_CATEGORY_MASK_xx constant. In every other case, including an empty bitmask, the result is NULL. The returned strings have static storage and must not be freed. */ +const char * +uc_general_category_long_name (uc_general_category_t category) +{ + uint32_t bitmask = category.bitmask; + /* bitmask should consist of a single bit. 
*/ + if (bitmask != 0) + { + if ((bitmask & (bitmask - 1)) == 0) + { + int bit; + /* Take log2 using a variant of Robert Harley's method. + Found by Bruno Haible 1996. The three shift statements below multiply n, modulo 2^32, by 17 * 65 * 65535 = 0x0450FBAF; the top 6 bits of that product (n >> 26) then select the bit number from ord2_tab. Entries of -1 in the table are not valid bit numbers. */ + uint32_t n = bitmask; + static const char ord2_tab[64] = + { + -1, 0, 1, 12, 2, 6, -1, 13, 3, -1, 7, -1, -1, -1, -1, 14, + 10, 4, -1, -1, 8, -1, -1, 25, -1, -1, -1, -1, -1, 21, 27, 15, + 31, 11, 5, -1, -1, -1, -1, -1, 9, -1, -1, 24, -1, -1, 20, 26, + 30, -1, -1, -1, -1, 23, -1, 19, 29, -1, 22, 18, 28, 17, 16, -1 + }; + n += n << 4; + n += n << 6; + n = (n << 16) - n; + bit = ord2_tab[n >> 26]; + /* bit is an int but the sizeof quotient is unsigned, so bit is converted to unsigned for this comparison: a -1 table entry turns into a very large value and fails the test, as do bits 30 and 31, and the function then falls through to return NULL. */ + if (bit < sizeof (u_category_long_name) / sizeof (u_category_long_name[0])) + return u_category_long_name[bit]; + } + else + { /* Several bits are set: only an exact match of a whole group mask has a name. */ + if (bitmask == UC_CATEGORY_MASK_L) + return "Letter"; + if (bitmask == UC_CATEGORY_MASK_LC) + return "Cased Letter"; + if (bitmask == UC_CATEGORY_MASK_M) + return "Mark"; + if (bitmask == UC_CATEGORY_MASK_N) + return "Number"; + if (bitmask == UC_CATEGORY_MASK_P) + return "Punctuation"; + if (bitmask == UC_CATEGORY_MASK_S) + return "Symbol"; + if (bitmask == UC_CATEGORY_MASK_Z) + return "Separator"; + if (bitmask == UC_CATEGORY_MASK_C) + return "Other"; + } + } + return NULL; +} diff --git a/modules/unictype/category-all b/modules/unictype/category-all index 76ea1627a..2a5cbac80 100644 --- a/modules/unictype/category-all +++ b/modules/unictype/category-all @@ -45,6 +45,7 @@ unictype/category-Zs unictype/category-and unictype/category-and-not unictype/category-byname +unictype/category-longname unictype/category-name unictype/category-of unictype/category-or diff --git a/modules/unictype/category-longname b/modules/unictype/category-longname new file mode 100644 index 000000000..76db6d9e5 --- /dev/null +++ b/modules/unictype/category-longname @@ -0,0 +1,25 @@ +Description: +Name of Unicode character category. 
+ +Files: +lib/unictype/categ_longname.c + +Depends-on: +unictype/base + +configure.ac: +gl_LIBUNISTRING_MODULE([0.9.4], [unictype/category-longname]) + +Makefile.am: +if LIBUNISTRING_COMPILE_UNICTYPE_CATEGORY_LONGNAME +lib_SOURCES += unictype/categ_longname.c +endif + +Include: +"unictype.h" + +License: +LGPL + +Maintainer: +Bruno Haible -- 2.11.0