From: Bruno Haible Date: Sun, 18 May 2008 13:38:15 +0000 (+0200) Subject: New module 'propername', moved here from GNU gettext. X-Git-Tag: v0.1~7385 X-Git-Url: http://erislabs.org.uk/gitweb/?a=commitdiff_plain;h=c9678b023ef80b4a807e39a0b225b6b4acc58fb1;p=gnulib.git New module 'propername', moved here from GNU gettext. --- diff --git a/ChangeLog b/ChangeLog index 7c0a840fb..41ea89df0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2008-05-18 Bruno Haible + + * modules/propername: New file, from GNU gettext. + * lib/propername.h: New file, from GNU gettext. + * lib/propername.c: New file, from GNU gettext. + * MODULES.html.sh (Internationalization functions): Add propername. + 2008-05-16 Jim Meyering Bruno Haible diff --git a/MODULES.html.sh b/MODULES.html.sh index 277d84cf9..d46cf681d 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -2382,6 +2382,7 @@ func_all_modules () func_begin_table func_module gettext func_module gettext-h + func_module propername func_module iconv func_module striconv func_module xstriconv diff --git a/lib/propername.c b/lib/propername.c new file mode 100644 index 000000000..0d3681e26 --- /dev/null +++ b/lib/propername.c @@ -0,0 +1,283 @@ +/* Localization of proper names. + Copyright (C) 2006-2008 Free Software Foundation, Inc. + Written by Bruno Haible , 2006. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. 
*/ +#include "propername.h" + +#include +#include +#include +#include +#include +#if HAVE_ICONV +# include +#endif + +#include "trim.h" +#include "mbchar.h" +#if HAVE_MBRTOWC +# include "mbuiter.h" +#endif +#include "localcharset.h" +#include "c-strcase.h" +#include "xstriconv.h" +#include "xalloc.h" +#include "gettext.h" + + +/* Tests whether STRING contains trim (SUB), starting and ending at word + boundaries. + Here, instead of implementing Unicode Standard Annex #29 for determining + word boundaries, we assume that trim (SUB) starts and ends with words and + only test whether the part before it ends with a non-word and the part + after it starts with a non-word. */ +static bool +mbsstr_trimmed_wordbounded (const char *string, const char *sub) +{ + char *tsub = trim (sub); + bool found = false; + + for (; *string != '\0';) + { + const char *tsub_in_string = mbsstr (string, tsub); + if (tsub_in_string == NULL) + break; + else + { +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) + { + mbui_iterator_t string_iter; + bool word_boundary_before; + bool word_boundary_after; + + mbui_init (string_iter, string); + word_boundary_before = true; + if (mbui_cur_ptr (string_iter) < tsub_in_string) + { + mbchar_t last_char_before_tsub; + do + { + if (!mbui_avail (string_iter)) + abort (); + last_char_before_tsub = mbui_cur (string_iter); + mbui_advance (string_iter); + } + while (mbui_cur_ptr (string_iter) < tsub_in_string); + if (mb_isalnum (last_char_before_tsub)) + word_boundary_before = false; + } + + mbui_init (string_iter, tsub_in_string); + { + mbui_iterator_t tsub_iter; + + for (mbui_init (tsub_iter, tsub); + mbui_avail (tsub_iter); + mbui_advance (tsub_iter)) + { + if (!mbui_avail (string_iter)) + abort (); + mbui_advance (string_iter); + } + } + word_boundary_after = true; + if (mbui_avail (string_iter)) + { + mbchar_t first_char_after_tsub = mbui_cur (string_iter); + if (mb_isalnum (first_char_after_tsub)) + word_boundary_after = false; + } + + if (word_boundary_before && 
word_boundary_after) + { + found = true; + break; + } + + mbui_init (string_iter, tsub_in_string); + if (!mbui_avail (string_iter)) + break; + string = tsub_in_string + mb_len (mbui_cur (string_iter)); + } + else +#endif /* HAVE_MBRTOWC */ + { + bool word_boundary_before; + const char *p; + bool word_boundary_after; + + word_boundary_before = true; + if (string < tsub_in_string) + if (isalnum ((unsigned char) tsub_in_string[-1])) + word_boundary_before = false; + + p = tsub_in_string + strlen (tsub); + word_boundary_after = true; + if (*p != '\0') + if (isalnum ((unsigned char) *p)) + word_boundary_after = false; + + if (word_boundary_before && word_boundary_after) + { + found = true; + break; + } + + if (*tsub_in_string == '\0') + break; + string = tsub_in_string + 1; + } + } + } + free (tsub); + return found; +} + +/* Return the localization of NAME. NAME is written in ASCII. */ + +const char * +proper_name (const char *name) +{ + /* See whether there is a translation. */ + const char *translation = gettext (name); + + if (translation != name) + { + /* See whether the translation contains the original name. */ + if (mbsstr_trimmed_wordbounded (translation, name)) + return translation; + else + { + /* Return "TRANSLATION (NAME)". */ + char *result = + XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char); + + sprintf (result, "%s (%s)", translation, name); + return result; + } + } + else + return name; +} + +/* Return the localization of a name whose original writing is not ASCII. + NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal + escape sequences. NAME_ASCII is a fallback written only with ASCII + characters. */ + +const char * +proper_name_utf8 (const char *name_ascii, const char *name_utf8) +{ + /* See whether there is a translation. */ + const char *translation = gettext (name_ascii); + + /* Try to convert NAME_UTF8 to the locale encoding. 
*/ + const char *locale_code = locale_charset (); + char *alloc_name_converted = NULL; + char *alloc_name_converted_translit = NULL; + const char *name_converted = NULL; + const char *name_converted_translit = NULL; + const char *name; + + if (c_strcasecmp (locale_code, "UTF-8") != 0) + { +#if HAVE_ICONV + name_converted = alloc_name_converted = + xstr_iconv (name_utf8, "UTF-8", locale_code); + +# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \ + || _LIBICONV_VERSION >= 0x0105 + { + size_t len = strlen (locale_code); + char *locale_code_translit = XNMALLOC (len + 10 + 1, char); + memcpy (locale_code_translit, locale_code, len); + memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1); + + name_converted_translit = alloc_name_converted_translit = + xstr_iconv (name_utf8, "UTF-8", locale_code_translit); + + free (locale_code_translit); + } +# endif +#endif + } + else + { + name_converted = name_utf8; + name_converted_translit = name_utf8; + } + + /* The name in locale encoding. */ + name = (name_converted != NULL ? name_converted : + name_converted_translit != NULL ? name_converted_translit : + name_ascii); + + if (translation != name_ascii) + { + /* See whether the translation contains the original name. */ + if (mbsstr_trimmed_wordbounded (translation, name_ascii) + || (name_converted != NULL + && mbsstr_trimmed_wordbounded (translation, name_converted)) + || (name_converted_translit != NULL + && mbsstr_trimmed_wordbounded (translation, name_converted_translit))) + { + if (alloc_name_converted != NULL) + free (alloc_name_converted); + if (alloc_name_converted_translit != NULL) + free (alloc_name_converted_translit); + return translation; + } + else + { + /* Return "TRANSLATION (NAME)". 
*/ + char *result = + XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char); + + sprintf (result, "%s (%s)", translation, name); + + if (alloc_name_converted != NULL) + free (alloc_name_converted); + if (alloc_name_converted_translit != NULL) + free (alloc_name_converted_translit); + return result; + } + } + else + { + if (alloc_name_converted != NULL && alloc_name_converted != name) + free (alloc_name_converted); + if (alloc_name_converted_translit != NULL + && alloc_name_converted_translit != name) + free (alloc_name_converted_translit); + return name; + } +} + +#ifdef TEST +# include +int +main (int argc, char *argv[]) +{ + setlocale (LC_ALL, ""); + if (mbsstr_trimmed_wordbounded (argv[1], argv[2])) + printf("found\n"); + return 0; +} +#endif diff --git a/lib/propername.h b/lib/propername.h new file mode 100644 index 000000000..1b0545b63 --- /dev/null +++ b/lib/propername.h @@ -0,0 +1,105 @@ +/* Localization of proper names. + Copyright (C) 2006, 2008 Free Software Foundation, Inc. + Written by Bruno Haible , 2006. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* INTRODUCTION + + What do + + Torbjörn Granlund (coreutils) + François Pinard (coreutils) + Danilo Šegan (gettext) + + have in common? + + A non-ASCII name. This causes trouble in the --version output. The simple + "solution", unfortunately mutilates the name. 
+ + $ du --version| grep Granlund + Écrit par Torbjorn Granlund, David MacKenzie, Paul Eggert et Jim Meyering. + + $ ptx --version| grep Pinard + Écrit par F. Pinard. + + What is desirable is to print the full name if the output character set + allows it, and the ASCIIfied name only as a fallback. + + $ recode-sr-latin --version + ... + Written by Danilo Šegan and Bruno Haible. + + $ LC_ALL=C recode-sr-latin --version + ... + Written by Danilo Segan and Bruno Haible. + + The 'propername' module does exactly this. Plus, for languages that use + a different writing system than the Latin alphabet, it allows a translator + to write the name using that different writing system. In that case the + output will look like this: + <translated name> (<original name in English>) + + Using the 'propername' module takes three simple steps: + + 1) Add it to the list of gnulib modules to import, + + 2) Change the arguments of version_etc, from + + from "Paul Eggert" + to proper_name ("Paul Eggert") + + from "Torbjorn Granlund" + to proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund") + + from "F. Pinard" + to proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard") + + (Optionally, here you can also add / * TRANSLATORS: ... * / comments + explaining how the name is written or pronounced.) + + 3) If you are using GNU gettext version 0.16.1 or older, in po/Makevars, + in the definition of the XGETTEXT_OPTIONS variable, add: + + --keyword=proper_name:1,"This is a proper name. See the gettext manual, section Names." + --keyword=proper_name_utf8:1,"This is a proper name. See the gettext manual, section Names." + + This specifies automatic comments for the translator. (Requires + xgettext >= 0.15.) + */ + +#ifndef _PROPERNAME_H +#define _PROPERNAME_H + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Return the localization of NAME. NAME is written in ASCII. */ +extern const char * proper_name (const char *name); + +/* Return the localization of a name whose original writing is not ASCII. 
+ NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal + escape sequences. NAME_ASCII is a fallback written only with ASCII + characters. */ +extern const char * proper_name_utf8 (const char *name_ascii, + const char *name_utf8); + +#ifdef __cplusplus +} +#endif + + +#endif /* _PROPERNAME_H */ diff --git a/modules/propername b/modules/propername new file mode 100644 index 000000000..fb31901cf --- /dev/null +++ b/modules/propername @@ -0,0 +1,43 @@ +Description: +Localization of proper names. + +Notice: +If you are using GNU gettext version 0.16.1 or older, add the following options +to XGETTEXT_OPTIONS in your po/Makevars: + --keyword=proper_name:1,'This is a proper name. See the gettext manual, section Names.' + --keyword=proper_name_utf8:1,'This is a proper name. See the gettext manual, section Names.' + +Files: +lib/propername.h +lib/propername.c + +Depends-on: +stdbool +trim +mbsstr +mbchar +mbuiter +iconv +localcharset +c-strcase +xstriconv +xalloc +gettext-h + +configure.ac: +m4_ifdef([AM_XGETTEXT_OPTION], + [AM_XGETTEXT_OPTION([--keyword=proper_name:1,'This is a proper name. See the gettext manual, section Names.']) + AM_XGETTEXT_OPTION([--keyword=proper_name_utf8:1,'This is a proper name. See the gettext manual, section Names.'])]) + +Makefile.am: +lib_SOURCES += propername.h propername.c + +Include: +"propername.h" + +License: +GPL + +Maintainer: +Bruno Haible +