even better handling of invalid Unicode characters

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2975 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
David Lawrence Ramsey 2005-08-05 03:14:29 +00:00
parent 71dd8c1ca6
commit 6ff695c154
3 changed files with 22 additions and 16 deletions

View File

@ -134,6 +134,10 @@ CVS code -
get_key_buffer(), check_statusblank(), nanogetstr(), get_key_buffer(), check_statusblank(), nanogetstr(),
titlebar(), statusbar(), bottombars(), edit_refresh(), titlebar(), statusbar(), bottombars(), edit_refresh(),
do_yesno(), and do_help(). (DLR) do_yesno(), and do_help(). (DLR)
- Treat the Unicode characters D800-DFFF and FFFE-FFFF as
invalid, since the C library's multibyte functions don't seem
to reject them. New function is_valid_unicode(); changes to
mbrep() and make_mbchar(). (DLR)
- color.c: - color.c:
- Remove unneeded fcntl.h include. (DLR) - Remove unneeded fcntl.h include. (DLR)
- chars.c: - chars.c:
@ -143,13 +147,6 @@ CVS code -
mbrep() mbrep()
- New function, the equivalent of control_mbrep() for non-control - New function, the equivalent of control_mbrep() for non-control
characters. (DLR) characters. (DLR)
- Treat the Unicode characters D800-DFFF and FFFE-FFFF as
invalid, since the C library's multibyte functions don't seem
to. (DLR)
make_mbchar()
- Treat the Unicode characters D800-DFFF and FFFE-FFFF as
invalid, since the C library's multibyte functions don't seem
to. (DLR)
parse_mbchar() parse_mbchar()
- Remove now-unneeded bad_chr parameter. (DLR) - Remove now-unneeded bad_chr parameter. (DLR)
mbstrchr() mbstrchr()

View File

@ -255,10 +255,8 @@ char *mbrep(const char *c, char *crep, int *crep_len)
if (ISSET(USE_UTF8)) { if (ISSET(USE_UTF8)) {
wchar_t wc; wchar_t wc;
/* Unicode D800-DFFF and FFFE-FFFF are invalid, even though /* Reject invalid Unicode characters. */
* they're parsed properly. */ if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || ((0xD800 <= wc && wc <=
0xDFFF) || (0XFFFE <= wc && wc <= 0xFFFF))) {
mbtowc(NULL, NULL, 0); mbtowc(NULL, NULL, 0);
crep = (char *)bad_mbchar; crep = (char *)bad_mbchar;
*crep_len = bad_mbchar_len; *crep_len = bad_mbchar_len;
@ -331,12 +329,10 @@ char *make_mbchar(int chr, int *chr_mb_len)
#ifdef ENABLE_UTF8 #ifdef ENABLE_UTF8
if (ISSET(USE_UTF8)) { if (ISSET(USE_UTF8)) {
chr_mb = charalloc(MB_CUR_MAX); chr_mb = charalloc(MB_CUR_MAX);
*chr_mb_len = wctomb(chr_mb, chr); *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);
/* Unicode D800-DFFF and FFFE-FFFF are invalid, even though /* Reject invalid Unicode characters. */
* they're parsed properly. */ if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
if (*chr_mb_len < 0 || ((0xD800 <= chr && chr <= 0xDFFF) ||
(0XFFFE <= chr && chr <= 0xFFFF))) {
wctomb(NULL, 0); wctomb(NULL, 0);
*chr_mb_len = 0; *chr_mb_len = 0;
} }
@ -887,6 +883,16 @@ bool has_blank_mbchars(const char *s)
#endif /* ENABLE_NANORC */ #endif /* ENABLE_NANORC */
#endif /* !DISABLE_JUSTIFY */ #endif /* !DISABLE_JUSTIFY */
#ifdef ENABLE_UTF8
/* Return TRUE if wc is valid Unicode, and FALSE otherwise.  A value is
 * invalid when it is negative, when it is greater than 10FFFF (the last
 * code point of the Unicode codespace), when it is in the surrogate
 * range D800-DFFF, or when it is FFFE or FFFF (i.e., the ranges that
 * the C library's multibyte functions don't seem to reject on their
 * own). */
bool is_valid_unicode(wchar_t wc)
{
    /* The first two comparisons keep wc inside the codespace 0-10FFFF;
     * the middle pair excludes the surrogates; the last pair excludes
     * the two noncharacters at the end of the Basic Multilingual
     * Plane. */
    return (0 <= wc && wc <= 0x10FFFF &&
	(wc <= 0xD7FF || 0xE000 <= wc) &&
	wc != 0xFFFE && wc != 0xFFFF);
}
#endif
#ifdef ENABLE_NANORC #ifdef ENABLE_NANORC
/* Check if the string s is a valid multibyte string. Return TRUE if it /* Check if the string s is a valid multibyte string. Return TRUE if it
* is, and FALSE otherwise. */ * is, and FALSE otherwise. */

View File

@ -197,6 +197,9 @@ bool has_blank_chars(const char *s);
bool has_blank_mbchars(const char *s); bool has_blank_mbchars(const char *s);
#endif #endif
#endif #endif
#ifdef ENABLE_UTF8
bool is_valid_unicode(wchar_t wc);
#endif
#ifdef ENABLE_NANORC #ifdef ENABLE_NANORC
bool is_valid_mbstring(const char *s); bool is_valid_mbstring(const char *s);
#endif #endif