From 6ff695c1545a39bc7e36aede26cc721b58fa509c Mon Sep 17 00:00:00 2001 From: David Lawrence Ramsey Date: Fri, 5 Aug 2005 03:14:29 +0000 Subject: [PATCH] even better handling of invalid Unicode characters git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2975 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 11 ++++------- src/chars.c | 24 +++++++++++++++--------- src/proto.h | 3 +++ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2321bc7a..70764eff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -134,6 +134,10 @@ CVS code - get_key_buffer(), check_statusblank(), nanogetstr(), titlebar(), statusbar(), bottombars(), edit_refresh(), do_yesno(), and do_help(). (DLR) + - Treat the Unicode characters D800-DFFF and FFFE-FFFF as + invalid, since the C library's multibyte functions don't seem + to. New function is_valid_unicode(); changes to mbrep() and + make_mbchar(). (DLR) - color.c: - Remove unneeded fcntl.h include. (DLR) - chars.c: @@ -143,13 +147,6 @@ CVS code - mbrep() - New function, the equivalent of control_mbrep() for non-control characters. (DLR) - - Treat the Unicode characters D800-DFFF and FFFE-FFFF as - invalid, since the C library's multibyte functions don't seem - to. (DLR) - make_mbchar() - - Treat the Unicode characters D800-DFFF and FFFE-FFFF as - invalid, since the C library's multibyte functions don't seem - to. (DLR) parse_mbchar() - Remove now-unneeded bad_chr parameter. (DLR) mbstrchr() diff --git a/src/chars.c b/src/chars.c index f21b9d6f..8703d9cc 100644 --- a/src/chars.c +++ b/src/chars.c @@ -255,10 +255,8 @@ char *mbrep(const char *c, char *crep, int *crep_len) if (ISSET(USE_UTF8)) { wchar_t wc; - /* Unicode D800-DFFF and FFFE-FFFF are invalid, even though - * they're parsed properly. */ - if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || ((0xD800 <= wc && wc <= - 0xDFFF) || (0XFFFE <= wc && wc <= 0xFFFF))) { + /* Reject invalid Unicode characters. 
*/ + if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) { mbtowc(NULL, NULL, 0); crep = (char *)bad_mbchar; *crep_len = bad_mbchar_len; @@ -331,12 +329,10 @@ char *make_mbchar(int chr, int *chr_mb_len) #ifdef ENABLE_UTF8 if (ISSET(USE_UTF8)) { chr_mb = charalloc(MB_CUR_MAX); - *chr_mb_len = wctomb(chr_mb, chr); + *chr_mb_len = wctomb(chr_mb, (wchar_t)chr); - /* Unicode D800-DFFF and FFFE-FFFF are invalid, even though - * they're parsed properly. */ - if (*chr_mb_len < 0 || ((0xD800 <= chr && chr <= 0xDFFF) || - (0XFFFE <= chr && chr <= 0xFFFF))) { + /* Reject invalid Unicode characters. */ + if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) { wctomb(NULL, 0); *chr_mb_len = 0; } @@ -887,6 +883,16 @@ bool has_blank_mbchars(const char *s) #endif /* ENABLE_NANORC */ #endif /* !DISABLE_JUSTIFY */ +#ifdef ENABLE_UTF8 +/* Return TRUE if wc is valid Unicode (i.e., it's not negative or in the + * ranges D800-DFFF or FFFE-FFFF), and FALSE otherwise. */ +bool is_valid_unicode(wchar_t wc) +{ + return (0 <= wc && (wc <= 0xD7FF || 0xE000 <= wc) && (wc != + 0xFFFE && wc != 0xFFFF)); +} +#endif + #ifdef ENABLE_NANORC /* Check if the string s is a valid multibyte string. Return TRUE if it * is, and FALSE otherwise. */ diff --git a/src/proto.h b/src/proto.h index d3d5ee7c..86ef80ee 100644 --- a/src/proto.h +++ b/src/proto.h @@ -197,6 +197,9 @@ bool has_blank_chars(const char *s); bool has_blank_mbchars(const char *s); #endif #endif +#ifdef ENABLE_UTF8 +bool is_valid_unicode(wchar_t wc); +#endif #ifdef ENABLE_NANORC bool is_valid_mbstring(const char *s); #endif