even better handling of invalid Unicode characters

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2975 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
2005-08-05 03:14:29 +00:00 · 2005-08-05 03:14:29 +00:00 · 6ff695c154
parent 71dd8c1ca6
commit 6ff695c154
3 changed files with 22 additions and 16 deletions
--- a/11
+++ b/11
@@ -134,6 +134,10 @@ CVS code -
 	  get_key_buffer(), check_statusblank(), nanogetstr(),
 	  titlebar(), statusbar(), bottombars(), edit_refresh(),
 	  do_yesno(), and do_help(). (DLR)
+	- Treat the Unicode characters D800-DFFF and FFFE-FFFF as
+	  invalid, since the C library's multibyte functions don't seem
+	  to.  New function is_valid_unicode(); changes to mbrep() and
+	  make_mbchar(). (DLR)
 - color.c:
 	- Remove unneeded fcntl.h include. (DLR)
 - chars.c:
@@ -143,13 +147,6 @@ CVS code -
  mbrep()
 	- New function, the equivalent of control_mbrep() for non-control
 	  characters. (DLR)
-	- Treat the Unicode characters D800-DFFF and FFFE-FFFF as
-	  invalid, since the C library's multibyte functions don't seem
-	  to. (DLR)
-  make_mbchar()
-	- Treat the Unicode characters D800-DFFF and FFFE-FFFF as
-	  invalid, since the C library's multibyte functions don't seem
-	  to. (DLR)
  parse_mbchar()
 	- Remove now-unneeded bad_chr parameter. (DLR)
  mbstrchr()
--- a/src/chars.c
+++ b/src/chars.c
@@ -255,10 +255,8 @@ char *mbrep(const char *c, char *crep, int *crep_len)
    if (ISSET(USE_UTF8)) {
 	wchar_t wc;

-	/* Unicode D800-DFFF and FFFE-FFFF are invalid, even though
-	 * they're parsed properly. */
-	if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || ((0xD800 <= wc && wc <=
-		0xDFFF) || (0XFFFE <= wc && wc <= 0xFFFF))) {
+	/* Reject invalid Unicode characters. */
+	if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
 	    mbtowc(NULL, NULL, 0);
 	    crep = (char *)bad_mbchar;
 	    *crep_len = bad_mbchar_len;
@@ -331,12 +329,10 @@ char *make_mbchar(int chr, int *chr_mb_len)
 #ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
 	chr_mb = charalloc(MB_CUR_MAX);
-	*chr_mb_len = wctomb(chr_mb, chr);
+	*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

-	/* Unicode D800-DFFF and FFFE-FFFF are invalid, even though
-	 * they're parsed properly. */
-	if (*chr_mb_len < 0 || ((0xD800 <= chr && chr <= 0xDFFF) ||
-		(0XFFFE <= chr && chr <= 0xFFFF))) {
+	/* Reject invalid Unicode characters. */
+	if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
 	    wctomb(NULL, 0);
 	    *chr_mb_len = 0;
 	}
@@ -887,6 +883,16 @@ bool has_blank_mbchars(const char *s)
 #endif /* ENABLE_NANORC */
 #endif /* !DISABLE_JUSTIFY */

+#ifdef ENABLE_UTF8
+/* Return TRUE if wc is valid Unicode (i.e., it's neither negative nor
+ * in the ranges D800-DFFF or FFFE-FFFF), and FALSE otherwise. */
+bool is_valid_unicode(wchar_t wc)
+{
+    return (0 <= wc && (wc <= 0xD7FF || 0xE000 <= wc) && (wc !=
+	0xFFFE && wc != 0xFFFF));
+}
+#endif
+
 #ifdef ENABLE_NANORC
 /* Check if the string s is a valid multibyte string.  Return TRUE if it
 * is, and FALSE otherwise. */
--- a/src/proto.h
+++ b/src/proto.h
@@ -197,6 +197,9 @@ bool has_blank_chars(const char *s);
 bool has_blank_mbchars(const char *s);
 #endif
 #endif
+#ifdef ENABLE_UTF8
+bool is_valid_unicode(wchar_t wc);
+#endif
 #ifdef ENABLE_NANORC
 bool is_valid_mbstring(const char *s);
 #endif