more get_unicode_kbinput() fixes: properly discard hexadecimal FFFE to FFFF instead of xxxE and xxxF, and discard hexadecimal D800 to DFFF as well, as they're also invalid Unicode characters

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2972 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
2005-08-02 19:54:25 +00:00 · 2005-08-02 19:54:25 +00:00 · b7bf27a509
parent c25ed534eb
commit b7bf27a509
2 changed files with 23 additions and 20 deletions
--- a/8
+++ b/8
@ -263,10 +263,10 @@ CVS code -
 	  as wc does. (DLR)
 - winio.c:
  get_word_kbinput()
-	- Limit the input word to hexadecimal FFFD instead of FFFF, as
-	  FFFE and FFFF are invalid Unicode characters, rename variables
-	  word and word_digits to uni and uni_digits, and rename to
-	  get_unicode_kbinput(). (DLR)
+	- Don't allow the input word to be from hexadecimal D800 to
+	  DFFF or hexadecimal FFFE to FFFF, as they are invalid Unicode
+	  characters; rename variables word and word_digits to uni and
+	  uni_digits; and rename to get_unicode_kbinput(). (DLR)
  display_string()
 	- Instead of using parse_mbchar()'s bad_chr parameter, use
 	  mbrep() to get the representation of a bad character. (DLR)
--- a/src/winio.c
+++ b/src/winio.c
@ -1232,8 +1232,8 @@ int get_byte_kbinput(int kbinput
 }

 /* Translate a Unicode sequence: turn a four-digit hexadecimal number
- * from 0000 to FFFD (case-insensitive) into its corresponding multibyte
- * value. */
+ * from 0000 to D7FF or E000 to FFFD (case-insensitive) into its
+ * corresponding multibyte value. */
 int get_unicode_kbinput(int kbinput
 #ifndef NANO_SMALL
 	, bool reset
@ -1257,13 +1257,13 @@ int get_unicode_kbinput(int kbinput
    switch (uni_digits) {
 	case 1:
 	    /* One digit: reset the Unicode sequence holder and add the
-	     * digit we got to the 4096's position of the Unicode
+	     * digit we got to the 0x1000's position of the Unicode
 	     * sequence holder. */
 	    uni = 0;
 	    if ('0' <= kbinput && kbinput <= '9')
-		uni += (kbinput - '0') * 4096;
+		uni += (kbinput - '0') * 0x1000;
 	    else if ('a' <= tolower(kbinput) && tolower(kbinput) <= 'f')
-		uni += (tolower(kbinput) + 10 - 'a') * 4096;
+		uni += (tolower(kbinput) + 10 - 'a') * 0x1000;
 	    else
 		/* If the character we got isn't a hexadecimal digit, or
 		 * if it is and it would put the Unicode sequence out of
@ -1271,12 +1271,14 @@ int get_unicode_kbinput(int kbinput
 		retval = kbinput;
 	    break;
 	case 2:
-	    /* Two digits: add the digit we got to the 256's position of
-	     * the Unicode sequence holder. */
-	    if ('0' <= kbinput && kbinput <= '9')
-		uni += (kbinput - '0') * 256;
-	    else if ('a' <= tolower(kbinput) && tolower(kbinput) <= 'f')
-		uni += (tolower(kbinput) + 10 - 'a') * 256;
+	    /* Two digits: add the digit we got to the 0x100's position
+	     * of the Unicode sequence holder. */
+	    if (('0' <= kbinput && kbinput <= '7') || (uni != 0xD000 &&
+		'8' <= kbinput && kbinput <= '9'))
+		uni += (kbinput - '0') * 0x100;
+	    else if (uni != 0xd000 && 'a' <= tolower(kbinput) &&
+		tolower(kbinput) <= 'f')
+		uni += (tolower(kbinput) + 10 - 'a') * 0x100;
 	    else
 		/* If the character we got isn't a hexadecimal digit, or
 		 * if it is and it would put the Unicode sequence out of
@ -1284,12 +1286,12 @@ int get_unicode_kbinput(int kbinput
 		retval = kbinput;
 	    break;
 	case 3:
-	    /* Three digits: add the digit we got to the 16's position
+	    /* Three digits: add the digit we got to the 0x10's position
 	     * of the Unicode sequence holder. */
 	    if ('0' <= kbinput && kbinput <= '9')
-		uni += (kbinput - '0') * 16;
+		uni += (kbinput - '0') * 0x10;
 	    else if ('a' <= tolower(kbinput) && tolower(kbinput) <= 'f')
-		uni += (tolower(kbinput) + 10 - 'a') * 16;
+		uni += (tolower(kbinput) + 10 - 'a') * 0x10;
 	    else
 		/* If the character we got isn't a hexadecimal digit, or
 		 * if it is and it would put the Unicode sequence out of
@ -1303,8 +1305,9 @@ int get_unicode_kbinput(int kbinput
 	    if ('0' <= kbinput && kbinput <= '9') {
 		uni += (kbinput - '0');
 		retval = uni;
-	    } else if ('a' <= tolower(kbinput) &&
-		tolower(kbinput) <= 'd') {
+	    } else if (('a' <= tolower(kbinput) &&
+		tolower(kbinput) <= 'd') || (uni != 0xFFF0 && 'e' <=
+		tolower(kbinput) && tolower(kbinput) <= 'f')) {
 		uni += (tolower(kbinput) + 10 - 'a');
 		retval = uni;
 	    } else