improvements to wide/multibyte character input and output, using wide curses functions where applicable

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2182 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
2004-12-12 19:04:56 +00:00 · 2004-12-12 19:04:56 +00:00 · 78ea5e4a8a
parent 164bf36ea8
commit 78ea5e4a8a
5 changed files with 97 additions and 100 deletions
--- a/22
+++ b/22
@ -35,20 +35,21 @@ CVS code -
 	  encodings such as KOI8-R can type properly again. (DLR, found
 	  by Arthur Ivanov)
 	- Massively overhaul the input and output routines to support
-	  buffered input and output, the first steps toward wide
+	  buffered input and output, the first steps toward
-	  character input and output, and double-Escape/verbatim input
+	  wide/multibyte character input and output, and
-	  of double-byte Unicode characters instead of single-byte ASCII
+	  double-Escape/verbatim input of double-byte Unicode characters
-	  characters.  New functions do_input(), do_mouse(),
+	  instead of single-byte ASCII characters.  New functions
-	  do_output(), is_byte_char(), get_buffer(), get_buffer_len(),
+	  is_byte_char(), get_buffer(), get_buffer_len(),
 	  buffer_to_keys(), unget_input(), get_input(), parse_kbinput(),
 	  and parse_verbatim_kbinput(); new macro charcpy(); changes to
 	  do_char() (renamed to do_output()), get_edit_input() (renamed
 	  to do_input() and moved to nano.c), get_edit_mouse() (renamed
 	  do_mouse() and moved to nano.c), do_verbatim_input(),
 	  do_tab(), main(), and get_ascii_kbinput() (renamed to
-	  get_word_kbinput()). (DLR; buffered input/output based on
+	  get_word_kbinput()).  The wide version of ncurses is required
-	  ideas from mutt 1.4.2.1; double-Escape input of Unicode
+	  in order for output to work properly. (DLR; buffered
-	  characters suggested by Michael Piefel)
+	  input/output based on ideas from mutt 1.4.2.1; double-Escape
 	  input of Unicode characters suggested by Michael Piefel)
 - cut.c:
  do_cut_text()
 	- If keep_cutbuffer is FALSE, only blow away the text in the
@ -132,6 +133,11 @@ CVS code -
 	- Add a debug message. (DLR)
 - configure.ac:
 	- Remove specific references to control key shortcuts. (DLR)
 	- Check for the wide version of ncurses, without which multibyte
 	  strings don't seem to be displayed properly. (DLR; check for
 	  addwstr() in curses to determine whether it has wide character
 	  support inspired by mutt 1.4.2.1i's checking for waddnwstr()
 	  for the same reason)
 - doc/nanorc.sample:
 	- Add return to the "c-file" regexes. (DLR)
--- a/configure.ac
+++ b/configure.ac
@ -312,28 +312,32 @@ AC_CHECK_FUNCS(getopt_long)
 dnl Checks for libraries.
 if eval "test x$CURSES_LIB_NAME = x"
 then
-    AC_CHECK_HEADERS(curses.h ncurses.h)
+    AC_CHECK_HEADERS(ncurses.h)
-    AC_CHECK_LIB(ncurses, initscr, [CURSES_LIB="-lncurses" CURSES_LIB_NAME=ncurses])
+    AC_CHECK_LIB(ncursesw, addwstr, [CURSES_LIB="-lncursesw" CURSES_LIB_NAME=ncursesw CURSES_LIB_WIDE="yes"])
    if eval "test x$CURSES_LIB_NAME = x"
    then
 	 AC_CHECK_LIB(ncurses, initscr, [CURSES_LIB="-lncurses" CURSES_LIB_NAME=ncurses])
    fi
 fi
 if eval "test x$CURSES_LIB_NAME = x"
 then
-    AC_CHECK_LIB(curses, initscr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses])
+    AC_CHECK_HEADERS(curses.h)
-fi
+    AC_CHECK_LIB(curses, addwstr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses CURSES_LIB_WIDE="yes"])
-
+    if eval "test x$CURSES_LIB_NAME = x"
-if eval "test x$CURSES_LIB_NAME = x"
+    then
-then
+	 AC_CHECK_LIB(curses, initscr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses])
-    AC_CHECK_LIB(termcap, tgetent, [CURSES_LIB="-ltermcap" CURSES_LIB_NAME=termcap])
+    fi
 fi
 if eval "test x$CURSES_LIB_NAME = x"
 then
    AC_MSG_WARN([
-*** No termcap lib available, consider getting the official ncurses
+*** No curses lib available.  Consider getting the official ncurses
 *** distribution from ftp://ftp.gnu.org/pub/gnu/ncurses if you get
 *** errors compiling nano.])
 else
-    AC_MSG_RESULT([Using $CURSES_LIB_NAME as the termcap library])
+    AC_MSG_RESULT([Using $CURSES_LIB_NAME as the curses library])
 fi
 AC_CHECK_LIB([$CURSES_LIB_NAME], use_default_colors, AC_DEFINE(HAVE_USE_DEFAULT_COLORS, 1, [Define this if your curses library has the use_default_colors command.]))
@ -353,6 +357,13 @@ then
 	LDFLAGS="$LDFLAGS $GLIB_LIBS"
 fi
 dnl Define NANO_WIDE only when one of the curses checks above found
 dnl addwstr() and set CURSES_LIB_WIDE to "yes"; otherwise warn the user.
 dnl Use "=" rather than "==": POSIX test only specifies "=" for string
 dnl equality, and "==" is rejected by some /bin/sh implementations.
 if test "x$CURSES_LIB_WIDE" = "xyes"
 then
 	AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support.])
 else
 	AC_MSG_WARN([No wide character support found.  nano will not be able to support UTF-8.])
 fi
 AC_CONFIG_FILES([
 Makefile
 doc/Makefile
--- a/src/nano.c
+++ b/src/nano.c
@ -3567,26 +3567,22 @@ void do_output(int *kbinput, size_t kbinput_len)
 	/* Do we have to call edit_refresh(), or can we get away with
 	 * update_line()? */
    char key[
 #ifdef NANO_WIDE
-	MB_LEN_MAX
+    char *key =
 	charalloc(MB_CUR_MAX)
 #else
-	1
+	charalloc(1)
 #endif
-	];		/* The current multibyte character we have. */
+	;
    int key_len;	/* The length of the current multibyte
 			 * character. */
    assert(current != NULL && current->data != NULL);
    /* Turn off constant cursor position display. */
    UNSET(CONSTUPDATE);
 #ifdef NANO_WIDE
    wctomb(NULL, 0);
 #endif
    for (i = 0; i < kbinput_len; i++) {
 	int key_len;
 	/* Null to newline, if needed. */
 	if (kbinput[i] == '\0')
 	    kbinput[i] = '\n';
@ -3628,7 +3624,7 @@ void do_output(int *kbinput, size_t kbinput_len)
 	charcpy(&current->data[current_x], key, key_len);
 	current_len += key_len;
 	/* FIXME: Should totsize be the number of single-byte characters
-	 * or the number of multibyte characters?  Assume for former for
+	 * or the number of multibyte characters?  Assume the former for
 	 * now. */
 	totsize += key_len;
 	set_modified();
@ -3669,15 +3665,13 @@ void do_output(int *kbinput, size_t kbinput_len)
 #endif
    }
 #ifdef NANO_WIDE
    wctomb(NULL, 0);
 #endif
    /* Turn constant cursor position display back on if it was on
     * before. */
    if (old_constupdate)
 	SET(CONSTUPDATE);
    free(key);
    if (do_refresh)
 	edit_refresh();
    else
--- a/src/nano.h
+++ b/src/nano.h
@ -135,10 +135,6 @@
 #define VERMSG "GNU nano " VERSION
 /* FIXME: We should be checking for this instead of unconditionally
 * using it. */
 #define NANO_WIDE 1
 /* If we aren't using ncurses, turn the mouse support off, as it's
 * ncurses-specific. */
 #ifndef NCURSES_MOUSE_VERSION
--- a/src/winio.c
+++ b/src/winio.c
@ -122,7 +122,7 @@ void reset_kbinput(void)
 * default keystroke buffer is empty. */
 void get_buffer(WINDOW *win)
 {
-    int input;
+    int input, input_key_code;
    /* If the keystroke buffer isn't empty, get out. */
    if (key_buffer != NULL)
@ -134,19 +134,36 @@ void get_buffer(WINDOW *win)
 #ifndef NANO_SMALL
    allow_pending_sigwinch(TRUE);
 #endif
-    input = wgetch(win);
+
 #ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
 	wint_t tmp;
 	input_key_code = wget_wch(win, &tmp);
 	input = (int)tmp;
    } else {
 #endif
 	input = wgetch(win);
 	input_key_code = !is_byte_char(input);
 #ifdef NANO_WIDE
    }
 #endif
 #ifndef NANO_SMALL
    allow_pending_sigwinch(FALSE);
 #endif
    /* Increment the length of the keystroke buffer, save the value of
     * the keystroke in key, and set key_code to TRUE if the keystroke
-     * is an extended keypad value and hence shouldn't be treated as a
+     * is an extended keypad value or FALSE if it isn't. */
     * multibyte character. */
    key_buffer_len++;
    key_buffer = (buffer *)nmalloc(sizeof(buffer));
    key_buffer[0].key = input;
-    key_buffer[0].key_code = !is_byte_char(input);
+    key_buffer[0].key_code =
 #ifdef NANO_WIDE
 	!ISSET(NO_UTF8) ? (input_key_code == KEY_CODE_YES) :
 #endif
 	input_key_code;
    /* Read in the remaining characters using non-blocking input. */
    nodelay(win, TRUE);
@ -155,73 +172,49 @@ void get_buffer(WINDOW *win)
 #ifndef NANO_SMALL
 	allow_pending_sigwinch(TRUE);
 #endif
 	input = wgetch(win);
 #ifndef NANO_SMALL
 	allow_pending_sigwinch(FALSE);
 #endif
 #ifdef NANO_WIDE
 	if (!ISSET(NO_UTF8)) {
 	    wint_t tmp;
 	    input_key_code = wget_wch(win, &tmp);
 	    input = (int)tmp;
 	} else {
 #endif
 	    input = wgetch(win);
 	    input_key_code = !is_byte_char(input);
 #ifdef NANO_WIDE
 	}
 #endif
 	/* If there aren't any more characters, stop reading. */
-	if (input == ERR)
+	if (
 #ifdef NANO_WIDE
 		(!ISSET(NO_UTF8) && input_key_code == ERR) ||
 #endif
 		input == ERR)
 	    break;
 	/* Otherwise, increment the length of the keystroke buffer, save
 	 * the value of the keystroke in key, and set key_code to TRUE
-	 * if the keystroke is an extended keypad value and hence
+	 * if the keystroke is an extended keypad value or FALSE if it
-	 * shouldn't be treated as a multibyte character. */
+	 * isn't. */
 	key_buffer_len++;
 	key_buffer = (buffer *)nrealloc(key_buffer, key_buffer_len *
 		sizeof(buffer));
 	key_buffer[key_buffer_len - 1].key = input;
-	key_buffer[key_buffer_len - 1].key_code = !is_byte_char(input);
+	key_buffer[key_buffer_len - 1].key_code =
 #ifdef NANO_WIDE
 		!ISSET(NO_UTF8) ? (input_key_code == KEY_CODE_YES) :
 #endif
 		input_key_code;
 #ifndef NANO_SMALL
 	allow_pending_sigwinch(FALSE);
 #endif
    }
    /* Switch back to non-blocking input. */
    nodelay(win, FALSE);
 #ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
 	size_t i;
 	buffer *clean_key_buffer = NULL;
 	size_t clean_key_buffer_len = 0;
 	mbtowc(NULL, NULL, 0);
 	/* Change all complete and valid multibyte keystrokes to
 	 * their wide character values, discarding the others. */
 	for (i = 0; i < key_buffer_len; i++) {
 	    wchar_t wide_key;
 	    int wide_key_len;
 	    if (key_buffer[i].key_code) {
 		mbtowc(NULL, NULL, 0);
 		wide_key_len = 1;
 		wide_key = key_buffer[i].key;
 	    } else
 		wide_key_len = mbtowc(&wide_key,
 			(const char *)&key_buffer[i].key, 1);
 	    if (wide_key_len != -1) {
 		clean_key_buffer_len++;
 		clean_key_buffer = (buffer *)nrealloc(clean_key_buffer,
 			clean_key_buffer_len * sizeof(buffer));
 		clean_key_buffer[clean_key_buffer_len - 1].key =
 			(int)wide_key;
 		clean_key_buffer[clean_key_buffer_len - 1].key_code =
 			key_buffer[i].key_code;
 	    }
 	}
 	mbtowc(NULL, NULL, 0);
 	/* Replace the default keystroke buffer with the non-(-1)
 	 * keystroke buffer. */
 	key_buffer_len = clean_key_buffer_len;
 	free(key_buffer);
 	key_buffer = clean_key_buffer;
    }
 #endif
 }
 /* Return the length of the default keystroke buffer. */
@ -258,12 +251,10 @@ void unget_input(buffer *input, size_t input_len)
 #ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
 	size_t i;
-
+	char *key = charalloc(MB_CUR_MAX);
 	wctomb(NULL, 0);
 	/* Keep all valid wide keystrokes, discarding the others. */
 	for (i = 0; i < input_len; i++) {
 	    char key[MB_LEN_MAX];
 	    int key_len = input[i].key_code ? 1 :
 		wctomb(key, (wchar_t)input[i].key);
@ -278,8 +269,7 @@ void unget_input(buffer *input, size_t input_len)
 	    }
 	}
-	wctomb(NULL, 0);
+	free(key);
    } else {
 #endif
 	clean_input = input;