massive updates to multibyte/wide character support; deal with multibyte

characters and strings instead of wide characters and strings as much as
possible, and move multibyte/wide character-specific functions into
their own source file, chars.c


git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2248 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
David Lawrence Ramsey 2005-01-12 03:25:57 +00:00
parent 775d46daf9
commit b54155c4a4
12 changed files with 945 additions and 857 deletions

View File

@ -87,6 +87,23 @@ CVS code -
do_statusbar_input() and do_statusbar_output(); new functions
keys_to_buffer(), unparse_kbinput(), and
do_statusbar_verbatim_input(). (DLR)
- Yet more steps toward full wide character/multibyte character
support. Overhaul the functions that already have support for
them to work with multibyte strings as much as possible, add
support to a few more functions as well, and move multibyte
character-specific functions to their own source file. New
file chars.c; new functions is_blank_mbchar(),
is_blank_wchar(), is_cntrl_mbchar(), is_cntrl_wchar(),
control_mbrep(), control_wrep(), mbwidth(), mb_cur_max(), and
make_mbchar(); changes to is_blank_char() (moved to chars.c),
is_cntrl_char() (moved to chars.c), parse_char() (renamed
parse_mbchar() and moved to chars.c), do_verbatim_input(),
do_delete(), do_tab(), do_input(), do_output(), get_buffer(),
unget_input(), unget_kbinput(), get_input(), parse_kbinput(),
unparse_kbinput(), parse_verbatim_kbinput(),
do_statusbar_input(), do_statusbar_verbatim_input(),
do_statusbar_output(), and display_string(); removal of
buffer_to_keys() and keys_to_buffer(). (DLR)
- cut.c:
do_cut_text()
- If keep_cutbuffer is FALSE, only blow away the text in the
@ -209,6 +226,7 @@ CVS code -
obsolete and it defines a struct termio that we don't use
anywhere. (DLR)
- Typo fixes. (DLR)
- Add checks for iswblank(), mblen(), and wctype.h. (DLR)
- doc/faq.html:
- Remove now-inaccurate note about verbatim input's not working
at prompts, and update its description to mention that it
@ -219,6 +237,8 @@ CVS code -
display. Since ASCII is technically only seven bits wide,
characters 128-255 aren't ASCII. (DLR, suggested by Michael
Piefel)
- src/Makefile.am:
- Add chars.c to nano_SOURCES. (DLR)
GNU nano 1.3.5 - 2004.11.22
- General:

View File

@ -40,7 +40,7 @@ AM_GNU_GETTEXT([external], [need-ngettext])
dnl Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termios.h wchar.h)
AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termios.h wchar.h wctype.h)
AC_CHECK_HEADER(regex.h,
AC_MSG_CHECKING([for broken regexec])
AC_TRY_RUN([
@ -291,7 +291,7 @@ AC_MSG_WARN([*** Can not use slang when cross-compiling])),
esac], [AC_MSG_RESULT(no)])
dnl Checks for functions
AC_CHECK_FUNCS(snprintf vsnprintf isblank strcasecmp strncasecmp strcasestr strnlen getline getdelim mbtowc wctomb wcwidth)
AC_CHECK_FUNCS(snprintf vsnprintf isblank iswblank strcasecmp strncasecmp strcasestr strnlen getline getdelim mblen mbtowc wctomb wcwidth)
if test "x$ac_cv_func_snprintf" = "xno" -o "x$ac_cv_func_vsnprintf" = "xno"
then
AM_PATH_GLIB_2_0(2.0.0,,
@ -357,9 +357,9 @@ then
LDFLAGS="$LDFLAGS $GLIB_LIBS"
fi
if test "x$CURSES_LIB_WIDE" = "xyes" -a "x$ac_cv_func_mbtowc" = "xyes" -a "x$ac_cv_func_wctomb" = "xyes" -a "x$ac_cv_func_wcwidth" = "xyes"
if test "x$CURSES_LIB_WIDE" = "xyes" -a "x$ac_cv_func_mblen" = "xyes" -a "x$ac_cv_func_mbtowc" = "xyes" -a "x$ac_cv_func_wctomb" = "xyes" -a "x$ac_cv_func_wcwidth" = "xyes"
then
AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support (a wide curses library, mbtowc(), wctomb(), and wcwidth()).])
AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support (a wide curses library, mblen(), mbtowc(), wctomb(), and wcwidth()).])
else
AC_MSG_WARN([Insufficient wide character support found. nano will not be able to support UTF-8.])
fi

View File

@ -4,7 +4,8 @@ INCLUDES = -Iintl -DLOCALEDIR=\"$(localedir)\" -DSYSCONFDIR=\"$(sysconfdir)\"
ACLOCAL_AMFLAGS = -I m4
bin_PROGRAMS = nano
nano_SOURCES = color.c \
nano_SOURCES = chars.c \
color.c \
cut.c \
files.c \
global.c \

352
src/chars.c Normal file
View File

@ -0,0 +1,352 @@
/* $Id$ */
/**************************************************************************
* chars.c *
* *
* Copyright (C) 2005 Chris Allegretta *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2, or (at your option) *
* any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software *
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *
* *
**************************************************************************/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#include "proto.h"
#include "nano.h"
#if defined(HAVE_WCHAR_H) && defined(NANO_WIDE)
#include <wchar.h>
#endif
#if defined(HAVE_WCTYPE_H) && defined(NANO_WIDE)
#include <wctype.h>
#endif
/* Byte-character stand-in for isblank().  When the C library provides
 * isblank() it is used directly; otherwise a byte counts as blank when
 * isspace() accepts it and it is either a tab or not a control
 * character as judged by is_cntrl_char(). */
bool is_blank_char(unsigned char c)
{
#ifdef HAVE_ISBLANK
    return isblank(c);
#else
    return isspace(c) && (c == '\t' || !is_cntrl_char(c));
#endif
}
/* Multibyte counterpart of isblank(): report whether the multibyte
 * character starting at c is a blank.  With wide character support
 * compiled in and NO_UTF8 unset, the character is converted to a wide
 * character and checked with is_blank_wchar(); otherwise only the first
 * byte is checked with is_blank_char(). */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
        wchar_t wide;

        /* A null byte or an invalid sequence can't be converted, so
         * reset the conversion state and judge the first byte alone. */
        if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
            mbtowc(NULL, NULL, 0);
            wide = (unsigned char)*c;
        }

        return is_blank_wchar(wide);
    }
#endif

    return is_blank_char((unsigned char)*c);
}
#ifdef NANO_WIDE
/* Wide-character stand-in for iswblank().  When the C library provides
 * iswblank() it is used directly; otherwise a wide character counts as
 * blank when iswspace() accepts it and it is either a tab or not a
 * control character as judged by is_cntrl_wchar(). */
bool is_blank_wchar(wchar_t wc)
{
#ifdef HAVE_ISWBLANK
    return iswblank(wc);
#else
    return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc));
#endif
}
#endif
/* Like iscntrl(), but control characters with their high bits set are
 * recognized too: the byte is a control character when it lies in
 * 0-31 or in 127-159. */
bool is_cntrl_char(unsigned char c)
{
    if (c < 32)
        return 1;

    return c >= 127 && c <= 159;
}
/* Multibyte counterpart of iscntrl() that also recognizes control
 * characters with their high bits set.  With wide character support
 * compiled in and NO_UTF8 unset, the character starting at c is
 * converted to a wide character and checked with is_cntrl_wchar();
 * otherwise only the first byte is checked with is_cntrl_char(). */
bool is_cntrl_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
        wchar_t wide;

        /* A null byte or an invalid sequence can't be converted, so
         * reset the conversion state and judge the first byte alone. */
        if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
            mbtowc(NULL, NULL, 0);
            wide = (unsigned char)*c;
        }

        return is_cntrl_wchar(wide);
    }
#endif

    return is_cntrl_char((unsigned char)*c);
}
#ifdef NANO_WIDE
/* Wide-character counterpart of iscntrl() that also recognizes control
 * characters with their high bits set: wc is a control character when
 * it lies in 0-31 or in 127-159. */
bool is_cntrl_wchar(wchar_t wc)
{
    if (wc >= 0 && wc <= 31)
        return 1;

    return wc >= 127 && wc <= 159;
}
#endif
/* Return the visible stand-in for the control character c, i.e. the
 * character shown after the "^": '@' for a newline, '?' for
 * NANO_CONTROL_8, and c + 64 for everything else. */
unsigned char control_rep(unsigned char c)
{
    /* A newline embedded in a line is treated as an encoded null. */
    if (c == '\n')
        return '@';

    if (c == NANO_CONTROL_8)
        return '?';

    return c + 64;
}
/* c points to a multibyte control character, which displays as ^@, ^?,
 * or ^[ch] where ch is the character's value plus 64.  Store the
 * multibyte form of that visible character in crep and its length in
 * bytes in *crep_len, and return crep.
 *
 * With wide character support compiled in and NO_UTF8 unset, c is
 * converted to a wide character, mapped through control_wrep(), and
 * converted back with wctomb().  If that last conversion fails,
 * *crep_len is set to 0 and nothing meaningful is stored in crep.
 * Otherwise the first byte of c is mapped through control_rep() and
 * *crep_len is 1.
 *
 * NOTE(review): crep is presumably expected to hold at least
 * mb_cur_max() bytes, as the caller in parse_mbchar() allocates --
 * confirm for other callers. */
char *control_mbrep(const char *c, char *crep, int *crep_len)
{
    assert(c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
        wchar_t wc, wcrep;
        int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX), crep_mb_len;

        /* On a null byte or an invalid sequence, reset the conversion
         * state and use the first byte by itself.  The byte must go
         * through unsigned char so that a high-bit byte does not turn
         * into a negative wide character where plain char is signed. */
        if (c_mb_len <= 0) {
            mbtowc(NULL, NULL, 0);
            wc = (unsigned char)*c;
        }

        wcrep = control_wrep(wc);

        crep_mb_len = wctomb(crep, wcrep);

        /* If the visible character can't be encoded, reset wctomb()'s
         * state and report an empty representation. */
        if (crep_mb_len <= 0) {
            wctomb(NULL, 0);
            crep_mb_len = 0;
        }

        *crep_len = crep_mb_len;

        return crep;
    }
#endif

    *crep_len = 1;
    crep[0] = control_rep((unsigned char)*c);

    return crep;
}
#ifdef NANO_WIDE
/* Return the visible stand-in for the wide control character wc, i.e.
 * the character shown after the "^": '@' for a newline, '?' for
 * NANO_CONTROL_8, and wc + 64 for everything else. */
wchar_t control_wrep(wchar_t wc)
{
    /* A newline embedded in a line is treated as an encoded null. */
    if (wc == '\n')
        return '@';

    if (wc == NANO_CONTROL_8)
        return '?';

    return wc + 64;
}
#endif
/* This function is equivalent to wcwidth() for multibyte characters. */
int mbwidth(const char *c)
{
assert(c != NULL);
#ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) {
wchar_t wc;
int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX), width;
if (c_mb_len <= 0) {
mbtowc(NULL, NULL, 0);
wc = (unsigned char)*c;
}
width = wcwidth(wc);
if (width == -1)
width++;
return width;
} else
#endif
return 1;
}
/* Return the maximum number of bytes a multibyte character can take:
 * MB_CUR_MAX when wide character support is compiled in and NO_UTF8 is
 * unset, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8))
        return MB_CUR_MAX;
#endif

    return 1;
}
/* Convert the value in chr to the multibyte character that has chr as
 * its wide character value.  The bytes are stored in chr_mb, their
 * count in *chr_mb_len, and chr_mb is returned.
 *
 * With wide character support compiled in and NO_UTF8 unset, the
 * conversion is done with wctomb(); if it fails, or in every other
 * configuration, chr is stored as a single byte and *chr_mb_len is
 * 1. */
char *make_mbchar(unsigned int chr, char *chr_mb, int *chr_mb_len)
{
#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        if (*chr_mb_len > 0)
            return chr_mb;

        /* The conversion failed.  It was wctomb() that failed, so it
         * is wctomb()'s state that has to be reset (not mbtowc()'s)
         * before falling back to a single byte below. */
        wctomb(NULL, 0);
    }
#endif

    *chr_mb_len = 1;
    chr_mb[0] = (unsigned char)chr;

    return chr_mb;
}
/* Parse the multibyte character at the start of buf and return the
 * number of bytes it uses.  If chr isn't NULL, the bytes of the
 * character are copied into it.  If bad_chr isn't NULL, it is set to
 * TRUE when buf starts with a null byte or an invalid multibyte
 * sequence (in which case one byte is used), and to FALSE otherwise.
 * If col isn't NULL, the display width of the character is added to it;
 * for a tab (*buf == '\t') col must hold the current display width on
 * entry so that the distance to the next tab stop can be worked out. */
int parse_mbchar(const char *buf, char *chr
#ifdef NANO_WIDE
        , bool *bad_chr
#endif
        , size_t *col)
{
    assert(buf != NULL);

#ifdef NANO_WIDE
    if (bad_chr != NULL)
        *bad_chr = FALSE;

    if (!ISSET(NO_UTF8)) {
        /* How many bytes the multibyte character takes up. */
        int mb_len = mblen(buf, MB_CUR_MAX);

        /* A null byte or an invalid sequence: reset mblen()'s state,
         * take just the first byte, and flag the character as bad. */
        if (mb_len <= 0) {
            mblen(NULL, 0);
            mb_len = 1;

            if (bad_chr != NULL)
                *bad_chr = TRUE;
        }

        /* Hand the bytes of the character back to the caller. */
        if (chr != NULL) {
            int n = 0;

            while (n < mb_len) {
                chr[n] = buf[n];
                n++;
            }
        }

        /* Add the character's display width to col. */
        if (col != NULL) {
            if (*buf == '\t') {
                /* A tab reaches to the next tab stop, which depends on
                 * the current value of col. */
                *col += tabsize - *col % tabsize;
            } else if (is_cntrl_mbchar(buf)) {
                /* A control character takes one column for the "^"
                 * shown in front of it, plus the width of its visible
                 * equivalent as produced by control_mbrep(). */
                char *rep = charalloc(mb_cur_max());
                int rep_len;

                rep = control_mbrep(buf, rep, &rep_len);
                *col += 1 + mbwidth(rep);

                free(rep);
            } else {
                /* Anything else is measured normally. */
                *col += mbwidth(buf);
            }
        }

        return mb_len;
    }
#endif

    /* Byte mode: every character is exactly one byte long. */
    if (chr != NULL)
        *chr = *buf;

    if (col != NULL) {
        if (*buf == '\t') {
            /* A tab reaches to the next tab stop, which depends on the
             * current value of col. */
            *col += tabsize - *col % tabsize;
        } else {
            /* A control character is two columns wide (the "^" plus
             * its visible equivalent from control_rep()); any other
             * character is one column wide. */
            *col += is_cntrl_char((unsigned char)*buf) ? 2 : 1;
        }
    }

    return 1;
}

View File

@ -2175,7 +2175,7 @@ char *input_tab(char *buf, int place, bool *lastwastab, int *newplace,
tmp = matchbuf;
/* skip any leading white space */
while (*tmp && isblank(*tmp))
while (*tmp && is_blank_char(*tmp))
++tmp;
/* Free up any memory already allocated */

View File

@ -2,7 +2,7 @@
/**************************************************************************
* move.c *
* *
* Copyright (C) 1999-2004 Chris Allegretta *
* Copyright (C) 1999-2005 Chris Allegretta *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2, or (at your option) *
@ -58,7 +58,7 @@ void do_home(void)
if (ISSET(SMART_HOME)) {
size_t current_x_save = current_x;
for (current_x = 0; isblank(current->data[current_x]) &&
for (current_x = 0; is_blank_char(current->data[current_x]) &&
current->data[current_x] != '\0'; current_x++)
;

View File

@ -1144,8 +1144,9 @@ bool open_pipe(const char *command)
void do_verbatim_input(void)
{
int *kbinput; /* Used to hold verbatim input. */
size_t kbinput_len; /* Length of verbatim input. */
int *kbinput;
size_t kbinput_len, i;
char *output;
statusbar(_("Verbatim input"));
@ -1153,9 +1154,15 @@ void do_verbatim_input(void)
kbinput = get_verbatim_kbinput(edit, &kbinput_len);
/* Display all the verbatim characters at once. */
do_output(kbinput, kbinput_len);
output = charalloc(kbinput_len + 1);
free(kbinput);
for (i = 0; i < kbinput_len; i++)
output[i] = (char)kbinput[i];
output[i] = '\0';
do_output(output, kbinput_len);
free(output);
}
void do_backspace(void)
@ -1178,7 +1185,7 @@ void do_delete(void)
placewewant = xplustabs();
if (current->data[current_x] != '\0') {
int char_len = parse_char(current->data + current_x, NULL
int char_buf_len = parse_mbchar(current->data + current_x, NULL
#ifdef NANO_WIDE
, NULL
#endif
@ -1189,15 +1196,15 @@ void do_delete(void)
/* Let's get dangerous. */
charmove(&current->data[current_x],
&current->data[current_x + char_len],
line_len - char_len + 1);
&current->data[current_x + char_buf_len],
line_len - char_buf_len + 1);
null_at(&current->data, current_x + line_len - char_len);
null_at(&current->data, current_x + line_len - char_buf_len);
#ifndef NANO_SMALL
if (current_x < mark_beginx && mark_beginbuf == current)
mark_beginx -= char_len;
mark_beginx -= char_buf_len;
#endif
totsize -= char_len;
totsize -= char_buf_len;
} else if (current != filebot && (current->next != filebot ||
current->data[0] == '\0')) {
/* We can delete the line before filebot only if it is blank: it
@ -1251,8 +1258,9 @@ void do_delete(void)
void do_tab(void)
{
int kbinput = '\t';
do_output(&kbinput, 1);
char *kbinput = "\t";
do_output(kbinput, 1);
}
/* Someone hits return *gasp!* */
@ -1455,7 +1463,7 @@ bool do_wrap(filestruct *inptr)
wrap_line = inptr->data + i;
for (; i < len; i++, wrap_line++) {
/* Record where the last word ended. */
if (!isblank(*wrap_line))
if (!is_blank_char(*wrap_line))
word_back = i;
/* If we have found a legal wrap point and the current word
* extends too far, then we stop. */
@ -1463,7 +1471,7 @@ bool do_wrap(filestruct *inptr)
strnlenpt(inptr->data, word_back + 1) > fill)
break;
/* We record the latest legal wrap point. */
if (word_back != i && !isblank(wrap_line[1]))
if (word_back != i && !is_blank_char(wrap_line[1]))
wrap_loc = i;
}
if (i == len)
@ -1536,7 +1544,7 @@ bool do_wrap(filestruct *inptr)
* between after_break and wrap_line. If the line already ends
* in a tab or a space, we don't add a space and decrement
* totsize to account for that. */
if (!isblank(newline[new_line_len - 1]))
if (!is_blank_char(newline[new_line_len - 1]))
strcat(newline, " ");
else
totsize--;
@ -2172,7 +2180,7 @@ size_t indent_length(const char *line)
size_t len = 0;
assert(line != NULL);
while (isblank(*line)) {
while (is_blank_char(*line)) {
line++;
len++;
}
@ -2200,7 +2208,7 @@ void justify_format(filestruct *line, size_t skip)
assert(line != NULL);
assert(line->data != NULL);
assert(skip < strlen(line->data));
assert(!isblank(line->data[skip]));
assert(!is_blank_char(line->data[skip]));
back = line->data + skip;
for (front = back; ; front++) {
@ -2497,10 +2505,10 @@ bool breakable(const char *line, ssize_t goal)
while (*line != '\0' && goal >= 0) {
size_t pos = 0;
if (isblank(*line))
if (is_blank_char(*line))
return TRUE;
line += parse_char(line, NULL
line += parse_mbchar(line, NULL
#ifdef NANO_WIDE
, NULL
#endif
@ -2538,7 +2546,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
assert(*line != '\t');
line_len = parse_char(line, NULL
line_len = parse_mbchar(line, NULL
#ifdef NANO_WIDE
, NULL
#endif
@ -3468,7 +3476,16 @@ int do_input(bool *meta_key, bool *func_key, bool *s_or_t, bool
if (kbinput != NULL) {
/* Display all the characters in the input buffer at
* once. */
do_output(kbinput, kbinput_len);
char *output = charalloc(kbinput_len + 1);
size_t i;
for (i = 0; i < kbinput_len; i++)
output[i] = (char)kbinput[i];
output[i] = '\0';
do_output(output, kbinput_len);
free(output);
/* Empty the input buffer. */
kbinput_len = 0;
@ -3588,55 +3605,45 @@ bool do_mouse(void)
}
#endif /* !DISABLE_MOUSE */
/* The user typed kbinput_len wide characters. Add them to the edit
* buffer as multibyte characters. */
void do_output(int *kbinput, size_t kbinput_len)
/* The user typed kbinput_len multibyte characters. Add them to the
* edit buffer. */
void do_output(char *output, size_t output_len)
{
size_t i, current_len = strlen(current->data);
size_t current_len = strlen(current->data), i = 0;
bool old_constupdate = ISSET(CONSTUPDATE);
bool do_refresh = FALSE;
/* Do we have to call edit_refresh(), or can we get away with
* update_line()? */
char *key =
#ifdef NANO_WIDE
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
#endif
charalloc(1);
char *char_buf = charalloc(mb_cur_max());
int char_buf_len;
assert(current != NULL && current->data != NULL);
/* Turn off constant cursor position display. */
UNSET(CONSTUPDATE);
for (i = 0; i < kbinput_len; i++) {
int key_len;
while (i < output_len) {
/* Null to newline, if needed. */
if (kbinput[i] == '\0')
kbinput[i] = '\n';
if (output[i] == '\0')
output[i] = '\n';
/* Newline to Enter, if needed. */
else if (kbinput[i] == '\n') {
else if (output[i] == '\n') {
do_enter();
i++;
continue;
}
/* Interpret the next multibyte character. If it's an invalid
* multibyte character, interpret it as though it's a byte
* character. */
char_buf_len = parse_mbchar(output + i, char_buf
#ifdef NANO_WIDE
/* Change the wide character to its multibyte value. If it's
* invalid, go on to the next character. */
if (!ISSET(NO_UTF8)) {
key_len = wctomb(key, (wchar_t)kbinput[i]);
, NULL
#endif
, NULL);
if (key_len == -1)
continue;
/* Interpret the character as a single-byte sequence. */
} else {
#endif
key_len = 1;
key[0] = (unsigned char)kbinput[i];
#ifdef NANO_WIDE
}
#endif
i += char_buf_len;
/* When a character is inserted on the current magicline, it
* means we need a new one! */
@ -3644,30 +3651,30 @@ void do_output(int *kbinput, size_t kbinput_len)
new_magicline();
/* More dangerousness fun =) */
current->data = charealloc(current->data,
current_len + (key_len * 2));
current->data = charealloc(current->data, current_len +
(char_buf_len * 2));
assert(current_x <= current_len);
charmove(&current->data[current_x + key_len],
charmove(&current->data[current_x + char_buf_len],
&current->data[current_x],
current_len - current_x + key_len);
charcpy(&current->data[current_x], key, key_len);
current_len += key_len;
totsize += key_len;
current_len - current_x + char_buf_len);
charcpy(&current->data[current_x], char_buf, char_buf_len);
current_len += char_buf_len;
totsize += char_buf_len;
set_modified();
#ifndef NANO_SMALL
/* Note that current_x has not yet been incremented. */
if (current == mark_beginbuf && current_x < mark_beginx)
mark_beginx += key_len;
mark_beginx += char_buf_len;
#endif
do_right(FALSE);
#ifndef DISABLE_WRAPPING
/* If we're wrapping text, we need to call edit_refresh(). */
if (!ISSET(NO_WRAP) && kbinput[i] != '\t') {
if (!ISSET(NO_WRAP) && output[i] != '\t') {
bool do_refresh_save = do_refresh;
do_refresh = do_wrap(current);
@ -3692,7 +3699,7 @@ void do_output(int *kbinput, size_t kbinput_len)
if (old_constupdate)
SET(CONSTUPDATE);
free(key);
free(char_buf);
if (do_refresh)
edit_refresh();

View File

@ -100,12 +100,8 @@
# endif
#endif
/* If no isblank(), strcasecmp(), strncasecmp(), strcasestr(),
* strnlen(), getdelim(), or getline(), use the versions we have. */
#ifndef HAVE_ISBLANK
#define isblank is_blank_char
#endif
/* If no strcasecmp(), strncasecmp(), strcasestr(), strnlen(),
* getdelim(), or getline(), use the versions we have. */
#ifndef HAVE_STRCASECMP
#define strcasecmp nstricmp
#endif
@ -161,11 +157,6 @@ typedef enum {
} topmidnone;
/* Structure types. */
typedef struct buffer {
int key;
bool key_code;
} buffer;
typedef struct filestruct {
char *data;
struct filestruct *next; /* Next node. */

View File

@ -150,6 +150,31 @@ extern char *homedir;
/* Functions we want available. */
/* Public functions in chars.c. */
bool is_blank_char(unsigned char c);
bool is_blank_mbchar(const char *c);
#ifdef NANO_WIDE
bool is_blank_wchar(wchar_t wc);
#endif
bool is_cntrl_char(unsigned char c);
bool is_cntrl_mbchar(const char *c);
#ifdef NANO_WIDE
bool is_cntrl_wchar(wchar_t wc);
#endif
unsigned char control_rep(unsigned char c);
char *control_mbrep(const char *c, char *crep, int *crep_len);
#ifdef NANO_WIDE
wchar_t control_wrep(wchar_t c);
#endif
int mbwidth(const char *c);
int mb_cur_max(void);
char *make_mbchar(unsigned int chr, char *chr_mb, int *chr_mb_len);
int parse_mbchar(const char *buf, char *chr
#ifdef NANO_WIDE
, bool *bad_chr
#endif
, size_t *col);
/* Public functions in color.c. */
#ifdef ENABLE_COLOR
void set_colorpairs(void);
@ -396,7 +421,7 @@ int do_input(bool *meta_key, bool *func_key, bool *s_or_t, bool
#ifndef DISABLE_MOUSE
bool do_mouse(void);
#endif
void do_output(int *kbinput, size_t kbinput_len);
void do_output(char *output, size_t output_len);
/* Public functions in rcfile.c. */
#ifdef ENABLE_NANORC
@ -470,19 +495,9 @@ int regexec_safe(const regex_t *preg, const char *string, size_t nmatch,
#endif
int regexp_bol_or_eol(const regex_t *preg, const char *string);
#endif
#ifndef HAVE_ISBLANK
int is_blank_char(int c);
#endif
int is_cntrl_char(int c);
bool is_byte_char(int c);
int num_of_digits(int n);
unsigned char control_rep(unsigned char c);
bool is_byte(int c);
bool parse_num(const char *str, ssize_t *val);
int parse_char(const char *buf, int *chr
#ifdef NANO_WIDE
, bool *bad_chr
#endif
, size_t *col);
size_t move_left(const char *buf, size_t pos);
size_t move_right(const char *buf, size_t pos);
void align(char **strp);
@ -541,18 +556,16 @@ void reset_kbinput(void);
#endif
void get_buffer(WINDOW *win);
size_t get_buffer_len(void);
int *buffer_to_keys(buffer *input, size_t input_len);
buffer *keys_to_buffer(int *input, size_t input_len);
void unget_input(buffer *input, size_t input_len);
void unget_input(int *input, size_t input_len);
void unget_kbinput(int kbinput, bool meta_key, bool func_key);
buffer *get_input(WINDOW *win, size_t input_len);
int *get_input(WINDOW *win, size_t input_len);
int get_kbinput(WINDOW *win, bool *meta_key, bool *func_key);
int parse_kbinput(WINDOW *win, bool *meta_key, bool *func_key
#ifndef NANO_SMALL
, bool reset
#endif
);
int get_escape_seq_kbinput(const int *sequence, size_t seq_len, bool
int get_escape_seq_kbinput(const int *seq, size_t seq_len, bool
*ignore_seq);
int get_escape_seq_abcd(int kbinput);
int get_byte_kbinput(int kbinput
@ -566,7 +579,7 @@ int get_word_kbinput(int kbinput
#endif
);
int get_control_kbinput(int kbinput);
void unparse_kbinput(size_t pos, int *kbinput, size_t kbinput_len);
void unparse_kbinput(char *output, size_t output_len);
int *get_verbatim_kbinput(WINDOW *win, size_t *kbinput_len);
int *parse_verbatim_kbinput(WINDOW *win, size_t *kbinput_len);
#ifndef DISABLE_MOUSE
@ -590,7 +603,7 @@ void do_statusbar_backspace(void);
void do_statusbar_delete(void);
void do_statusbar_cut_text(void);
void do_statusbar_verbatim_input(bool *got_enter);
void do_statusbar_output(int *kbinput, size_t kbinput_len, bool
void do_statusbar_output(char *output, size_t output_len, bool
*got_enter);
size_t xplustabs(void);
size_t actual_x(const char *str, size_t xplus);

View File

@ -126,7 +126,7 @@ void rcfile_error(const char *msg, ...)
/* Parse the next word from the string. Returns NULL if we hit EOL. */
char *parse_next_word(char *ptr)
{
while (!isblank(*ptr) && *ptr != '\n' && *ptr != '\0')
while (!is_blank_char(*ptr) && *ptr != '\n' && *ptr != '\0')
ptr++;
if (*ptr == '\0')
@ -135,7 +135,7 @@ char *parse_next_word(char *ptr)
/* Null terminate and advance ptr */
*ptr++ = 0;
while (isblank(*ptr))
while (is_blank_char(*ptr))
ptr++;
return ptr;
@ -175,7 +175,7 @@ char *parse_argument(char *ptr)
ptr = last_quote + 1;
}
if (ptr != NULL)
while (isblank(*ptr))
while (is_blank_char(*ptr))
ptr++;
return ptr;
}
@ -233,7 +233,7 @@ char *parse_next_regex(char *ptr)
/* Null terminate and advance ptr. */
*ptr++ = '\0';
while (isblank(*ptr))
while (is_blank_char(*ptr))
ptr++;
return ptr;
@ -477,7 +477,7 @@ void parse_rcfile(FILE *rcstream)
while (fgets(buf, 1023, rcstream) != 0) {
lineno++;
ptr = buf;
while (isblank(*ptr))
while (is_blank_char(*ptr))
ptr++;
if (*ptr == '\n' || *ptr == '\0')

View File

@ -33,10 +33,6 @@
#include "proto.h"
#include "nano.h"
#if defined(HAVE_WCHAR_H) && defined(NANO_WIDE)
#include <wchar.h>
#endif
#ifdef HAVE_REGEX_H
#ifdef BROKEN_REGEXEC
int regexec_safe(const regex_t *preg, const char *string, size_t nmatch,
@ -56,29 +52,6 @@ int regexp_bol_or_eol(const regex_t *preg, const char *string)
}
#endif /* HAVE_REGEX_H */
#ifndef HAVE_ISBLANK
/* This function is equivalent to isblank(). */
int is_blank_char(int c)
{
return isspace(c) && (!is_cntrl_char(c) || c == '\t');
}
#endif
/* This function is equivalent to iscntrl(), except in that it also
* handles control characters with their high bits set. */
int is_cntrl_char(int c)
{
return (-128 <= c && c < -96) || (0 <= c && c < 32) ||
(127 <= c && c < 160);
}
/* Return TRUE if the character c is in byte range, and FALSE
* otherwise. */
bool is_byte_char(int c)
{
return (unsigned int)c == (unsigned char)c;
}
int num_of_digits(int n)
{
int i = 1;
@ -94,17 +67,9 @@ int num_of_digits(int n)
return i;
}
/* c is a control character. It displays as ^@, ^?, or ^[ch] where ch
* is c + 64. We return that character. */
unsigned char control_rep(unsigned char c)
bool is_byte(int c)
{
/* Treat newlines embedded in a line as encoded nulls. */
if (c == '\n')
return '@';
else if (c == NANO_CONTROL_8)
return '?';
else
return c + 64;
return ((unsigned int)c == (unsigned char)c);
}
/* Read a ssize_t from str, and store it in *val (if val is not NULL).
@ -128,116 +93,6 @@ bool parse_num(const char *str, ssize_t *val)
return TRUE;
}
/* Parse a multibyte character from buf. Return the number of bytes
* used. If chr isn't NULL, store the wide character in it. If
* bad_chr isn't NULL, set it to TRUE if we have a null byte or a bad
* multibyte character. If col isn't NULL, store the new display width
* in it. If *str is '\t', we expect col to have the current display
* width. */
int parse_char(const char *buf, int *chr
#ifdef NANO_WIDE
, bool *bad_chr
#endif
, size_t *col)
{
int wide_buf, mb_buf_len;
assert(buf != NULL);
#ifdef NANO_WIDE
if (bad_chr != NULL)
*bad_chr = FALSE;
if (!ISSET(NO_UTF8)) {
wchar_t tmp;
/* Get the wide character equivalent of the multibyte
* character. */
mb_buf_len = mbtowc(&tmp, buf, MB_CUR_MAX);
wide_buf = (int)tmp;
/* If buf contains a null byte or an invalid multibyte
* character, interpret buf's first byte as a single-byte
* sequence and set bad_chr to TRUE. */
if (mb_buf_len <= 0) {
mb_buf_len = 1;
wide_buf = (unsigned char)*buf;
if (bad_chr != NULL)
*bad_chr = TRUE;
}
/* Save the wide character in chr. */
if (chr != NULL)
*chr = wide_buf;
/* Save the column width of the wide character in col. */
if (col != NULL) {
/* If we have a tab, get its width in columns using the
* current value of col. */
if (wide_buf == '\t')
*col += tabsize - *col % tabsize;
/* If we have a control character, get its width using one
* column for the "^" that will be displayed in front of it,
* and the width in columns of its visible equivalent as
* returned by control_rep(). */
else if (is_cntrl_char(wide_buf)) {
char *ctrl_mb_buf = charalloc(MB_CUR_MAX);
(*col)++;
wide_buf = control_rep((unsigned char)wide_buf);
if (wctomb(ctrl_mb_buf, (wchar_t)wide_buf) != -1) {
int width = wcwidth((wchar_t)wide_buf);
if (width != -1)
*col += width;
}
else
(*col)++;
free(ctrl_mb_buf);
/* If we have a normal character, get its width in columns
* normally. */
} else {
int width = wcwidth((wchar_t)wide_buf);
if (width != -1)
*col += width;
}
}
} else {
#endif
/* Interpret buf's first character as a single-byte sequence. */
mb_buf_len = 1;
wide_buf = (unsigned char)*buf;
/* Save the single-byte sequence in chr as though it's a wide
* character. */
if (chr != NULL)
*chr = wide_buf;
if (col != NULL) {
/* If we have a tab, get its width in columns using the
* current value of col. */
if (wide_buf == '\t')
*col += tabsize - *col % tabsize;
/* If we have a control character, it's two columns wide:
* one column for the "^" that will be displayed in front of
* it, and one column for its visible equivalent as returned
* by control_rep(). */
else if (is_cntrl_char(wide_buf))
*col += 2;
/* If we have a normal character, it's one column wide. */
else
(*col)++;
}
#ifdef NANO_WIDE
}
#endif
return mb_buf_len;
}
/* Return the index in buf of the beginning of the character before the
* one at pos. */
size_t move_left(const char *buf, size_t pos)
@ -249,16 +104,16 @@ size_t move_left(const char *buf, size_t pos)
/* There is no library function to move backward one multibyte
* character. Here is the naive, O(pos) way to do it. */
while (TRUE) {
int mb_buf_len = parse_char(buf + pos - pos_prev, NULL
int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL
#ifdef NANO_WIDE
, NULL
#endif
, NULL);
if (pos_prev <= mb_buf_len)
if (pos_prev <= buf_mb_len)
break;
pos_prev -= mb_buf_len;
pos_prev -= buf_mb_len;
}
return pos - pos_prev;
@ -268,7 +123,7 @@ size_t move_left(const char *buf, size_t pos)
* one at pos. */
size_t move_right(const char *buf, size_t pos)
{
return pos + parse_char(buf + pos, NULL
return pos + parse_mbchar(buf + pos, NULL
#ifdef NANO_WIDE
, NULL
#endif

File diff suppressed because it is too large Load Diff