miscellaneous cleanups for the multibyte parsing functions
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2242 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
parent
12054fe11b
commit
d96851f534
13
src/nano.c
13
src/nano.c
|
@ -1178,12 +1178,11 @@ void do_delete(void)
|
|||
placewewant = xplustabs();
|
||||
|
||||
if (current->data[current_x] != '\0') {
|
||||
int char_len = parse_char(current->data + current_x, NULL,
|
||||
NULL
|
||||
int char_len = parse_char(current->data + current_x, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
size_t line_len = strlen(current->data + current_x);
|
||||
|
||||
assert(current_x < strlen(current->data));
|
||||
|
@ -2501,11 +2500,11 @@ bool breakable(const char *line, ssize_t goal)
|
|||
if (isblank(*line))
|
||||
return TRUE;
|
||||
|
||||
line += parse_char(line, NULL, &pos
|
||||
line += parse_char(line, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, &pos);
|
||||
|
||||
goal -= pos;
|
||||
}
|
||||
|
@ -2539,11 +2538,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
|
|||
|
||||
assert(*line != '\t');
|
||||
|
||||
line_len = parse_char(line, NULL, &pos
|
||||
line_len = parse_char(line, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, &pos);
|
||||
|
||||
goal -= pos;
|
||||
line += line_len;
|
||||
|
|
10
src/proto.h
10
src/proto.h
|
@ -478,13 +478,13 @@ bool is_byte_char(int c);
|
|||
int num_of_digits(int n);
|
||||
unsigned char control_rep(unsigned char c);
|
||||
bool parse_num(const char *str, ssize_t *val);
|
||||
int parse_char(const char *str, int *chr, size_t *col
|
||||
int parse_char(const char *buf, int *chr
|
||||
#ifdef NANO_WIDE
|
||||
, bool *bad_char
|
||||
, bool *bad_chr
|
||||
#endif
|
||||
);
|
||||
size_t move_left(const char *str, size_t pos);
|
||||
size_t move_right(const char *str, size_t pos);
|
||||
, size_t *col);
|
||||
size_t move_left(const char *buf, size_t pos);
|
||||
size_t move_right(const char *buf, size_t pos);
|
||||
void align(char **strp);
|
||||
void null_at(char **data, size_t index);
|
||||
void unsunder(char *str, size_t true_len);
|
||||
|
|
100
src/utils.c
100
src/utils.c
|
@ -128,66 +128,66 @@ bool parse_num(const char *str, ssize_t *val)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/* Parse a multi-byte character from str. Return the number of bytes
|
||||
* used. If chr isn't NULL, store the wide character in it. If col
|
||||
* isn't NULL, store the new display width in it. If *str is '\t', we
|
||||
* expect col to have the current display width. If bad_char isn't
|
||||
* NULL, set it to TRUE if we have a null byte or a bad multibyte
|
||||
* character. */
|
||||
int parse_char(const char *str, int *chr, size_t *col
|
||||
/* Parse a multibyte character from buf. Return the number of bytes
|
||||
* used. If chr isn't NULL, store the wide character in it. If
|
||||
* bad_chr isn't NULL, set it to TRUE if we have a null byte or a bad
|
||||
* multibyte character. If col isn't NULL, store the new display width
|
||||
* in it. If *buf is '\t', we expect col to have the current display
|
||||
* width. */
|
||||
int parse_char(const char *buf, int *chr
|
||||
#ifdef NANO_WIDE
|
||||
, bool *bad_char
|
||||
, bool *bad_chr
|
||||
#endif
|
||||
)
|
||||
, size_t *col)
|
||||
{
|
||||
int wide_str, wide_str_len;
|
||||
int wide_buf, mb_buf_len;
|
||||
|
||||
assert(str != NULL);
|
||||
assert(buf != NULL);
|
||||
|
||||
#ifdef NANO_WIDE
|
||||
if (bad_char != NULL)
|
||||
*bad_char = FALSE;
|
||||
if (bad_chr != NULL)
|
||||
*bad_chr = FALSE;
|
||||
|
||||
if (!ISSET(NO_UTF8)) {
|
||||
wchar_t tmp;
|
||||
|
||||
/* Get the wide character equivalent of the multibyte
|
||||
* character. */
|
||||
wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX);
|
||||
wide_str = (int)tmp;
|
||||
mb_buf_len = mbtowc(&tmp, buf, MB_CUR_MAX);
|
||||
wide_buf = (int)tmp;
|
||||
|
||||
/* If str contains a null byte or an invalid multibyte
|
||||
* character, interpret str's first byte as a single-byte
|
||||
* sequence and set bad_char to TRUE. */
|
||||
if (wide_str_len <= 0) {
|
||||
wide_str_len = 1;
|
||||
wide_str = (unsigned char)*str;
|
||||
if (bad_char != NULL)
|
||||
*bad_char = TRUE;
|
||||
/* If buf contains a null byte or an invalid multibyte
|
||||
* character, interpret buf's first byte as a single-byte
|
||||
* sequence and set bad_chr to TRUE. */
|
||||
if (mb_buf_len <= 0) {
|
||||
mb_buf_len = 1;
|
||||
wide_buf = (unsigned char)*buf;
|
||||
if (bad_chr != NULL)
|
||||
*bad_chr = TRUE;
|
||||
}
|
||||
|
||||
/* Save the wide character in chr. */
|
||||
if (chr != NULL)
|
||||
*chr = wide_str;
|
||||
*chr = wide_buf;
|
||||
|
||||
/* Save the column width of the wide character in col. */
|
||||
if (col != NULL) {
|
||||
/* If we have a tab, get its width in columns using the
|
||||
* current value of col. */
|
||||
if (wide_str == '\t')
|
||||
if (wide_buf == '\t')
|
||||
*col += tabsize - *col % tabsize;
|
||||
/* If we have a control character, get its width using one
|
||||
* column for the "^" that will be displayed in front of it,
|
||||
* and the width in columns of its visible equivalent as
|
||||
* returned by control_rep(). */
|
||||
else if (is_cntrl_char(wide_str)) {
|
||||
char *ctrl_wide_str = charalloc(MB_CUR_MAX);
|
||||
else if (is_cntrl_char(wide_buf)) {
|
||||
char *ctrl_mb_buf = charalloc(MB_CUR_MAX);
|
||||
|
||||
(*col)++;
|
||||
wide_str = control_rep((unsigned char)wide_str);
|
||||
wide_buf = control_rep((unsigned char)wide_buf);
|
||||
|
||||
if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1) {
|
||||
int width = wcwidth(wide_str);
|
||||
if (wctomb(ctrl_mb_buf, (wchar_t)wide_buf) != -1) {
|
||||
int width = wcwidth((wchar_t)wide_buf);
|
||||
|
||||
if (width != -1)
|
||||
*col += width;
|
||||
|
@ -195,11 +195,11 @@ int parse_char(const char *str, int *chr, size_t *col
|
|||
else
|
||||
(*col)++;
|
||||
|
||||
free(ctrl_wide_str);
|
||||
free(ctrl_mb_buf);
|
||||
/* If we have a normal character, get its width in columns
|
||||
* normally. */
|
||||
} else {
|
||||
int width = wcwidth(wide_str);
|
||||
int width = wcwidth((wchar_t)wide_buf);
|
||||
|
||||
if (width != -1)
|
||||
*col += width;
|
||||
|
@ -207,25 +207,25 @@ int parse_char(const char *str, int *chr, size_t *col
|
|||
}
|
||||
} else {
|
||||
#endif
|
||||
/* Interpret str's first character as a single-byte sequence. */
|
||||
wide_str_len = 1;
|
||||
wide_str = (unsigned char)*str;
|
||||
/* Interpret buf's first character as a single-byte sequence. */
|
||||
mb_buf_len = 1;
|
||||
wide_buf = (unsigned char)*buf;
|
||||
|
||||
/* Save the single-byte sequence in chr as though it's a wide
|
||||
* character. */
|
||||
if (chr != NULL)
|
||||
*chr = wide_str;
|
||||
*chr = wide_buf;
|
||||
|
||||
if (col != NULL) {
|
||||
/* If we have a tab, get its width in columns using the
|
||||
* current value of col. */
|
||||
if (wide_str == '\t')
|
||||
if (wide_buf == '\t')
|
||||
*col += tabsize - *col % tabsize;
|
||||
/* If we have a control character, it's two columns wide:
|
||||
* one column for the "^" that will be displayed in front of
|
||||
* it, and one column for its visible equivalent as returned
|
||||
* by control_rep(). */
|
||||
else if (is_cntrl_char(wide_str))
|
||||
else if (is_cntrl_char(wide_buf))
|
||||
*col += 2;
|
||||
/* If we have a normal character, it's one column wide. */
|
||||
else
|
||||
|
@ -235,44 +235,44 @@ int parse_char(const char *str, int *chr, size_t *col
|
|||
}
|
||||
#endif
|
||||
|
||||
return wide_str_len;
|
||||
return mb_buf_len;
|
||||
}
|
||||
|
||||
/* Return the index in str of the beginning of the character before the
|
||||
/* Return the index in buf of the beginning of the character before the
|
||||
* one at pos. */
|
||||
size_t move_left(const char *str, size_t pos)
|
||||
size_t move_left(const char *buf, size_t pos)
|
||||
{
|
||||
size_t pos_prev = pos;
|
||||
|
||||
assert(str != NULL && pos <= strlen(str));
|
||||
assert(buf != NULL && pos <= strlen(buf));
|
||||
|
||||
/* There is no library function to move backward one multibyte
|
||||
* character. Here is the naive, O(pos) way to do it. */
|
||||
while (TRUE) {
|
||||
int str_len = parse_char(str + pos - pos_prev, NULL, NULL
|
||||
int mb_buf_len = parse_char(buf + pos - pos_prev, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
|
||||
if (pos_prev <= str_len)
|
||||
if (pos_prev <= mb_buf_len)
|
||||
break;
|
||||
|
||||
pos_prev -= str_len;
|
||||
pos_prev -= mb_buf_len;
|
||||
}
|
||||
|
||||
return pos - pos_prev;
|
||||
}
|
||||
|
||||
/* Return the index in str of the beginning of the character after the
|
||||
/* Return the index in buf of the beginning of the character after the
|
||||
* one at pos. */
|
||||
size_t move_right(const char *str, size_t pos)
|
||||
size_t move_right(const char *buf, size_t pos)
|
||||
{
|
||||
return pos + parse_char(str + pos, NULL, NULL
|
||||
return pos + parse_char(buf + pos, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
}
|
||||
|
||||
/* Fix the memory allocation for a string. */
|
||||
|
|
108
src/winio.c
108
src/winio.c
|
@ -1936,11 +1936,11 @@ void do_statusbar_backspace(void)
|
|||
void do_statusbar_delete(void)
|
||||
{
|
||||
if (statusbar_x < statusbar_xend) {
|
||||
int char_len = parse_char(answer + statusbar_x, NULL, NULL
|
||||
int char_len = parse_char(answer + statusbar_x, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
|
||||
charmove(answer + statusbar_x, answer + statusbar_x + char_len,
|
||||
statusbar_xend - statusbar_x - char_len + 1);
|
||||
|
@ -2056,11 +2056,11 @@ size_t actual_x(const char *str, size_t xplus)
|
|||
assert(str != NULL);
|
||||
|
||||
while (*str != '\0') {
|
||||
int str_len = parse_char(str, NULL, &length
|
||||
int str_len = parse_char(str, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, &length);
|
||||
|
||||
if (length > xplus)
|
||||
break;
|
||||
|
@ -2085,11 +2085,11 @@ size_t strnlenpt(const char *str, size_t size)
|
|||
assert(str != NULL);
|
||||
|
||||
while (*str != '\0') {
|
||||
int str_len = parse_char(str, NULL, &length
|
||||
int str_len = parse_char(str, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, &length);
|
||||
|
||||
str += str_len;
|
||||
|
||||
|
@ -2160,25 +2160,25 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
|
|||
/* Throughout the loop, we maintain the fact that *buf displays at
|
||||
* column start_col. */
|
||||
while (start_col <= end_col && *buf != '\0') {
|
||||
int wide_buf, wide_buf_len;
|
||||
size_t old_col = start_col;
|
||||
int wide_buf, mb_buf_len;
|
||||
#ifdef NANO_WIDE
|
||||
bool bad_char;
|
||||
#endif
|
||||
size_t old_col = start_col;
|
||||
|
||||
wide_buf_len = parse_char(buf, &wide_buf, &start_col
|
||||
mb_buf_len = parse_char(buf, &wide_buf
|
||||
#ifdef NANO_WIDE
|
||||
, &bad_char
|
||||
#endif
|
||||
);
|
||||
, &start_col);
|
||||
|
||||
#ifdef NANO_WIDE
|
||||
/* If buf contains a null byte or an invalid multibyte
|
||||
* character, interpret that character as though it's a wide
|
||||
* character. */
|
||||
if (!ISSET(NO_UTF8) && bad_char) {
|
||||
char *bad_wide_buf = charalloc(MB_CUR_MAX);
|
||||
int bad_wide_buf_len;
|
||||
char *bad_mb_buf = charalloc(MB_CUR_MAX);
|
||||
int bad_mb_buf_len;
|
||||
|
||||
/* If we have a control character, add one byte to account
|
||||
* for the "^" that will be displayed in front of it, and
|
||||
|
@ -2191,12 +2191,12 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
|
|||
|
||||
/* Translate the wide character to its multibyte
|
||||
* equivalent. */
|
||||
bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
|
||||
bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf);
|
||||
|
||||
if (bad_wide_buf_len != -1)
|
||||
retval += bad_wide_buf_len;
|
||||
if (bad_mb_buf_len != -1)
|
||||
retval += bad_mb_buf_len;
|
||||
|
||||
free(bad_wide_buf);
|
||||
free(bad_mb_buf);
|
||||
} else {
|
||||
#endif
|
||||
/* If we have a tab, get its width in bytes using the
|
||||
|
@ -2208,23 +2208,22 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
|
|||
* then add the number of bytes for its visible equivalent
|
||||
* as returned by control_rep(). */
|
||||
else if (is_cntrl_char(wide_buf)) {
|
||||
char ctrl_wide_buf =
|
||||
control_rep((unsigned char)wide_buf);
|
||||
char ctrl_mb_buf = control_rep((unsigned char)wide_buf);
|
||||
|
||||
retval++;
|
||||
retval += parse_char(&ctrl_wide_buf, NULL, NULL
|
||||
retval += parse_char(&ctrl_mb_buf, NULL
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
/* If we have a normal character, add its width in bytes
|
||||
* normally. */
|
||||
} else
|
||||
retval += wide_buf_len;
|
||||
retval += mb_buf_len;
|
||||
#ifdef NANO_WIDE
|
||||
}
|
||||
|
||||
buf += wide_buf_len;
|
||||
buf += mb_buf_len;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -2279,43 +2278,43 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
|
||||
if (column < start_col || (dollars && column > 0 &&
|
||||
buf[start_index] != '\t')) {
|
||||
int wide_buf, wide_buf_len;
|
||||
int wide_buf, mb_buf_len;
|
||||
|
||||
/* We don't display all of buf[start_index] since it starts to
|
||||
* the left of the screen. */
|
||||
wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
|
||||
mb_buf_len = parse_char(buf + start_index, &wide_buf
|
||||
#ifdef NANO_WIDE
|
||||
, NULL
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
|
||||
if (is_cntrl_char(wide_buf)) {
|
||||
if (column < start_col) {
|
||||
char *ctrl_wide_buf =
|
||||
char *ctrl_mb_buf =
|
||||
#ifdef NANO_WIDE
|
||||
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
|
||||
#endif
|
||||
charalloc(1);
|
||||
int ctrl_wide_buf_len, i;
|
||||
int ctrl_mb_buf_len, i;
|
||||
|
||||
wide_buf = control_rep((unsigned char)wide_buf);
|
||||
|
||||
#ifdef NANO_WIDE
|
||||
if (!ISSET(NO_UTF8))
|
||||
ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
|
||||
ctrl_mb_buf_len = wctomb(ctrl_mb_buf,
|
||||
(wchar_t)wide_buf);
|
||||
else {
|
||||
#endif
|
||||
ctrl_wide_buf_len = 1;
|
||||
ctrl_wide_buf[0] = (unsigned char)wide_buf;
|
||||
ctrl_mb_buf_len = 1;
|
||||
ctrl_mb_buf[0] = (unsigned char)wide_buf;
|
||||
#ifdef NANO_WIDE
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < ctrl_wide_buf_len; i++)
|
||||
converted[index++] = ctrl_wide_buf[i];
|
||||
for (i = 0; i < ctrl_mb_buf_len; i++)
|
||||
converted[index++] = ctrl_mb_buf[i];
|
||||
|
||||
free(ctrl_wide_buf);
|
||||
free(ctrl_mb_buf);
|
||||
|
||||
#ifdef NANO_WIDE
|
||||
if (!ISSET(NO_UTF8)) {
|
||||
|
@ -2327,7 +2326,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
#endif
|
||||
start_col++;
|
||||
|
||||
start_index += wide_buf_len;
|
||||
start_index += mb_buf_len;
|
||||
}
|
||||
}
|
||||
#ifdef NANO_WIDE
|
||||
|
@ -2335,22 +2334,22 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
converted[index++] = ' ';
|
||||
|
||||
start_col++;
|
||||
start_index += wide_buf_len;
|
||||
start_index += mb_buf_len;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
while (index < alloc_len - 1 && buf[start_index] != '\0') {
|
||||
int wide_buf, wide_buf_len;
|
||||
int wide_buf, mb_buf_len;
|
||||
#ifdef NANO_WIDE
|
||||
bool bad_char;
|
||||
#endif
|
||||
|
||||
wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
|
||||
mb_buf_len = parse_char(buf + start_index, &wide_buf
|
||||
#ifdef NANO_WIDE
|
||||
, &bad_char
|
||||
#endif
|
||||
);
|
||||
, NULL);
|
||||
|
||||
if (wide_buf == '\t') {
|
||||
converted[index++] =
|
||||
|
@ -2367,12 +2366,12 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
* contains an invalid multibyte control character, interpret
|
||||
* that character as though it's a normal control character. */
|
||||
} else if (is_cntrl_char(wide_buf)) {
|
||||
char *ctrl_wide_buf =
|
||||
char *ctrl_mb_buf =
|
||||
#ifdef NANO_WIDE
|
||||
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
|
||||
#endif
|
||||
charalloc(1);
|
||||
int ctrl_wide_buf_len, i;
|
||||
int ctrl_mb_buf_len, i;
|
||||
|
||||
converted[index++] = '^';
|
||||
start_col++;
|
||||
|
@ -2380,20 +2379,20 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
|
||||
#ifdef NANO_WIDE
|
||||
if (!ISSET(NO_UTF8))
|
||||
ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
|
||||
ctrl_mb_buf_len = wctomb(ctrl_mb_buf,
|
||||
(wchar_t)wide_buf);
|
||||
else {
|
||||
#endif
|
||||
ctrl_wide_buf_len = 1;
|
||||
ctrl_wide_buf[0] = (unsigned char)wide_buf;
|
||||
ctrl_mb_buf_len = 1;
|
||||
ctrl_mb_buf[0] = (unsigned char)wide_buf;
|
||||
#ifdef NANO_WIDE
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < ctrl_wide_buf_len; i++)
|
||||
converted[index++] = ctrl_wide_buf[i];
|
||||
for (i = 0; i < ctrl_mb_buf_len; i++)
|
||||
converted[index++] = ctrl_mb_buf[i];
|
||||
|
||||
free(ctrl_wide_buf);
|
||||
free(ctrl_mb_buf);
|
||||
|
||||
#ifdef NANO_WIDE
|
||||
if (!ISSET(NO_UTF8)) {
|
||||
|
@ -2419,19 +2418,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
* character, interpret that character as though it's a
|
||||
* normal non-control character. */
|
||||
if (!ISSET(NO_UTF8) && bad_char) {
|
||||
char *bad_wide_buf = charalloc(MB_CUR_MAX);
|
||||
int bad_wide_buf_len;
|
||||
char *bad_mb_buf = charalloc(MB_CUR_MAX);
|
||||
int bad_mb_buf_len;
|
||||
|
||||
bad_wide_buf_len = wctomb(bad_wide_buf,
|
||||
(wchar_t)wide_buf);
|
||||
bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf);
|
||||
|
||||
for (i = 0; i < bad_wide_buf_len; i++)
|
||||
converted[index++] = bad_wide_buf[i];
|
||||
for (i = 0; i < bad_mb_buf_len; i++)
|
||||
converted[index++] = bad_mb_buf[i];
|
||||
|
||||
free(bad_wide_buf);
|
||||
free(bad_mb_buf);
|
||||
} else {
|
||||
#endif
|
||||
for (i = 0; i < wide_buf_len; i++)
|
||||
for (i = 0; i < mb_buf_len; i++)
|
||||
converted[index++] = buf[start_index + i];
|
||||
#ifdef NANO_WIDE
|
||||
}
|
||||
|
@ -2446,7 +2444,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
|
|||
start_col++;
|
||||
}
|
||||
|
||||
start_index += wide_buf_len;
|
||||
start_index += mb_buf_len;
|
||||
}
|
||||
|
||||
if (index < alloc_len - 1)
|
||||
|
|
Loading…
Reference in New Issue