miscellaneous cleanups for the multibyte parsing functions

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2242 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
David Lawrence Ramsey 2005-01-07 22:39:43 +00:00
parent 12054fe11b
commit d96851f534
4 changed files with 114 additions and 117 deletions

View File

@ -1178,12 +1178,11 @@ void do_delete(void)
placewewant = xplustabs(); placewewant = xplustabs();
if (current->data[current_x] != '\0') { if (current->data[current_x] != '\0') {
int char_len = parse_char(current->data + current_x, NULL, int char_len = parse_char(current->data + current_x, NULL
NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , NULL);
size_t line_len = strlen(current->data + current_x); size_t line_len = strlen(current->data + current_x);
assert(current_x < strlen(current->data)); assert(current_x < strlen(current->data));
@ -2501,11 +2500,11 @@ bool breakable(const char *line, ssize_t goal)
if (isblank(*line)) if (isblank(*line))
return TRUE; return TRUE;
line += parse_char(line, NULL, &pos line += parse_char(line, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , &pos);
goal -= pos; goal -= pos;
} }
@ -2539,11 +2538,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
assert(*line != '\t'); assert(*line != '\t');
line_len = parse_char(line, NULL, &pos line_len = parse_char(line, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , &pos);
goal -= pos; goal -= pos;
line += line_len; line += line_len;

View File

@ -478,13 +478,13 @@ bool is_byte_char(int c);
int num_of_digits(int n); int num_of_digits(int n);
unsigned char control_rep(unsigned char c); unsigned char control_rep(unsigned char c);
bool parse_num(const char *str, ssize_t *val); bool parse_num(const char *str, ssize_t *val);
int parse_char(const char *str, int *chr, size_t *col int parse_char(const char *buf, int *chr
#ifdef NANO_WIDE #ifdef NANO_WIDE
, bool *bad_char , bool *bad_chr
#endif #endif
); , size_t *col);
size_t move_left(const char *str, size_t pos); size_t move_left(const char *buf, size_t pos);
size_t move_right(const char *str, size_t pos); size_t move_right(const char *buf, size_t pos);
void align(char **strp); void align(char **strp);
void null_at(char **data, size_t index); void null_at(char **data, size_t index);
void unsunder(char *str, size_t true_len); void unsunder(char *str, size_t true_len);

View File

@ -128,66 +128,66 @@ bool parse_num(const char *str, ssize_t *val)
return TRUE; return TRUE;
} }
/* Parse a multi-byte character from str. Return the number of bytes /* Parse a multibyte character from buf. Return the number of bytes
* used. If chr isn't NULL, store the wide character in it. If col * used. If chr isn't NULL, store the wide character in it. If
* isn't NULL, store the new display width in it. If *str is '\t', we * bad_chr isn't NULL, set it to TRUE if we have a null byte or a bad
* expect col to have the current display width. If bad_char isn't * multibyte character. If col isn't NULL, store the new display width
* NULL, set it to TRUE if we have a null byte or a bad multibyte * in it. If *buf is '\t', we expect col to have the current display
* character. */ * width. */
int parse_char(const char *str, int *chr, size_t *col int parse_char(const char *buf, int *chr
#ifdef NANO_WIDE #ifdef NANO_WIDE
, bool *bad_char , bool *bad_chr
#endif #endif
) , size_t *col)
{ {
int wide_str, wide_str_len; int wide_buf, mb_buf_len;
assert(str != NULL); assert(buf != NULL);
#ifdef NANO_WIDE #ifdef NANO_WIDE
if (bad_char != NULL) if (bad_chr != NULL)
*bad_char = FALSE; *bad_chr = FALSE;
if (!ISSET(NO_UTF8)) { if (!ISSET(NO_UTF8)) {
wchar_t tmp; wchar_t tmp;
/* Get the wide character equivalent of the multibyte /* Get the wide character equivalent of the multibyte
* character. */ * character. */
wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX); mb_buf_len = mbtowc(&tmp, buf, MB_CUR_MAX);
wide_str = (int)tmp; wide_buf = (int)tmp;
/* If str contains a null byte or an invalid multibyte /* If buf contains a null byte or an invalid multibyte
* character, interpret str's first byte as a single-byte * character, interpret buf's first byte as a single-byte
* sequence and set bad_char to TRUE. */ * sequence and set bad_chr to TRUE. */
if (wide_str_len <= 0) { if (mb_buf_len <= 0) {
wide_str_len = 1; mb_buf_len = 1;
wide_str = (unsigned char)*str; wide_buf = (unsigned char)*buf;
if (bad_char != NULL) if (bad_chr != NULL)
*bad_char = TRUE; *bad_chr = TRUE;
} }
/* Save the wide character in chr. */ /* Save the wide character in chr. */
if (chr != NULL) if (chr != NULL)
*chr = wide_str; *chr = wide_buf;
/* Save the column width of the wide character in col. */ /* Save the column width of the wide character in col. */
if (col != NULL) { if (col != NULL) {
/* If we have a tab, get its width in columns using the /* If we have a tab, get its width in columns using the
* current value of col. */ * current value of col. */
if (wide_str == '\t') if (wide_buf == '\t')
*col += tabsize - *col % tabsize; *col += tabsize - *col % tabsize;
/* If we have a control character, get its width using one /* If we have a control character, get its width using one
* column for the "^" that will be displayed in front of it, * column for the "^" that will be displayed in front of it,
* and the width in columns of its visible equivalent as * and the width in columns of its visible equivalent as
* returned by control_rep(). */ * returned by control_rep(). */
else if (is_cntrl_char(wide_str)) { else if (is_cntrl_char(wide_buf)) {
char *ctrl_wide_str = charalloc(MB_CUR_MAX); char *ctrl_mb_buf = charalloc(MB_CUR_MAX);
(*col)++; (*col)++;
wide_str = control_rep((unsigned char)wide_str); wide_buf = control_rep((unsigned char)wide_buf);
if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1) { if (wctomb(ctrl_mb_buf, (wchar_t)wide_buf) != -1) {
int width = wcwidth(wide_str); int width = wcwidth((wchar_t)wide_buf);
if (width != -1) if (width != -1)
*col += width; *col += width;
@ -195,11 +195,11 @@ int parse_char(const char *str, int *chr, size_t *col
else else
(*col)++; (*col)++;
free(ctrl_wide_str); free(ctrl_mb_buf);
/* If we have a normal character, get its width in columns /* If we have a normal character, get its width in columns
* normally. */ * normally. */
} else { } else {
int width = wcwidth(wide_str); int width = wcwidth((wchar_t)wide_buf);
if (width != -1) if (width != -1)
*col += width; *col += width;
@ -207,25 +207,25 @@ int parse_char(const char *str, int *chr, size_t *col
} }
} else { } else {
#endif #endif
/* Interpret str's first character as a single-byte sequence. */ /* Interpret buf's first character as a single-byte sequence. */
wide_str_len = 1; mb_buf_len = 1;
wide_str = (unsigned char)*str; wide_buf = (unsigned char)*buf;
/* Save the single-byte sequence in chr as though it's a wide /* Save the single-byte sequence in chr as though it's a wide
* character. */ * character. */
if (chr != NULL) if (chr != NULL)
*chr = wide_str; *chr = wide_buf;
if (col != NULL) { if (col != NULL) {
/* If we have a tab, get its width in columns using the /* If we have a tab, get its width in columns using the
* current value of col. */ * current value of col. */
if (wide_str == '\t') if (wide_buf == '\t')
*col += tabsize - *col % tabsize; *col += tabsize - *col % tabsize;
/* If we have a control character, it's two columns wide: /* If we have a control character, it's two columns wide:
* one column for the "^" that will be displayed in front of * one column for the "^" that will be displayed in front of
* it, and one column for its visible equivalent as returned * it, and one column for its visible equivalent as returned
* by control_rep(). */ * by control_rep(). */
else if (is_cntrl_char(wide_str)) else if (is_cntrl_char(wide_buf))
*col += 2; *col += 2;
/* If we have a normal character, it's one column wide. */ /* If we have a normal character, it's one column wide. */
else else
@ -235,44 +235,44 @@ int parse_char(const char *str, int *chr, size_t *col
} }
#endif #endif
return wide_str_len; return mb_buf_len;
} }
/* Return the index in str of the beginning of the character before the /* Return the index in buf of the beginning of the character before the
* one at pos. */ * one at pos. */
size_t move_left(const char *str, size_t pos) size_t move_left(const char *buf, size_t pos)
{ {
size_t pos_prev = pos; size_t pos_prev = pos;
assert(str != NULL && pos <= strlen(str)); assert(str != NULL && pos <= strlen(buf));
/* There is no library function to move backward one multibyte /* There is no library function to move backward one multibyte
* character. Here is the naive, O(pos) way to do it. */ * character. Here is the naive, O(pos) way to do it. */
while (TRUE) { while (TRUE) {
int str_len = parse_char(str + pos - pos_prev, NULL, NULL int mb_buf_len = parse_char(buf + pos - pos_prev, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , NULL);
if (pos_prev <= str_len) if (pos_prev <= mb_buf_len)
break; break;
pos_prev -= str_len; pos_prev -= mb_buf_len;
} }
return pos - pos_prev; return pos - pos_prev;
} }
/* Return the index in buf of the first byte of the character that
 * follows the one starting at pos, i.e. pos plus the byte length that
 * parse_char() reports for the character at buf + pos. */
size_t move_right(const char *buf, size_t pos)
{
    int mb_buf_len = parse_char(buf + pos, NULL
#ifdef NANO_WIDE
            , NULL
#endif
            , NULL);

    return pos + mb_buf_len;
}
/* Fix the memory allocation for a string. */ /* Fix the memory allocation for a string. */

View File

@ -1936,11 +1936,11 @@ void do_statusbar_backspace(void)
void do_statusbar_delete(void) void do_statusbar_delete(void)
{ {
if (statusbar_x < statusbar_xend) { if (statusbar_x < statusbar_xend) {
int char_len = parse_char(answer + statusbar_x, NULL, NULL int char_len = parse_char(answer + statusbar_x, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , NULL);
charmove(answer + statusbar_x, answer + statusbar_x + char_len, charmove(answer + statusbar_x, answer + statusbar_x + char_len,
statusbar_xend - statusbar_x - char_len + 1); statusbar_xend - statusbar_x - char_len + 1);
@ -2056,11 +2056,11 @@ size_t actual_x(const char *str, size_t xplus)
assert(str != NULL); assert(str != NULL);
while (*str != '\0') { while (*str != '\0') {
int str_len = parse_char(str, NULL, &length int str_len = parse_char(str, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , &length);
if (length > xplus) if (length > xplus)
break; break;
@ -2085,11 +2085,11 @@ size_t strnlenpt(const char *str, size_t size)
assert(str != NULL); assert(str != NULL);
while (*str != '\0') { while (*str != '\0') {
int str_len = parse_char(str, NULL, &length int str_len = parse_char(str, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , &length);
str += str_len; str += str_len;
@ -2160,25 +2160,25 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
/* Throughout the loop, we maintain the fact that *buf displays at /* Throughout the loop, we maintain the fact that *buf displays at
* column start_col. */ * column start_col. */
while (start_col <= end_col && *buf != '\0') { while (start_col <= end_col && *buf != '\0') {
int wide_buf, wide_buf_len; int wide_buf, mb_buf_len;
size_t old_col = start_col;
#ifdef NANO_WIDE #ifdef NANO_WIDE
bool bad_char; bool bad_char;
#endif #endif
size_t old_col = start_col;
wide_buf_len = parse_char(buf, &wide_buf, &start_col mb_buf_len = parse_char(buf, &wide_buf
#ifdef NANO_WIDE #ifdef NANO_WIDE
, &bad_char , &bad_char
#endif #endif
); , &start_col);
#ifdef NANO_WIDE #ifdef NANO_WIDE
/* If buf contains a null byte or an invalid multibyte /* If buf contains a null byte or an invalid multibyte
* character, interpret that character as though it's a wide * character, interpret that character as though it's a wide
* character. */ * character. */
if (!ISSET(NO_UTF8) && bad_char) { if (!ISSET(NO_UTF8) && bad_char) {
char *bad_wide_buf = charalloc(MB_CUR_MAX); char *bad_mb_buf = charalloc(MB_CUR_MAX);
int bad_wide_buf_len; int bad_mb_buf_len;
/* If we have a control character, add one byte to account /* If we have a control character, add one byte to account
* for the "^" that will be displayed in front of it, and * for the "^" that will be displayed in front of it, and
@ -2191,12 +2191,12 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
/* Translate the wide character to its multibyte /* Translate the wide character to its multibyte
* equivalent. */ * equivalent. */
bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf); bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf);
if (bad_wide_buf_len != -1) if (bad_mb_buf_len != -1)
retval += bad_wide_buf_len; retval += bad_mb_buf_len;
free(bad_wide_buf); free(bad_mb_buf);
} else { } else {
#endif #endif
/* If we have a tab, get its width in bytes using the /* If we have a tab, get its width in bytes using the
@ -2208,23 +2208,22 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
* then add the number of bytes for its visible equivalent * then add the number of bytes for its visible equivalent
* as returned by control_rep(). */ * as returned by control_rep(). */
else if (is_cntrl_char(wide_buf)) { else if (is_cntrl_char(wide_buf)) {
char ctrl_wide_buf = char ctrl_mb_buf = control_rep((unsigned char)wide_buf);
control_rep((unsigned char)wide_buf);
retval++; retval++;
retval += parse_char(&ctrl_wide_buf, NULL, NULL retval += parse_char(&ctrl_mb_buf, NULL
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , NULL);
/* If we have a normal character, add its width in bytes /* If we have a normal character, add its width in bytes
* normally. */ * normally. */
} else } else
retval += wide_buf_len; retval += mb_buf_len;
#ifdef NANO_WIDE #ifdef NANO_WIDE
} }
buf += wide_buf_len; buf += mb_buf_len;
#endif #endif
} }
@ -2279,43 +2278,43 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
if (column < start_col || (dollars && column > 0 && if (column < start_col || (dollars && column > 0 &&
buf[start_index] != '\t')) { buf[start_index] != '\t')) {
int wide_buf, wide_buf_len; int wide_buf, mb_buf_len;
/* We don't display all of buf[start_index] since it starts to /* We don't display all of buf[start_index] since it starts to
* the left of the screen. */ * the left of the screen. */
wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL mb_buf_len = parse_char(buf + start_index, &wide_buf
#ifdef NANO_WIDE #ifdef NANO_WIDE
, NULL , NULL
#endif #endif
); , NULL);
if (is_cntrl_char(wide_buf)) { if (is_cntrl_char(wide_buf)) {
if (column < start_col) { if (column < start_col) {
char *ctrl_wide_buf = char *ctrl_mb_buf =
#ifdef NANO_WIDE #ifdef NANO_WIDE
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) : !ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
#endif #endif
charalloc(1); charalloc(1);
int ctrl_wide_buf_len, i; int ctrl_mb_buf_len, i;
wide_buf = control_rep((unsigned char)wide_buf); wide_buf = control_rep((unsigned char)wide_buf);
#ifdef NANO_WIDE #ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) if (!ISSET(NO_UTF8))
ctrl_wide_buf_len = wctomb(ctrl_wide_buf, ctrl_mb_buf_len = wctomb(ctrl_mb_buf,
(wchar_t)wide_buf); (wchar_t)wide_buf);
else { else {
#endif #endif
ctrl_wide_buf_len = 1; ctrl_mb_buf_len = 1;
ctrl_wide_buf[0] = (unsigned char)wide_buf; ctrl_mb_buf[0] = (unsigned char)wide_buf;
#ifdef NANO_WIDE #ifdef NANO_WIDE
} }
#endif #endif
for (i = 0; i < ctrl_wide_buf_len; i++) for (i = 0; i < ctrl_mb_buf_len; i++)
converted[index++] = ctrl_wide_buf[i]; converted[index++] = ctrl_mb_buf[i];
free(ctrl_wide_buf); free(ctrl_mb_buf);
#ifdef NANO_WIDE #ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) { if (!ISSET(NO_UTF8)) {
@ -2327,7 +2326,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#endif #endif
start_col++; start_col++;
start_index += wide_buf_len; start_index += mb_buf_len;
} }
} }
#ifdef NANO_WIDE #ifdef NANO_WIDE
@ -2335,22 +2334,22 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
converted[index++] = ' '; converted[index++] = ' ';
start_col++; start_col++;
start_index += wide_buf_len; start_index += mb_buf_len;
} }
#endif #endif
} }
while (index < alloc_len - 1 && buf[start_index] != '\0') { while (index < alloc_len - 1 && buf[start_index] != '\0') {
int wide_buf, wide_buf_len; int wide_buf, mb_buf_len;
#ifdef NANO_WIDE #ifdef NANO_WIDE
bool bad_char; bool bad_char;
#endif #endif
wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL mb_buf_len = parse_char(buf + start_index, &wide_buf
#ifdef NANO_WIDE #ifdef NANO_WIDE
, &bad_char , &bad_char
#endif #endif
); , NULL);
if (wide_buf == '\t') { if (wide_buf == '\t') {
converted[index++] = converted[index++] =
@ -2367,12 +2366,12 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
* contains an invalid multibyte control character, interpret * contains an invalid multibyte control character, interpret
* that character as though it's a normal control character. */ * that character as though it's a normal control character. */
} else if (is_cntrl_char(wide_buf)) { } else if (is_cntrl_char(wide_buf)) {
char *ctrl_wide_buf = char *ctrl_mb_buf =
#ifdef NANO_WIDE #ifdef NANO_WIDE
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) : !ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
#endif #endif
charalloc(1); charalloc(1);
int ctrl_wide_buf_len, i; int ctrl_mb_buf_len, i;
converted[index++] = '^'; converted[index++] = '^';
start_col++; start_col++;
@ -2380,20 +2379,20 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#ifdef NANO_WIDE #ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) if (!ISSET(NO_UTF8))
ctrl_wide_buf_len = wctomb(ctrl_wide_buf, ctrl_mb_buf_len = wctomb(ctrl_mb_buf,
(wchar_t)wide_buf); (wchar_t)wide_buf);
else { else {
#endif #endif
ctrl_wide_buf_len = 1; ctrl_mb_buf_len = 1;
ctrl_wide_buf[0] = (unsigned char)wide_buf; ctrl_mb_buf[0] = (unsigned char)wide_buf;
#ifdef NANO_WIDE #ifdef NANO_WIDE
} }
#endif #endif
for (i = 0; i < ctrl_wide_buf_len; i++) for (i = 0; i < ctrl_mb_buf_len; i++)
converted[index++] = ctrl_wide_buf[i]; converted[index++] = ctrl_mb_buf[i];
free(ctrl_wide_buf); free(ctrl_mb_buf);
#ifdef NANO_WIDE #ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) { if (!ISSET(NO_UTF8)) {
@ -2419,19 +2418,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
* character, interpret that character as though it's a * character, interpret that character as though it's a
* normal non-control character. */ * normal non-control character. */
if (!ISSET(NO_UTF8) && bad_char) { if (!ISSET(NO_UTF8) && bad_char) {
char *bad_wide_buf = charalloc(MB_CUR_MAX); char *bad_mb_buf = charalloc(MB_CUR_MAX);
int bad_wide_buf_len; int bad_mb_buf_len;
bad_wide_buf_len = wctomb(bad_wide_buf, bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf);
(wchar_t)wide_buf);
for (i = 0; i < bad_wide_buf_len; i++) for (i = 0; i < bad_mb_buf_len; i++)
converted[index++] = bad_wide_buf[i]; converted[index++] = bad_mb_buf[i];
free(bad_wide_buf); free(bad_mb_buf);
} else { } else {
#endif #endif
for (i = 0; i < wide_buf_len; i++) for (i = 0; i < mb_buf_len; i++)
converted[index++] = buf[start_index + i]; converted[index++] = buf[start_index + i];
#ifdef NANO_WIDE #ifdef NANO_WIDE
} }
@ -2446,7 +2444,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
start_col++; start_col++;
} }
start_index += wide_buf_len; start_index += mb_buf_len;
} }
if (index < alloc_len - 1) if (index < alloc_len - 1)