tweaks: elide a function that is an amalgam of three others
In addition, the function was used just once, had a weird return value, and now some more code can be excluded from a non-UTF8 build. Make use of the fact that any single-byte character always occupies just one column, and call the costly mbtowc() and wcwidth() only for characters that actually are multibyte.
master
parent
c5955d14ce
commit
cd09482231
30
src/chars.c
30
src/chars.c
|
@ -200,36 +200,6 @@ char control_mbrep(const char *c, bool isdata)
|
|||
return control_rep(*c);
|
||||
}
|
||||
|
||||
/* Assess how many bytes the given (multibyte) character occupies. Return -1
|
||||
* if the byte sequence is invalid, and return the number of bytes minus 8
|
||||
* when it encodes an invalid codepoint. Also, in the second parameter,
|
||||
* return the number of columns that the character occupies. */
|
||||
int length_of_char(const char *c, int *width)
|
||||
{
|
||||
#ifdef ENABLE_UTF8
|
||||
if (use_utf8 && (signed char)*c < 0) {
|
||||
wchar_t wc;
|
||||
int charlen = mbtowc(&wc, c, MAXCHARLEN);
|
||||
|
||||
/* If the sequence is invalid... */
|
||||
if (charlen < 0)
|
||||
return -1;
|
||||
|
||||
/* If the codepoint is invalid... */
|
||||
if (!is_valid_unicode(wc))
|
||||
return charlen - 8;
|
||||
else {
|
||||
*width = wcwidth(wc);
|
||||
/* If the codepoint is unassigned, assume a width of one. */
|
||||
if (*width < 0)
|
||||
*width = 1;
|
||||
return charlen;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* This function is equivalent to wcwidth() for multibyte characters. */
|
||||
int mbwidth(const char *c)
|
||||
{
|
||||
|
|
|
@ -210,7 +210,6 @@ bool is_ascii_cntrl_char(int c);
|
|||
bool is_cntrl_mbchar(const char *c);
|
||||
bool is_word_mbchar(const char *c, bool allow_punct);
|
||||
char control_mbrep(const char *c, bool isdata);
|
||||
int length_of_char(const char *c, int *width);
|
||||
int mbwidth(const char *c);
|
||||
char *make_mbchar(long chr, int *chr_mb_len);
|
||||
int char_length(const char *pointer);
|
||||
|
|
59
src/winio.c
59
src/winio.c
|
@ -1922,7 +1922,7 @@ char *display_string(const char *buf, size_t column, size_t span,
|
|||
}
|
||||
|
||||
while (*buf != '\0' && (column < beyond || mbwidth(buf) == 0)) {
|
||||
int charlength, charwidth = 1;
|
||||
int charlength, charwidth;
|
||||
|
||||
if (*buf == ' ') {
|
||||
/* Show a space as a visible character, or as a space. */
|
||||
|
@ -1961,7 +1961,7 @@ char *display_string(const char *buf, size_t column, size_t span,
|
|||
continue;
|
||||
}
|
||||
|
||||
charlength = length_of_char(buf, &charwidth);
|
||||
charlength = mblen(buf, MAXCHARLEN);
|
||||
|
||||
/* If buf contains a control character, represent it. */
|
||||
if (is_cntrl_mbchar(buf)) {
|
||||
|
@ -1972,29 +1972,46 @@ char *display_string(const char *buf, size_t column, size_t span,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* If buf contains a valid non-control character, simply copy it. */
|
||||
if (charlength > 0) {
|
||||
for (; charlength > 0; charlength--)
|
||||
converted[index++] = *(buf++);
|
||||
|
||||
column += charwidth;
|
||||
#ifdef USING_OLD_NCURSES
|
||||
if (charwidth > 1)
|
||||
seen_wide = TRUE;
|
||||
#endif
|
||||
/* A one-byte character is necessarily one column wide. */
|
||||
if (charlength == 1) {
|
||||
converted[index++] = *(buf++);
|
||||
column++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Represent an invalid starter byte with the Replacement Character. */
|
||||
converted[index++] = '\xEF';
|
||||
converted[index++] = '\xBF';
|
||||
converted[index++] = '\xBD';
|
||||
column++;
|
||||
buf++;
|
||||
#ifdef ENABLE_UTF8
|
||||
/* For a multibyte character, check whether it is valid,
|
||||
* and determine whether it occupies one or two columns. */
|
||||
wchar_t wc;
|
||||
int length = mbtowc(&wc, buf, MAXCHARLEN);
|
||||
|
||||
/* For invalid codepoints, skip extra bytes. */
|
||||
if (charlength < -1)
|
||||
buf += charlength + 7;
|
||||
if (charlength != length)
|
||||
die("Different character lengths");
|
||||
|
||||
/* When invalid, represent it with the Replacement Character. */
|
||||
if (charlength < 0 || !is_valid_unicode(wc)) {
|
||||
converted[index++] = '\xEF';
|
||||
converted[index++] = '\xBF';
|
||||
converted[index++] = '\xBD';
|
||||
column++;
|
||||
buf += (charlength > 0 ? charlength : 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For any valid character, just copy its bytes. */
|
||||
for (; charlength > 0; charlength--)
|
||||
converted[index++] = *(buf++);
|
||||
|
||||
charwidth = wcwidth(wc);
|
||||
|
||||
/* If the codepoint is unassigned, assume a width of one. */
|
||||
column += (charwidth < 0 ? 1 : charwidth);
|
||||
|
||||
#ifdef USING_OLD_NCURSES
|
||||
if (charwidth > 1)
|
||||
seen_wide = TRUE;
|
||||
#endif
|
||||
#endif /* ENABLE_UTF8 */
|
||||
}
|
||||
|
||||
/* If there is more text than can be shown, make room for the ">". */
|
||||
|
|
Loading…
Reference in New Issue