screen: elide another intermediate buffer for every visible character

master
Benno Schulenberg 2016-06-06 12:48:26 +02:00
parent 960e848cc7
commit 0894587305
3 changed files with 55 additions and 43 deletions

View File

@ -35,8 +35,6 @@
static bool use_utf8 = FALSE;
/* Whether we've enabled UTF-8 support. */
static const char *const bad_mbchar = "\xEF\xBF\xBD";
static const int bad_mbchar_len = 3;
/* Enable UTF-8 support. */
void utf8_init(void)
@ -230,38 +228,32 @@ char control_mbrep(const char *c)
return control_rep(*c);
}
/* c is a multibyte non-control character. We return that multibyte
* character in crep. If c is an invalid multibyte sequence (or encodes an
* invalid codepoint), crep will instead hold Unicode 0xFFFD (Replacement
* Character). */
char *mbrep(const char *c, char *crep, int *crep_len)
/* Assess how many bytes the given (multibyte) character occupies. Return -1
* if the byte sequence is invalid, and return the number of bytes minus 8
* when the byte sequence encodes an invalid codepoint. */
int length_of_char(const char *c)
{
assert(c != NULL && crep != NULL && crep_len != NULL);
assert(c != NULL);
#ifdef ENABLE_UTF8
if (use_utf8) {
wchar_t wc;
int charlen = mbtowc(&wc, c, MB_CUR_MAX);
/* Reject invalid Unicode characters. */
if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
/* If the sequence is invalid... */
if (charlen < 0) {
mbtowc_reset();
*crep_len = bad_mbchar_len;
strncpy(crep, bad_mbchar, *crep_len);
} else {
*crep_len = wctomb(crep, wc);
if (*crep_len < 0) {
wctomb_reset();
*crep_len = 0;
}
return -1;
}
/* If the codepoint is invalid... */
if (!is_valid_unicode(wc))
return charlen - 8;
else
return charlen;
} else
#endif
{
*crep_len = 1;
*crep = *c;
}
return crep;
return 1;
}
/* This function is equivalent to wcwidth() for multibyte characters. */

View File

@ -188,7 +188,7 @@ bool is_punct_mbchar(const char *c);
bool is_word_mbchar(const char *c, bool allow_punct);
char control_rep(const signed char c);
char control_mbrep(const char *c);
char *mbrep(const char *c, char *crep, int *crep_len);
int length_of_char(const char *c);
int mbwidth(const char *c);
int mb_cur_max(void);
char *make_mbchar(long chr, int *chr_mb_len);

View File

@ -1780,6 +1780,8 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
}
while (*buf != '\0') {
int charlength;
if (*buf == ' ') {
/* Show a space as a visible character, or as a space. */
#ifndef NANO_TINY
@ -1792,6 +1794,8 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#endif
converted[index++] = ' ';
start_col++;
buf++;
continue;
} else if (*buf == '\t') {
/* Show a tab as a visible character, or as a space. */
#ifndef NANO_TINY
@ -1809,30 +1813,46 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
converted[index++] = ' ';
start_col++;
}
buf++;
continue;
}
charlength = length_of_char(buf);
/* If buf contains a control character, represent it. */
} else if (is_cntrl_mbchar(buf)) {
if (is_cntrl_mbchar(buf)) {
converted[index++] = '^';
converted[index++] = control_mbrep(buf);
start_col += 2;
/* If buf contains a non-control character, interpret it. If buf
* contains an invalid multibyte sequence, display it as such. */
} else {
char *character = charalloc(mb_cur_max());
int charlen, i;
character = mbrep(buf, character, &charlen);
for (i = 0; i < charlen; i++)
converted[index++] = character[i];
start_col += mbwidth(character);
free(character);
if (mbwidth(buf) > 1)
seen_wide = TRUE;
buf += charlength;
continue;
}
buf += parse_mbchar(buf, NULL, NULL);
/* If buf contains a valid non-control character, simply copy it. */
if (charlength > 0) {
int width = mbwidth(buf);
for (; charlength > 0; charlength--)
converted[index++] = *(buf++);
start_col += width;
if (width > 1)
seen_wide = TRUE;
continue;
}
/* Represent an invalid sequence with the Replacement Character. */
converted[index++] = '\xEF';
converted[index++] = '\xBF';
converted[index++] = '\xBD';
start_col += 1;
buf++;
/* For invalid codepoints, skip the remaining bytes of the sequence:
* charlength is the byte count minus 8, and one byte was skipped above. */
if (charlength < -1)
buf += charlength + 7;
}
/* Null-terminate converted. */