tweaks: avoid parsing a multibyte character twice
The number of bytes in the character was determined twice: first in mbwidth() and then in char_length(). Do it just once, in mbtowide(). Also, avoid calling is_cntrl_char(), because it does unneeded checks when we already know that the high bit is set. This duplicates some code, but advance_over() is called a lot, so it is important that it is as fast as possible. This shouldn't slow down plain ASCII, as the extra checks (use_utf8 and *string < 0xA0) are done only for non-ASCII bytes (apart from DEL).
master
parent
f11931a0dd
commit
78f92e044a
29
src/chars.c
29
src/chars.c
|
@ -334,13 +334,26 @@ int collect_char(const char *string, char *thechar)
|
||||||
int advance_over(const char *string, size_t *column)
|
int advance_over(const char *string, size_t *column)
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_UTF8
|
#ifdef ENABLE_UTF8
|
||||||
if ((signed char)*string < 0) {
|
if ((signed char)*string < 0 && use_utf8) {
|
||||||
if (is_cntrl_char(string))
|
/* A UTF-8 upper control code has two bytes and takes two columns. */
|
||||||
|
if (((unsigned char)string[0] == 0xC2 && (signed char)string[1] < -96)) {
|
||||||
*column += 2;
|
*column += 2;
|
||||||
else
|
return 2;
|
||||||
*column += mbwidth(string);
|
} else {
|
||||||
|
wchar_t wc;
|
||||||
|
int charlen = mbtowide(&wc, string);
|
||||||
|
|
||||||
return char_length(string);
|
if (charlen < 0) {
|
||||||
|
*column += 1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int width = wcwidth(wc);
|
||||||
|
|
||||||
|
*column += (width < 0) ? 1 : width;
|
||||||
|
|
||||||
|
return charlen;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -349,12 +362,8 @@ int advance_over(const char *string, size_t *column)
|
||||||
*column += tabsize - *column % tabsize;
|
*column += tabsize - *column % tabsize;
|
||||||
else
|
else
|
||||||
*column += 2;
|
*column += 2;
|
||||||
} else if (*string == 0x7F)
|
} else if (0x7E < (unsigned char)*string && (unsigned char)*string < 0xA0)
|
||||||
*column += 2;
|
*column += 2;
|
||||||
#ifndef ENABLE_UTF8
|
|
||||||
else if (0x7F < (unsigned char)*string && (unsigned char)*string < 0xA0)
|
|
||||||
*column += 2;
|
|
||||||
#endif
|
|
||||||
else
|
else
|
||||||
*column += 1;
|
*column += 1;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue