tweaks: avoid parsing a multibyte character twice
The number of bytes in the character was determined twice: first in mbwidth() and then in char_length(). Do it just once, in mbtowide(). Also, avoid calling is_cntrl_char(), because it does unneeded checks when we already know that the high bit is set. This duplicates some code, but advance_over() is called a lot, so it is important that it is as fast as possible. This shouldn't slow down plain ASCII, as the extra checks (use_utf8 and *string < 0xA0) are done only for non-ASCII (apart from DEL).
master
parent
f11931a0dd
commit
78f92e044a
29
src/chars.c
29
src/chars.c
|
@ -334,13 +334,26 @@ int collect_char(const char *string, char *thechar)
|
|||
int advance_over(const char *string, size_t *column)
|
||||
{
|
||||
#ifdef ENABLE_UTF8
|
||||
if ((signed char)*string < 0) {
|
||||
if (is_cntrl_char(string))
|
||||
if ((signed char)*string < 0 && use_utf8) {
|
||||
/* A UTF-8 upper control code has two bytes and takes two columns. */
|
||||
if (((unsigned char)string[0] == 0xC2 && (signed char)string[1] < -96)) {
|
||||
*column += 2;
|
||||
else
|
||||
*column += mbwidth(string);
|
||||
return 2;
|
||||
} else {
|
||||
wchar_t wc;
|
||||
int charlen = mbtowide(&wc, string);
|
||||
|
||||
return char_length(string);
|
||||
if (charlen < 0) {
|
||||
*column += 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int width = wcwidth(wc);
|
||||
|
||||
*column += (width < 0) ? 1 : width;
|
||||
|
||||
return charlen;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -349,12 +362,8 @@ int advance_over(const char *string, size_t *column)
|
|||
*column += tabsize - *column % tabsize;
|
||||
else
|
||||
*column += 2;
|
||||
} else if (*string == 0x7F)
|
||||
} else if (0x7E < (unsigned char)*string && (unsigned char)*string < 0xA0)
|
||||
*column += 2;
|
||||
#ifndef ENABLE_UTF8
|
||||
else if (0x7F < (unsigned char)*string && (unsigned char)*string < 0xA0)
|
||||
*column += 2;
|
||||
#endif
|
||||
else
|
||||
*column += 1;
|
||||
|
||||
|
|
Loading…
Reference in New Issue