From 9a7ba5db7949e441dcdfa7f445aa2a132d41428a Mon Sep 17 00:00:00 2001
From: Benno Schulenberg
Date: Mon, 4 Jun 2018 13:39:54 +0200
Subject: [PATCH] chars: speed up the parsing of a character for the plain
 ASCII case

Again, if the most significant bit of a UTF-8 byte is zero, it means
the character is a single byte and we can skip the call of mblen(),
*and* if the character is one byte it also occupies just one column,
because all ASCII characters are single-column characters -- apart
from control codes.

This partially addresses https://savannah.gnu.org/bugs/?51491.
---
 src/chars.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/chars.c b/src/chars.c
index bbd8a98a..fcbc9f4f 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -294,14 +294,17 @@ int parse_mbchar(const char *buf, char *chr, size_t *col)
 {
 #ifdef ENABLE_UTF8
 	if (use_utf8) {
-		/* Get the number of bytes in the multibyte character. */
-		int length = mblen(buf, MAXCHARLEN);
+		int length;
 
-		/* When the multibyte sequence is invalid, only take the first byte. */
-		if (length <= 0) {
-			IGNORE_CALL_RESULT(mblen(NULL, 0));
+		/* If this is a UTF-8 starter byte, get the number of bytes of the character. */
+		if ((signed char)*buf < 0) {
+			length = mblen(buf, MAXCHARLEN);
+
+			/* When the multibyte sequence is invalid, only take the first byte. */
+			if (length <= 0)
+				length = 1;
+		} else
 			length = 1;
-		}
 
 		/* When requested, store the multibyte character in chr. */
 		if (chr != NULL) {
@@ -322,7 +325,9 @@ int parse_mbchar(const char *buf, char *chr, size_t *col)
 			else if (is_cntrl_mbchar(buf)) {
 				*col += 2;
 			/* If we have a normal character, get its width normally. */
-			} else
+			} else if (length == 1)
+				*col += 1;
+			else
 				*col += mbwidth(buf);
 		}