From 78f92e044a1dcaf2a74f7379fa3307e0a4525523 Mon Sep 17 00:00:00 2001
From: Benno Schulenberg <bensberg@telfort.nl>
Date: Fri, 9 Apr 2021 10:52:29 +0200
Subject: [PATCH] tweaks: avoid parsing a multibyte character twice

The number of bytes in the character was determined twice: first in
mbwidth() and then in char_length().  Do it just once, in mbtowide().

Also, avoid calling is_cntrl_char(), because it does unneeded checks
when we already know that the high bit is set.

This duplicates some code, but advance_over() is called a lot, so it
is important that it is as fast as possible.

This shouldn't slow down plain ASCII, as the extra checks (use_utf8
and *string < 0xA0) are done only for non-ASCII (apart from DEL).
---
 src/chars.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/chars.c b/src/chars.c
index 2a923215..620d2b26 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -334,13 +334,26 @@ int collect_char(const char *string, char *thechar)
 int advance_over(const char *string, size_t *column)
 {
 #ifdef ENABLE_UTF8
-	if ((signed char)*string < 0) {
-		if (is_cntrl_char(string))
+	if ((signed char)*string < 0 && use_utf8) {
+		/* A UTF-8 upper control code has two bytes and takes two columns. */
+		if (((unsigned char)string[0] == 0xC2 && (signed char)string[1] < -96)) {
 			*column += 2;
-		else
-			*column += mbwidth(string);
+			return 2;
+		} else {
+			wchar_t wc;
+			int charlen = mbtowide(&wc, string);
 
-		return char_length(string);
+			if (charlen < 0) {
+				*column += 1;
+				return 1;
+			}
+
+			int width = wcwidth(wc);
+
+			*column += (width < 0) ? 1 : width;
+
+			return charlen;
+		}
 	}
 #endif
 
@@ -349,12 +362,8 @@ int advance_over(const char *string, size_t *column)
 			*column += tabsize - *column % tabsize;
 		else
 			*column += 2;
-	} else if (*string == 0x7F)
+	} else if (0x7E < (unsigned char)*string && (unsigned char)*string < 0xA0)
 		*column += 2;
-#ifndef ENABLE_UTF8
-	else if (0x7F < (unsigned char)*string && (unsigned char)*string < 0xA0)
-		*column += 2;
-#endif
 	else
 		*column += 1;