tweaks: elide a function that is an amalgam of three others

In addition, the function was used just once and had a weird return value, and eliding it allows some more code to be excluded from a non-UTF8 build. Make use of the fact that any single-byte character always occupies just one column, and call the costly mbtowc() and wcwidth() only for characters that are actually multibyte.
2019-06-10 19:27:42 +02:00 · 2019-06-10 19:27:42 +02:00 · cd09482231
parent c5955d14ce
commit cd09482231
3 changed files with 38 additions and 52 deletions
--- a/src/chars.c
+++ b/src/chars.c
@ -200,36 +200,6 @@ char control_mbrep(const char *c, bool isdata)
 		return control_rep(*c);
 }

-/* Assess how many bytes the given (multibyte) character occupies.  Return -1
- * if the byte sequence is invalid, and return the number of bytes minus 8
- * when it encodes an invalid codepoint.  Also, in the second parameter,
- * return the number of columns that the character occupies. */
-int length_of_char(const char *c, int *width)
-{
-#ifdef ENABLE_UTF8
-	if (use_utf8 && (signed char)*c < 0) {
-		wchar_t wc;
-		int charlen = mbtowc(&wc, c, MAXCHARLEN);
-
-		/* If the sequence is invalid... */
-		if (charlen < 0)
-			return -1;
-
-		/* If the codepoint is invalid... */
-		if (!is_valid_unicode(wc))
-			return charlen - 8;
-		else {
-			*width = wcwidth(wc);
-			/* If the codepoint is unassigned, assume a width of one. */
-			if (*width < 0)
-				*width = 1;
-			return charlen;
-		}
-	} else
-#endif
-		return 1;
-}
-
 /* This function is equivalent to wcwidth() for multibyte characters. */
 int mbwidth(const char *c)
 {
--- a/src/proto.h
+++ b/src/proto.h
@ -210,7 +210,6 @@ bool is_ascii_cntrl_char(int c);
 bool is_cntrl_mbchar(const char *c);
 bool is_word_mbchar(const char *c, bool allow_punct);
 char control_mbrep(const char *c, bool isdata);
-int length_of_char(const char *c, int *width);
 int mbwidth(const char *c);
 char *make_mbchar(long chr, int *chr_mb_len);
 int char_length(const char *pointer);
--- a/src/winio.c
+++ b/src/winio.c
@ -1922,7 +1922,7 @@ char *display_string(const char *buf, size_t column, size_t span,
 	}

 	while (*buf != '\0' && (column < beyond || mbwidth(buf) == 0)) {
-		int charlength, charwidth = 1;
+		int charlength, charwidth;

 		if (*buf == ' ') {
 			/* Show a space as a visible character, or as a space. */
@ -1961,7 +1961,7 @@ char *display_string(const char *buf, size_t column, size_t span,
 			continue;
 		}

-		charlength = length_of_char(buf, &charwidth);
+		charlength = mblen(buf, MAXCHARLEN);

 		/* If buf contains a control character, represent it. */
 		if (is_cntrl_mbchar(buf)) {
@ -1972,29 +1972,46 @@ char *display_string(const char *buf, size_t column, size_t span,
 			continue;
 		}

-		/* If buf contains a valid non-control character, simply copy it. */
-		if (charlength > 0) {
-			for (; charlength > 0; charlength--)
-				converted[index++] = *(buf++);
-
-			column += charwidth;
-#ifdef USING_OLD_NCURSES
-			if (charwidth > 1)
-				seen_wide = TRUE;
-#endif
+		/* A one-byte character is necessarily one column wide. */
+		if (charlength == 1) {
+			converted[index++] = *(buf++);
+			column++;
 			continue;
 		}

-		/* Represent an invalid starter byte with the Replacement Character. */
-		converted[index++] = '\xEF';
-		converted[index++] = '\xBF';
-		converted[index++] = '\xBD';
-		column++;
-		buf++;
+#ifdef ENABLE_UTF8
+		/* For a multibyte character, check whether it is valid,
+		 * and determine whether it occupies one or two columns. */
+		wchar_t wc;
+		int length = mbtowc(&wc, buf, MAXCHARLEN);

-		/* For invalid codepoints, skip extra bytes. */
-		if (charlength < -1)
-			buf += charlength + 7;
+		if (charlength != length)
+			die("Different character lengths");
+
+		/* When invalid, represent it with the Replacement Character. */
+		if (charlength < 0 || !is_valid_unicode(wc)) {
+			converted[index++] = '\xEF';
+			converted[index++] = '\xBF';
+			converted[index++] = '\xBD';
+			column++;
+			buf += (charlength > 0 ? charlength : 1);
+			continue;
+		}
+
+		/* For any valid character, just copy its bytes. */
+		for (; charlength > 0; charlength--)
+			converted[index++] = *(buf++);
+
+		charwidth = wcwidth(wc);
+
+		/* If the codepoint is unassigned, assume a width of one. */
+		column += (charwidth < 0 ? 1 : charwidth);
+
+#ifdef USING_OLD_NCURSES
+		if (charwidth > 1)
+			seen_wide = TRUE;
+#endif
+#endif /* ENABLE_UTF8 */
 	}

 	/* If there is more text than can be shown, make room for the ">". */