chars: represent the high-bit controls more intelligibly

Instead of showing the upper control codes like this:

   ^À ^Á ^Â ^Ã ^Ä ^Å ^Æ ^Ç ^È ^É ^Ê ^Ë ^Ì ^Í ^Î ^Ï
   ^Ð ^Ñ ^Ò ^Ó ^Ô ^Õ ^Ö ^× ^Ø ^Ù ^Ú ^Û ^Ü ^Ý ^Þ ^ß

show them like this:

   ^` ^a ^b ^c ^d ^e ^f ^g ^h ^i ^j ^k ^l ^m ^n ^o
   ^p ^q ^r ^s ^t ^u ^v ^w ^x ^y ^z ^{ ^| ^} ^~ ^=

The lower control codes continue to be shown like this:

   ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N ^O
   ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] ^^ ^_

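The representation of DEL (0x7F) continues as ^?.  The last high-bit
control code (0x9F) is shown as ^= because the character that would
follow ~ in the sequence is DEL itself, which is not printable.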

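Further, use knowledge of UTF-8 to avoid a roundtrip through
wide characters: the high-bit control codes (U+0080..U+009F) are
encoded as the byte 0xC2 followed by 0x80..0x9F, so the second
byte alone determines the representation.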
master
Benno Schulenberg 2016-05-30 11:28:16 +02:00
parent 07a39e8e18
commit 03586c60da
2 changed files with 12 additions and 7 deletions

View File

@ -219,7 +219,7 @@ bool is_word_mbchar(const char *c, bool allow_punct)
/* c is a control character. It displays as ^@, ^?, or ^[ch], where ch
* is (c + 64). We return that character. */
char control_rep(char c)
char control_rep(const signed char c)
{
assert(is_cntrl_char(c));
@ -228,6 +228,10 @@ char control_rep(char c)
return '@';
else if (c == NANO_CONTROL_8)
return '?';
else if (c == -97)
return '=';
else if (c < 0)
return c + 224;
else
return c + 64;
}
@ -250,17 +254,18 @@ wchar_t control_wrep(wchar_t wc)
#endif
/* c is a multibyte control character. It displays as ^@, ^?, or ^[ch],
* where ch is (c + 64). We return that multibyte character. */
* where ch is (c + 64). We return that single-byte character. */
char *control_mbrep(const char *c, char *crep, int *crep_len)
{
assert(c != NULL && crep != NULL && crep_len != NULL);
#ifdef ENABLE_UTF8
if (use_utf8) {
wchar_t wc;
IGNORE_CALL_RESULT(mbtowc(&wc, c, MB_CUR_MAX));
*crep_len = wctomb(crep, control_wrep(wc));
if (0 <= c[0] && c[0] <= 127)
*crep = control_rep(c[0]);
else
*crep = control_rep(c[1]);
*crep_len = 1;
} else
#endif
{

View File

@ -189,7 +189,7 @@ bool is_cntrl_wchar(wchar_t wc);
bool is_cntrl_mbchar(const char *c);
bool is_punct_mbchar(const char *c);
bool is_word_mbchar(const char *c, bool allow_punct);
char control_rep(char c);
char control_rep(const signed char c);
#ifdef ENABLE_UTF8
wchar_t control_wrep(wchar_t wc);
#endif