chars: speed up the determination whether something is a control character
Use knowledge of UTF-8 instead of converting to wide characters first.
master
parent
019d7b34ca
commit
af53c56ec8
23
src/chars.c
23
src/chars.c
|
@ -150,20 +150,9 @@ bool is_ascii_cntrl_char(int c)
|
||||||
* handles high-bit control characters. */
|
* handles high-bit control characters. */
|
||||||
bool is_cntrl_char(int c)
|
bool is_cntrl_char(int c)
|
||||||
{
|
{
|
||||||
return (-128 <= c && c < -96) || (0 <= c && c < 32) ||
|
return ((c & 0x60) == 0 || c == 127);
|
||||||
(127 <= c && c < 160);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ENABLE_UTF8
|
|
||||||
/* This function is equivalent to iscntrl() for wide characters, except
|
|
||||||
* in that it also handles wide control characters with their high bits
|
|
||||||
* set. */
|
|
||||||
bool is_cntrl_wchar(wchar_t wc)
|
|
||||||
{
|
|
||||||
return (0 <= wc && wc < 32) || (127 <= wc && wc < 160);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* This function is equivalent to iscntrl() for multibyte characters,
|
/* This function is equivalent to iscntrl() for multibyte characters,
|
||||||
* except in that it also handles multibyte control characters with
|
* except in that it also handles multibyte control characters with
|
||||||
* their high bits set. */
|
* their high bits set. */
|
||||||
|
@ -173,14 +162,8 @@ bool is_cntrl_mbchar(const char *c)
|
||||||
|
|
||||||
#ifdef ENABLE_UTF8
|
#ifdef ENABLE_UTF8
|
||||||
if (use_utf8) {
|
if (use_utf8) {
|
||||||
wchar_t wc;
|
return ((c[0] & 0xE0) == 0 || c[0] == 127 ||
|
||||||
|
((signed char)c[0] == -62 && (signed char)c[1] < -96));
|
||||||
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
|
|
||||||
mbtowc_reset();
|
|
||||||
wc = bad_wchar;
|
|
||||||
}
|
|
||||||
|
|
||||||
return is_cntrl_wchar(wc);
|
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
return is_cntrl_char((unsigned char)*c);
|
return is_cntrl_char((unsigned char)*c);
|
||||||
|
|
|
@ -183,9 +183,6 @@ bool is_alnum_mbchar(const char *c);
|
||||||
bool is_blank_mbchar(const char *c);
|
bool is_blank_mbchar(const char *c);
|
||||||
bool is_ascii_cntrl_char(int c);
|
bool is_ascii_cntrl_char(int c);
|
||||||
bool is_cntrl_char(int c);
|
bool is_cntrl_char(int c);
|
||||||
#ifdef ENABLE_UTF8
|
|
||||||
bool is_cntrl_wchar(wchar_t wc);
|
|
||||||
#endif
|
|
||||||
bool is_cntrl_mbchar(const char *c);
|
bool is_cntrl_mbchar(const char *c);
|
||||||
bool is_punct_mbchar(const char *c);
|
bool is_punct_mbchar(const char *c);
|
||||||
bool is_word_mbchar(const char *c, bool allow_punct);
|
bool is_word_mbchar(const char *c, bool allow_punct);
|
||||||
|
|
Loading…
Reference in New Issue