Reverting my own patch that claimed that UTF8 is a stateless encoding.

Apparently there is /some/ state somewhere after all.  Don't have time
now to figure out where exactly.


git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@5369 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
master
Benno Schulenberg 2015-09-04 19:34:55 +00:00
parent 459d9f8ba8
commit 9205c28865
2 changed files with 48 additions and 10 deletions

View File

@ -1,3 +1,7 @@
2015-09-04 Benno Schulenberg <bensberg@justemail.net>
* src/chars.c: Reverting r5354 from August 12. This fixes Savannah
bug #45874. Apparently there is /some/ state somewhere after all.
2015-08-29 Benno Schulenberg <bensberg@justemail.net>
* doc/syntax/autoconf.nanor: New file; syntax colouring for Autoconf.
* src/files.c: Rewrap some lines and tweak two comments.

View File

@ -91,6 +91,16 @@ bool is_byte(int c)
return ((unsigned int)c == (unsigned char)c);
}
/* Reset the conversion state that mbtowc() keeps internally.  Per the
 * C standard, calling mbtowc() with a null string pointer puts the
 * function back into its initial shift state; the value it returns in
 * that case is of no interest here, so it is explicitly discarded. */
void mbtowc_reset(void)
{
IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}
/* Reset the conversion state that wctomb() keeps internally.  Per the
 * C standard, calling wctomb() with a null output buffer puts the
 * function back into its initial shift state; the value it returns in
 * that case is of no interest here, so it is explicitly discarded. */
void wctomb_reset(void)
{
IGNORE_CALL_RESULT(wctomb(NULL, 0));
}
/* This function is equivalent to isalnum() for multibyte characters. */
bool is_alnum_mbchar(const char *c)
{
@ -100,8 +110,10 @@ bool is_alnum_mbchar(const char *c)
if (use_utf8) {
wchar_t wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = bad_wchar;
}
return iswalnum(wc);
} else
@ -118,8 +130,10 @@ bool is_blank_mbchar(const char *c)
if (use_utf8) {
wchar_t wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = bad_wchar;
}
return iswblank(wc);
} else
@ -163,8 +177,10 @@ bool is_cntrl_mbchar(const char *c)
if (use_utf8) {
wchar_t wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = bad_wchar;
}
return is_cntrl_wchar(wc);
} else
@ -181,8 +197,10 @@ bool is_punct_mbchar(const char *c)
if (use_utf8) {
wchar_t wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = bad_wchar;
}
return iswpunct(wc);
} else
@ -246,13 +264,16 @@ char *control_mbrep(const char *c, char *crep, int *crep_len)
wchar_t wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
*crep_len = bad_mbchar_len;
strncpy(crep, bad_mbchar, *crep_len);
} else {
*crep_len = wctomb(crep, control_wrep(wc));
if (*crep_len < 0)
if (*crep_len < 0) {
wctomb_reset();
*crep_len = 0;
}
}
} else {
#endif
@ -278,13 +299,16 @@ char *mbrep(const char *c, char *crep, int *crep_len)
/* Reject invalid Unicode characters. */
if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
mbtowc_reset();
*crep_len = bad_mbchar_len;
strncpy(crep, bad_mbchar, *crep_len);
} else {
*crep_len = wctomb(crep, wc);
if (*crep_len < 0)
if (*crep_len < 0) {
wctomb_reset();
*crep_len = 0;
}
}
} else {
#endif
@ -307,13 +331,17 @@ int mbwidth(const char *c)
wchar_t wc;
int width;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = bad_wchar;
}
width = wcwidth(wc);
if (width == -1)
width = wcwidth(bad_wchar);
if (width == -1) {
wc = bad_wchar;
width = wcwidth(wc);
}
return width;
} else
@ -348,8 +376,10 @@ char *make_mbchar(long chr, int *chr_mb_len)
*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);
/* Reject invalid Unicode characters. */
if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr))
if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
wctomb_reset();
*chr_mb_len = 0;
}
} else {
#endif
*chr_mb_len = 1;
@ -538,6 +568,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
s1_mb_len = parse_mbchar(s1, s1_mb, NULL);
if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
mbtowc_reset();
ws1 = (unsigned char)*s1_mb;
bad_s1_mb = TRUE;
}
@ -545,6 +576,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
s2_mb_len = parse_mbchar(s2, s2_mb, NULL);
if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
mbtowc_reset();
ws2 = (unsigned char)*s2_mb;
bad_s2_mb = TRUE;
}
@ -773,6 +805,7 @@ char *mbstrchr(const char *s, const char *c)
wchar_t ws, wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = (unsigned char)*c;
bad_c_mb = TRUE;
}
@ -781,6 +814,7 @@ char *mbstrchr(const char *s, const char *c)
int s_mb_len = parse_mbchar(s, s_mb, NULL);
if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
mbtowc_reset();
ws = (unsigned char)*s;
bad_s_mb = TRUE;
}