From 20058a1b639812ca235fbbd9798fd5f922d37a0b Mon Sep 17 00:00:00 2001
From: Benno Schulenberg
Date: Tue, 2 Aug 2016 22:09:22 +0200
Subject: [PATCH] spelling: don't consider digits as word parts, because GNU spell doesn't

This fixes https://savannah.gnu.org/bugs/?48660.
---
 src/chars.c | 20 ++++++++++++++++++++
 src/proto.h |  1 +
 src/utils.c | 11 +++++------
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/chars.c b/src/chars.c
index cd19c4ae..2c3c2038 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -93,6 +93,26 @@ void wctomb_reset(void)
     IGNORE_CALL_RESULT(wctomb(NULL, 0));
 }
 
+/* This function is equivalent to isalpha() for multibyte characters. */
+bool is_alpha_mbchar(const char *c)
+{
+    assert(c != NULL);
+
+#ifdef ENABLE_UTF8
+    if (use_utf8) {
+	wchar_t wc;
+
+	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+	    mbtowc_reset();
+	    return 0;
+	}
+
+	return iswalpha(wc);
+    } else
+#endif
+	return isalpha((unsigned char)*c);
+}
+
 /* This function is equivalent to isalnum() for multibyte characters. */
 bool is_alnum_mbchar(const char *c)
 {
diff --git a/src/proto.h b/src/proto.h
index d7ae25d8..039ceb7f 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -183,6 +183,7 @@ bool nisblank(int c);
 bool niswblank(wchar_t wc);
 #endif
 bool is_byte(int c);
+bool is_alpha_mbchar(const char *c);
 bool is_alnum_mbchar(const char *c);
 bool is_blank_mbchar(const char *c);
 bool is_ascii_cntrl_char(int c);
diff --git a/src/utils.c b/src/utils.c
index 470f15bb..67d90d89 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -290,12 +290,11 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
     parse_mbchar(buf + move_mbleft(buf, position), before, NULL);
     parse_mbchar(buf + word_end, after, NULL);
 
-    /* If we're at the beginning of the line or the character before the
-     * word isn't a non-punctuation "word" character, and if we're at
-     * the end of the line or the character after the word isn't a
-     * non-punctuation "word" character, we have a whole word. */
-    retval = (position == 0 || !is_alnum_mbchar(before)) &&
-		(word_end == strlen(buf) || !is_alnum_mbchar(after));
+    /* If the word starts at the beginning of the line OR the character before
+     * the word isn't a letter, and if the word ends at the end of the line OR
+     * the character after the word isn't a letter, we have a whole word. */
+    retval = (position == 0 || !is_alpha_mbchar(before)) &&
+		(word_end == strlen(buf) || !is_alpha_mbchar(after));
 
     free(before);
     free(after);