diff --git a/ChangeLog b/ChangeLog index 09775a4f..35f07f73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -70,6 +70,11 @@ CVS code - do_cursorpos(). (DLR) - Change the NANO_WIDE #define to ENABLE_UTF8, as the latter is clearer. (DLR) + - Properly handle cases where a search string and a match to it + are not the same length in bytes, i.e., when the latter + contains invalid multibyte characters interpreted as normal + characters. Changes to mbstrncasecmp(), mbstrcasestr(), + mbrevstrcasestr(), findnextstr(), and do_replace_loop(). (DLR) - files.c: open_file() - Assert that filename isn't NULL, and don't do anything special diff --git a/src/chars.c b/src/chars.c index 0d3a9ddf..2acbe146 100644 --- a/src/chars.c +++ b/src/chars.c @@ -475,7 +475,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) assert(s1 != NULL && s2 != NULL); while (n > 0 && *s1 != '\0' && *s2 != '\0') { - bool bad_s1_mb = FALSE, bad_s2_mb = FALSE; int s1_mb_len, s2_mb_len; s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL); @@ -483,7 +482,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) { mbtowc(NULL, NULL, 0); ws1 = (unsigned char)*s1_mb; - bad_s1_mb = TRUE; } s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL); @@ -491,11 +489,9 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) { mbtowc(NULL, NULL, 0); ws2 = (unsigned char)*s2_mb; - bad_s2_mb = TRUE; } - if (n == 0 || bad_s1_mb != bad_s2_mb || - towlower(ws1) != towlower(ws2)) + if (n == 0 || towlower(ws1) != towlower(ws2)) break; s1 += s1_mb_len; @@ -550,14 +546,11 @@ const char *mbstrcasestr(const char *haystack, const char *needle) int r_mb_len, q_mb_len; while (*q != '\0') { - bool bad_r_mb = FALSE, bad_q_mb = FALSE; - r_mb_len = parse_mbchar(r, r_mb, NULL, NULL); if (mbtowc(&wr, r_mb, r_mb_len) <= 0) { mbtowc(NULL, NULL, 0); wr = (unsigned char)*r; - bad_r_mb = TRUE; } q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); @@ -565,11 
+558,9 @@ const char *mbstrcasestr(const char *haystack, const char *needle) if (mbtowc(&wq, q_mb, q_mb_len) <= 0) { mbtowc(NULL, NULL, 0); wq = (unsigned char)*q; - bad_q_mb = TRUE; } - if (bad_r_mb != bad_q_mb || - towlower(wr) != towlower(wq)) + if (towlower(wr) != towlower(wq)) break; r += r_mb_len; @@ -656,14 +647,11 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle, int r_mb_len, q_mb_len; while (*q != '\0') { - bool bad_r_mb = FALSE, bad_q_mb = FALSE; - r_mb_len = parse_mbchar(r, r_mb, NULL, NULL); if (mbtowc(&wr, r_mb, r_mb_len) <= 0) { mbtowc(NULL, NULL, 0); wr = (unsigned char)*r; - bad_r_mb = TRUE; } q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); @@ -671,11 +659,9 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle, if (mbtowc(&wq, q_mb, q_mb_len) <= 0) { mbtowc(NULL, NULL, 0); wq = (unsigned char)*q; - bad_q_mb = TRUE; } - if (bad_r_mb != bad_q_mb || - towlower(wr) != towlower(wq)) + if (towlower(wr) != towlower(wq)) break; r += r_mb_len; diff --git a/src/search.c b/src/search.c index 0fc9e8cf..f9c94720 100644 --- a/src/search.c +++ b/src/search.c @@ -321,12 +321,21 @@ bool findnextstr(bool can_display_wrap, bool wholeword, bool /* Is this potential match a whole word? */ /* Set found_len to the length of the potential match. */ - found_len = #ifdef HAVE_REGEX_H - ISSET(USE_REGEXP) ? - regmatches[0].rm_eo - regmatches[0].rm_so : + if (ISSET(USE_REGEXP)) + found_len = regmatches[0].rm_eo - regmatches[0].rm_so; + else #endif - strlen(needle); + { + size_t needle_len = mbstrlen(needle); + + /* Get found's length in single-byte characters. */ + found_len = 0; + + for (; needle_len > 0; needle_len--) + found_len += parse_mbchar(found + found_len, NULL, + NULL, NULL); + } /* If we're searching for whole words, see if this potential * match is a whole word. */ @@ -784,13 +793,20 @@ ssize_t do_replace_loop(const char *needle, const filestruct #endif if (i > 0 || replaceall) { /* Yes, replace it!!!! 
*/ - char *copy; + char *match, *copy; size_t length_change; if (i == 2) replaceall = TRUE; - copy = replace_line(needle); + /* Get the match's length in single-byte characters. */ + match = mallocstrncpy(NULL, openfile->current->data + + openfile->current_x, match_len + 1); + match[match_len] = '\0'; + + copy = replace_line(match); + + free(match); length_change = strlen(copy) - strlen(openfile->current->data);