properly handle cases where a search string and a match to it are not
the same length in bytes, i.e., when the latter contains invalid multibyte characters interpreted as normal characters. git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2888 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 master
parent
091068982d
commit
a0aa4df041
|
@ -70,6 +70,11 @@ CVS code -
|
||||||
do_cursorpos(). (DLR)
|
do_cursorpos(). (DLR)
|
||||||
- Change the NANO_WIDE #define to ENABLE_UTF8, as the latter is
|
- Change the NANO_WIDE #define to ENABLE_UTF8, as the latter is
|
||||||
clearer. (DLR)
|
clearer. (DLR)
|
||||||
|
- Properly handle cases where a search string and a match to it
|
||||||
|
are not the same length in bytes, i.e., when the latter
|
||||||
|
contains invalid multibyte characters interpreted as normal
|
||||||
|
characters. Changes to mbstrncasecmp(), mbstrcasestr(),
|
||||||
|
mbrevstrcasestr(), findnextstr(), and do_replace_loop(). (DLR)
|
||||||
- files.c:
|
- files.c:
|
||||||
open_file()
|
open_file()
|
||||||
- Assert that filename isn't NULL, and don't do anything special
|
- Assert that filename isn't NULL, and don't do anything special
|
||||||
|
|
20
src/chars.c
20
src/chars.c
|
@ -475,7 +475,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
|
||||||
assert(s1 != NULL && s2 != NULL);
|
assert(s1 != NULL && s2 != NULL);
|
||||||
|
|
||||||
while (n > 0 && *s1 != '\0' && *s2 != '\0') {
|
while (n > 0 && *s1 != '\0' && *s2 != '\0') {
|
||||||
bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
|
|
||||||
int s1_mb_len, s2_mb_len;
|
int s1_mb_len, s2_mb_len;
|
||||||
|
|
||||||
s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
|
s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
|
||||||
|
@ -483,7 +482,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
|
||||||
if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
|
if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
|
||||||
mbtowc(NULL, NULL, 0);
|
mbtowc(NULL, NULL, 0);
|
||||||
ws1 = (unsigned char)*s1_mb;
|
ws1 = (unsigned char)*s1_mb;
|
||||||
bad_s1_mb = TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
|
s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
|
||||||
|
@ -491,11 +489,9 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
|
||||||
if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
|
if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
|
||||||
mbtowc(NULL, NULL, 0);
|
mbtowc(NULL, NULL, 0);
|
||||||
ws2 = (unsigned char)*s2_mb;
|
ws2 = (unsigned char)*s2_mb;
|
||||||
bad_s2_mb = TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n == 0 || bad_s1_mb != bad_s2_mb ||
|
if (n == 0 || towlower(ws1) != towlower(ws2))
|
||||||
towlower(ws1) != towlower(ws2))
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
s1 += s1_mb_len;
|
s1 += s1_mb_len;
|
||||||
|
@ -550,14 +546,11 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
|
||||||
int r_mb_len, q_mb_len;
|
int r_mb_len, q_mb_len;
|
||||||
|
|
||||||
while (*q != '\0') {
|
while (*q != '\0') {
|
||||||
bool bad_r_mb = FALSE, bad_q_mb = FALSE;
|
|
||||||
|
|
||||||
r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
|
r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
|
||||||
|
|
||||||
if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
|
if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
|
||||||
mbtowc(NULL, NULL, 0);
|
mbtowc(NULL, NULL, 0);
|
||||||
wr = (unsigned char)*r;
|
wr = (unsigned char)*r;
|
||||||
bad_r_mb = TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
|
q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
|
||||||
|
@ -565,11 +558,9 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
|
||||||
if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
|
if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
|
||||||
mbtowc(NULL, NULL, 0);
|
mbtowc(NULL, NULL, 0);
|
||||||
wq = (unsigned char)*q;
|
wq = (unsigned char)*q;
|
||||||
bad_q_mb = TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bad_r_mb != bad_q_mb ||
|
if (towlower(wr) != towlower(wq))
|
||||||
towlower(wr) != towlower(wq))
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
r += r_mb_len;
|
r += r_mb_len;
|
||||||
|
@ -656,14 +647,11 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
|
||||||
int r_mb_len, q_mb_len;
|
int r_mb_len, q_mb_len;
|
||||||
|
|
||||||
while (*q != '\0') {
|
while (*q != '\0') {
|
||||||
bool bad_r_mb = FALSE, bad_q_mb = FALSE;
|
|
||||||
|
|
||||||
r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
|
r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
|
||||||
|
|
||||||
if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
|
if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
|
||||||
mbtowc(NULL, NULL, 0);
|
mbtowc(NULL, NULL, 0);
|
||||||
wr = (unsigned char)*r;
|
wr = (unsigned char)*r;
|
||||||
bad_r_mb = TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
|
q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
|
||||||
|
@ -671,11 +659,9 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
|
||||||
if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
|
if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
|
||||||
mbtowc(NULL, NULL, 0);
|
mbtowc(NULL, NULL, 0);
|
||||||
wq = (unsigned char)*q;
|
wq = (unsigned char)*q;
|
||||||
bad_q_mb = TRUE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bad_r_mb != bad_q_mb ||
|
if (towlower(wr) != towlower(wq))
|
||||||
towlower(wr) != towlower(wq))
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
r += r_mb_len;
|
r += r_mb_len;
|
||||||
|
|
28
src/search.c
28
src/search.c
|
@ -321,12 +321,21 @@ bool findnextstr(bool can_display_wrap, bool wholeword, bool
|
||||||
/* Is this potential match a whole word? */
|
/* Is this potential match a whole word? */
|
||||||
|
|
||||||
/* Set found_len to the length of the potential match. */
|
/* Set found_len to the length of the potential match. */
|
||||||
found_len =
|
|
||||||
#ifdef HAVE_REGEX_H
|
#ifdef HAVE_REGEX_H
|
||||||
ISSET(USE_REGEXP) ?
|
if (ISSET(USE_REGEXP))
|
||||||
regmatches[0].rm_eo - regmatches[0].rm_so :
|
found_len = regmatches[0].rm_eo - regmatches[0].rm_so;
|
||||||
|
else
|
||||||
#endif
|
#endif
|
||||||
strlen(needle);
|
{
|
||||||
|
size_t needle_len = mbstrlen(needle);
|
||||||
|
|
||||||
|
/* Get found's length in single-byte characters. */
|
||||||
|
found_len = 0;
|
||||||
|
|
||||||
|
for (; needle_len > 0; needle_len--)
|
||||||
|
found_len += parse_mbchar(found + found_len, NULL,
|
||||||
|
NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/* If we're searching for whole words, see if this potential
|
/* If we're searching for whole words, see if this potential
|
||||||
* match is a whole word. */
|
* match is a whole word. */
|
||||||
|
@ -784,13 +793,20 @@ ssize_t do_replace_loop(const char *needle, const filestruct
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (i > 0 || replaceall) { /* Yes, replace it!!!! */
|
if (i > 0 || replaceall) { /* Yes, replace it!!!! */
|
||||||
char *copy;
|
char *match, *copy;
|
||||||
size_t length_change;
|
size_t length_change;
|
||||||
|
|
||||||
if (i == 2)
|
if (i == 2)
|
||||||
replaceall = TRUE;
|
replaceall = TRUE;
|
||||||
|
|
||||||
copy = replace_line(needle);
|
/* Get the match's length in single-byte characters. */
|
||||||
|
match = mallocstrncpy(NULL, openfile->current->data +
|
||||||
|
openfile->current_x, match_len + 1);
|
||||||
|
match[match_len] = '\0';
|
||||||
|
|
||||||
|
copy = replace_line(match);
|
||||||
|
|
||||||
|
free(match);
|
||||||
|
|
||||||
length_change = strlen(copy) -
|
length_change = strlen(copy) -
|
||||||
strlen(openfile->current->data);
|
strlen(openfile->current->data);
|
||||||
|
|
Loading…
Reference in New Issue