search: make the \b and \B anchors work correctly in both directions
That is: remove the special treatment of BOW (beginning-of-word) anchors, and make regexes match against the whole line rather than against an artificially shortened one. The latter method creates ghost matches: matches at the starting point of the search that aren't really matches when seen in the context of the whole line. This fixes https://savannah.gnu.org/bugs/?50030.
master
parent
4ed3591703
commit
64aa8757a8
26
src/search.c
26
src/search.c
|
@ -38,8 +38,6 @@ static bool history_changed = FALSE;
|
|||
#ifdef HAVE_REGEX_H
|
||||
static bool regexp_compiled = FALSE;
|
||||
/* Have we compiled any regular expressions? */
|
||||
static bool bow_anchored = FALSE;
|
||||
/* Whether a regex starts with a beginning-of-word anchor. */
|
||||
|
||||
/* Compile the given regular expression and store it in search_regexp.
|
||||
* Return TRUE if the expression is valid, and FALSE otherwise. */
|
||||
|
@ -62,10 +60,6 @@ bool regexp_init(const char *regexp)
|
|||
|
||||
regexp_compiled = TRUE;
|
||||
|
||||
/* Remember whether the regex starts with a beginning-of-word anchor. */
|
||||
bow_anchored = (strncmp(regexp, "\\<", 2) == 0 ||
|
||||
strncmp(regexp, "\\b", 2) == 0);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -302,24 +296,8 @@ int findnextstr(const char *needle, bool whole_word_only, size_t *match_len,
|
|||
if (found != NULL) {
|
||||
#ifdef HAVE_REGEX_H
|
||||
/* When doing a regex search, compute the length of the match. */
|
||||
if (ISSET(USE_REGEXP)) {
|
||||
if (ISSET(USE_REGEXP))
|
||||
found_len = regmatches[0].rm_eo - regmatches[0].rm_so;
|
||||
|
||||
/* If the regex starts with a BOW anchor, check that the found
|
||||
* match actually is the start of a word. If not, continue. */
|
||||
if (bow_anchored && found != line->data) {
|
||||
size_t before = move_mbleft(line->data, found - line->data);
|
||||
|
||||
/* If a word char is before the match, skip this match. */
|
||||
if (is_word_mbchar(line->data + before, FALSE)) {
|
||||
if (ISSET(BACKWARDS_SEARCH))
|
||||
from = line->data + before;
|
||||
else
|
||||
from = found + move_mbright(found, 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifndef DISABLE_SPELLER
|
||||
/* When we're spell checking, a match should be a separate word;
|
||||
|
@ -531,7 +509,7 @@ int replace_regexp(char *string, bool create)
|
|||
* subexpression match to the new line. */
|
||||
if (create) {
|
||||
strncpy(string, openfile->current->data +
|
||||
openfile->current_x + regmatches[num].rm_so, i);
|
||||
regmatches[num].rm_so, i);
|
||||
string += i;
|
||||
}
|
||||
}
|
||||
|
|
75
src/utils.c
75
src/utils.c
|
@ -315,41 +315,66 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
|
|||
}
|
||||
#endif /* !DISABLE_SPELLER */
|
||||
|
||||
/* If we are searching backwards, we will find the last match that
|
||||
* starts no later than start. Otherwise we find the first match
|
||||
* starting no earlier than start. If we are doing a regexp search, we
|
||||
* fill in the global variable regmatches with at most 9 subexpression
|
||||
* matches. Also, all .rm_so elements are relative to the start of the
|
||||
* whole match, so regmatches[0].rm_so == 0. */
|
||||
/* Return the position of the needle in the haystack, or NULL if not found.
|
||||
* When searching backwards, we will find the last match that starts no later
|
||||
* than the given start; otherwise, we find the first match starting no earlier
|
||||
* than start. If we are doing a regexp search, and we find a match, we fill
|
||||
* in the global variable regmatches with at most 9 subexpression matches. */
|
||||
const char *strstrwrapper(const char *haystack, const char *needle,
|
||||
const char *start)
|
||||
{
|
||||
#ifdef HAVE_REGEX_H
|
||||
if (ISSET(USE_REGEXP)) {
|
||||
if (ISSET(BACKWARDS_SEARCH)) {
|
||||
if (regexec(&search_regexp, haystack, 1, regmatches, 0) == 0 &&
|
||||
haystack + regmatches[0].rm_so <= start) {
|
||||
const char *retval = haystack + regmatches[0].rm_so;
|
||||
size_t last_find, ceiling, far_end;
|
||||
size_t floor = 0, next_rung = 0;
|
||||
/* The start of the search range, and the next start. */
|
||||
|
||||
/* Search forward until there are no more matches. */
|
||||
while (regexec(&search_regexp, retval + 1, 1,
|
||||
regmatches, REG_NOTBOL) == 0 &&
|
||||
retval + regmatches[0].rm_so + 1 <= start)
|
||||
retval += regmatches[0].rm_so + 1;
|
||||
/* Finally, put the subexpression matches in global
|
||||
* variable regmatches. The REG_NOTBOL flag doesn't
|
||||
* matter now. */
|
||||
regexec(&search_regexp, retval, 10, regmatches, 0);
|
||||
return retval;
|
||||
if (regexec(&search_regexp, haystack, 1, regmatches, 0) != 0)
|
||||
return NULL;
|
||||
|
||||
far_end = strlen(haystack);
|
||||
ceiling = start - haystack;
|
||||
last_find = regmatches[0].rm_so;
|
||||
|
||||
/* A result beyond the search range also means: no match. */
|
||||
if (last_find > ceiling)
|
||||
return NULL;
|
||||
|
||||
/* Move the start-of-range forward until there is no more match;
|
||||
* then the last match found is the first match backwards. */
|
||||
while (regmatches[0].rm_so <= ceiling) {
|
||||
floor = next_rung;
|
||||
last_find = regmatches[0].rm_so;
|
||||
/* If this is the last possible match, don't try to advance. */
|
||||
if (last_find == ceiling)
|
||||
break;
|
||||
next_rung = move_mbright(haystack, last_find);
|
||||
regmatches[0].rm_so = next_rung;
|
||||
regmatches[0].rm_eo = far_end;
|
||||
if (regexec(&search_regexp, haystack, 1, regmatches,
|
||||
REG_STARTEND) != 0)
|
||||
break;
|
||||
}
|
||||
} else if (regexec(&search_regexp, start, 10, regmatches,
|
||||
(start > haystack) ? REG_NOTBOL : 0) == 0) {
|
||||
const char *retval = start + regmatches[0].rm_so;
|
||||
|
||||
regexec(&search_regexp, retval, 10, regmatches, 0);
|
||||
return retval;
|
||||
/* Find the last match again, to get possible submatches. */
|
||||
regmatches[0].rm_so = floor;
|
||||
regmatches[0].rm_eo = far_end;
|
||||
if (regexec(&search_regexp, haystack, 10, regmatches,
|
||||
REG_STARTEND) != 0)
|
||||
statusline(ALERT, "BAD: failed to refind the match!");
|
||||
|
||||
return haystack + regmatches[0].rm_so;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
/* Do a forward regex search from the starting point. */
|
||||
regmatches[0].rm_so = start - haystack;
|
||||
regmatches[0].rm_eo = strlen(haystack);
|
||||
if (regexec(&search_regexp, haystack, 10, regmatches,
|
||||
REG_STARTEND) != 0)
|
||||
return NULL;
|
||||
else
|
||||
return haystack + regmatches[0].rm_so;
|
||||
}
|
||||
#endif /* HAVE_REGEX_H */
|
||||
if (ISSET(CASE_SENSITIVE)) {
|
||||
|
|
Loading…
Reference in New Issue