chars: reduce searching time by roughly 85 percent for plain ASCII
Make case-insensitive searching in a UTF-8 locale eight times faster when the actual characters involved are plain ASCII. This makes us faster than 'less', and as fast as Vim and Emacs. The disadvantage of this change is that searching for a string that begins with a multibyte character is nearly ten times slower than searching for one that begins with an ASCII character. This may be unsettling when searching a huge file first for a simple ASCII string and later for a UTF-8 one. Doing this second search, the user might get impatient: "Why is it taking so long?" (This patch fell through the cracks four years ago, when I worked on the searching code. It sat in a branch on top of other changes that I never applied because I made different improvements. The speedup at the time, on that machine, was only around sixty percent, though. But measuring it now again on the same machine, it clocks in at an 82 percent reduction with -O0 and an 87 percent reduction with -O2.)
parent
c87bc1d55f
commit
5a635db262
16
src/chars.c
16
src/chars.c
|
@ -374,6 +374,22 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
|
|||
wchar_t wc1, wc2;
|
||||
|
||||
while (*s1 != '\0' && *s2 != '\0' && n > 0) {
|
||||
if ((signed char)*s1 >= 0 && (signed char)*s2 >= 0) {
|
||||
if ('A' <= (*s1 & 0x5F) && (*s1 & 0x5F) <= 'Z') {
|
||||
if ('A' <= (*s2 & 0x5F) && (*s2 & 0x5F) <= 'Z') {
|
||||
if ((*s1 & 0x5F) != (*s2 & 0x5F))
|
||||
return ((*s1 & 0x5F) - (*s2 & 0x5F));
|
||||
} else
|
||||
return ((*s1 | 0x20) - *s2);
|
||||
} else if ('A' <= (*s2 & 0x5F) && (*s2 & 0x5F) <= 'Z')
|
||||
return (*s1 - (*s2 | 0x20));
|
||||
else if (*s1 != *s2)
|
||||
return (*s1 - *s2);
|
||||
|
||||
s1++; s2++; n--;
|
||||
continue;
|
||||
}
|
||||
|
||||
bool bad1 = (mbtowc(&wc1, s1, MAXCHARLEN) < 0);
|
||||
bool bad2 = (mbtowc(&wc2, s2, MAXCHARLEN) < 0);
|
||||
|
||||
|
|
Loading…
Reference in New Issue