chars: probe for a valid UTF-8 starter byte, instead of overstepping
Instead of always stepping back four bytes and then tentatively moving forward again (which is wasteful when most codes are just one or two bytes long), inspect the preceding bytes one by one and begin the move forward at the first valid starter byte. This reduces the backwards searching time by close to 40 percent.
master
parent
f162a6a2ab
commit
09cabcad5d
16
src/chars.c
16
src/chars.c
|
@@ -383,8 +383,20 @@ size_t move_mbleft(const char *buf, size_t pos)
|
|||
* possible point. */
|
||||
if (pos < 4)
|
||||
before = 0;
|
||||
else
|
||||
before = pos - 4;
|
||||
else {
|
||||
const char *ptr = buf + pos;
|
||||
|
||||
if ((signed char)*(--ptr) > -65)
|
||||
before = pos - 1;
|
||||
else if ((signed char)*(--ptr) > -65)
|
||||
before = pos - 2;
|
||||
else if ((signed char)*(--ptr) > -65)
|
||||
before = pos - 3;
|
||||
else if ((signed char)*(--ptr) > -65)
|
||||
before = pos - 4;
|
||||
else
|
||||
before = pos - 1;
|
||||
}
|
||||
|
||||
while (before < pos) {
|
||||
char_len = parse_mbchar(buf + before, NULL, NULL);
|
||||
|
|
Loading…
Reference in New Issue