Performance improvements for regex searching, most notably:

- Use the grid data directly instead of copying it.

- Special case the most typical one byte character cells and use memcmp
  for multiple bytes instead of a handrolled loop.

- Hoist regcomp out of the loop into the calling functions.

GitHub issue 2143.

Also a man page from jmc@.
This commit is contained in:
nicm 2020-04-01 07:35:10 +00:00 committed by Nicholas Marriott
parent 8dedccaa20
commit 0dbf414578

View File

@ -58,10 +58,6 @@ static int window_copy_search_lr(struct grid *, struct grid *, u_int *,
u_int, u_int, u_int, int); u_int, u_int, u_int, int);
static int window_copy_search_rl(struct grid *, struct grid *, u_int *, static int window_copy_search_rl(struct grid *, struct grid *, u_int *,
u_int, u_int, u_int, int); u_int, u_int, u_int, int);
static int window_copy_search_lr_regex(struct grid *, struct grid *,
u_int *, u_int *, u_int, u_int, u_int, int);
static int window_copy_search_rl_regex(struct grid *, struct grid *,
u_int *, u_int *, u_int, u_int, u_int, int);
static int window_copy_last_regex(struct grid *gd, u_int py, u_int first, static int window_copy_last_regex(struct grid *gd, u_int py, u_int first,
u_int last, u_int len, u_int *ppx, u_int *psx, u_int last, u_int len, u_int *ppx, u_int *psx,
const char *buf, const regex_t *preg, int eflags); const char *buf, const regex_t *preg, int eflags);
@ -2263,14 +2259,12 @@ window_copy_search_rl(struct grid *gd,
} }
static int static int
window_copy_search_lr_regex(struct grid *gd, struct grid *sgd, window_copy_search_lr_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
u_int *ppx, u_int *psx, u_int py, u_int first, u_int last, int cis) u_int first, u_int last, regex_t *reg)
{ {
int cflags = REG_EXTENDED, eflags = 0; int eflags = 0;
u_int endline, foundx, foundy, len, pywrap, size = 1; u_int endline, foundx, foundy, len, pywrap, size = 1;
u_int ssize = 1; char *buf;
char *buf, *sbuf;
regex_t reg;
regmatch_t regmatch; regmatch_t regmatch;
struct grid_line *gl; struct grid_line *gl;
@ -2281,19 +2275,7 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
if (first >= last) if (first >= last)
return (0); return (0);
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
if (sbuf == NULL)
return (0);
/* Set flags for regex search. */ /* Set flags for regex search. */
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
if (first != 0) if (first != 0)
eflags |= REG_NOTBOL; eflags |= REG_NOTBOL;
@ -2313,7 +2295,7 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
len += gd->sx; len += gd->sx;
} }
if (regexec(&reg, buf, 1, &regmatch, eflags) == 0) { if (regexec(reg, buf, 1, &regmatch, eflags) == 0) {
foundx = first; foundx = first;
foundy = py; foundy = py;
window_copy_cstrtocellpos(gd, len, &foundx, &foundy, window_copy_cstrtocellpos(gd, len, &foundx, &foundy,
@ -2329,15 +2311,11 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
foundy--; foundy--;
} }
*psx -= *ppx; *psx -= *ppx;
regfree(&reg);
free(sbuf);
free(buf); free(buf);
return (1); return (1);
} }
} }
regfree(&reg);
free(sbuf);
free(buf); free(buf);
*ppx = 0; *ppx = 0;
*psx = 0; *psx = 0;
@ -2345,28 +2323,15 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
} }
static int static int
window_copy_search_rl_regex(struct grid *gd, struct grid *sgd, window_copy_search_rl_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
u_int *ppx, u_int *psx, u_int py, u_int first, u_int last, int cis) u_int first, u_int last, regex_t *reg)
{ {
int cflags = REG_EXTENDED, eflags = 0; int eflags = 0;
u_int endline, len, pywrap, size = 1, ssize = 1; u_int endline, len, pywrap, size = 1;
char *buf, *sbuf; char *buf;
regex_t reg;
struct grid_line *gl; struct grid_line *gl;
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
if (sbuf == NULL)
return (0);
/* Set flags for regex search. */ /* Set flags for regex search. */
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
if (first != 0) if (first != 0)
eflags |= REG_NOTBOL; eflags |= REG_NOTBOL;
@ -2387,22 +2352,38 @@ window_copy_search_rl_regex(struct grid *gd, struct grid *sgd,
} }
if (window_copy_last_regex(gd, py, first, last, len, ppx, psx, buf, if (window_copy_last_regex(gd, py, first, last, len, ppx, psx, buf,
&reg, eflags)) reg, eflags))
{ {
regfree(&reg);
free(sbuf);
free(buf); free(buf);
return (1); return (1);
} }
regfree(&reg);
free(sbuf);
free(buf); free(buf);
*ppx = 0; *ppx = 0;
*psx = 0; *psx = 0;
return (0); return (0);
} }
/*
 * Return a pointer to the bytes of the cell in column px of line gl and store
 * the number of bytes in *size. A column at or past gl->cellsize is reported
 * as a single space. The returned pointer is either a string literal or points
 * directly into the line's own cell storage; nothing is copied or allocated
 * here, so the caller must not free it.
 */
static const char *
window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size)
{
	struct grid_cell_entry	*entry;

	if (px < gl->cellsize) {
		entry = &gl->celldata[px];
		if ((entry->flags & GRID_FLAG_EXTENDED) == GRID_FLAG_EXTENDED) {
			/* Extended cell: size and bytes come from extddata. */
			*size = gl->extddata[entry->offset].data.size;
			return (gl->extddata[entry->offset].data.data);
		}
		/* Non-extended cell: one byte held in the entry itself. */
		*size = 1;
		return (&entry->data.data);
	}

	/* Column lies beyond the stored cells of this line. */
	*size = 1;
	return (" ");
}
/* Find last match in given range. */ /* Find last match in given range. */
static int static int
window_copy_last_regex(struct grid *gd, u_int py, u_int first, u_int last, window_copy_last_regex(struct grid *gd, u_int py, u_int first, u_int last,
@ -2457,20 +2438,33 @@ static char *
window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last, window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
char *buf, u_int *size) char *buf, u_int *size)
{ {
u_int ax, bx, newsize; u_int ax, bx, newsize = *size;
struct grid_cell gc; const struct grid_line *gl;
const char *d;
size_t bufsize = 1024, dlen;
while (bufsize < newsize)
bufsize *= 2;
buf = xrealloc(buf, bufsize);
gl = grid_peek_line(gd, py);
bx = *size - 1; bx = *size - 1;
newsize = *size;
for (ax = first; ax < last; ax++) { for (ax = first; ax < last; ax++) {
grid_get_cell(gd, ax, py, &gc); d = window_copy_cellstring(gl, ax, &dlen);
newsize += gc.data.size; newsize += dlen;
buf = xrealloc(buf, newsize); while (bufsize < newsize) {
memcpy(buf + bx, gc.data.data, gc.data.size); bufsize *= 2;
bx += gc.data.size; buf = xrealloc(buf, bufsize);
}
if (dlen == 1)
buf[bx++] = *d;
else {
memcpy(buf + bx, d, dlen);
bx += dlen;
}
} }
buf[newsize - 1] = '\0'; buf[newsize - 1] = '\0';
*size = newsize; *size = newsize;
return (buf); return (buf);
} }
@ -2480,57 +2474,64 @@ static void
window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy, window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
const char *str) const char *str)
{ {
u_int cell, ccell, px, pywrap; u_int cell, ccell, px, pywrap, pos, len;
int match; int match;
const char *cstr; const struct grid_line *gl;
char *celldata, **cells; const char *d;
struct grid_cell gc; size_t dlen;
struct {
/* Set up staggered array of cell contents. This speeds up search. */ const char *d;
cells = xreallocarray(NULL, ncells, sizeof cells[0]); size_t dlen;
} *cells;
/* Populate the array of cell data. */ /* Populate the array of cell data. */
cells = xreallocarray(NULL, ncells, sizeof cells[0]);
cell = 0; cell = 0;
px = *ppx; px = *ppx;
pywrap = *ppy; pywrap = *ppy;
gl = grid_peek_line(gd, pywrap);
while (cell < ncells) { while (cell < ncells) {
grid_get_cell(gd, px, pywrap, &gc); cells[cell].d = window_copy_cellstring(gl, px,
celldata = xmalloc(gc.data.size + 1); &cells[cell].dlen);
memcpy(celldata, gc.data.data, gc.data.size);
celldata[gc.data.size] = '\0';
cells[cell] = celldata;
cell++; cell++;
px = (px + 1) % gd->sx; px = (px + 1) % gd->sx;
if (px == 0) if (px == 0) {
pywrap++; pywrap++;
gl = grid_peek_line(gd, pywrap);
}
} }
/* Locate starting cell. */ /* Locate starting cell. */
cell = 0; cell = 0;
len = strlen(str);
while (cell < ncells) { while (cell < ncells) {
ccell = cell; ccell = cell;
cstr = str; pos = 0;
match = 1; match = 1;
while (ccell < ncells) { while (ccell < ncells) {
/* Anchor found to the end. */ if (str[pos] == '\0') {
if (*cstr == '\0') {
match = 0; match = 0;
break; break;
} }
d = cells[ccell].d;
celldata = cells[ccell]; dlen = cells[ccell].dlen;
while (*celldata != '\0' && *cstr != '\0') { if (dlen == 1) {
if (*celldata++ != *cstr++) { if (str[pos] != *d) {
match = 0; match = 0;
break; break;
} }
pos++;
} else {
if (dlen > len - pos)
dlen = len - pos;
if (memcmp(str + pos, d, dlen) != 0) {
match = 0;
break;
}
pos += dlen;
} }
if (!match)
break;
ccell++; ccell++;
} }
if (match) if (match)
break; break;
cell++; cell++;
@ -2548,8 +2549,6 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
*ppy = pywrap; *ppy = pywrap;
/* Free cell data. */ /* Free cell data. */
for (cell = 0; cell < ncells; cell++)
free(cells[cell]);
free(cells); free(cells);
} }
@ -2609,29 +2608,45 @@ window_copy_search_jump(struct window_mode_entry *wme, struct grid *gd,
struct grid *sgd, u_int fx, u_int fy, u_int endline, int cis, int wrap, struct grid *sgd, u_int fx, u_int fy, u_int endline, int cis, int wrap,
int direction, int regex) int direction, int regex)
{ {
u_int i, px, sx; u_int i, px, sx, ssize = 1;
int found = 0; int found = 0, cflags = REG_EXTENDED;
char *sbuf;
regex_t reg;
if (regex) {
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
}
if (direction) { if (direction) {
for (i = fy; i <= endline; i++) { for (i = fy; i <= endline; i++) {
if (regex) if (regex) {
found = window_copy_search_lr_regex(gd, sgd, found = window_copy_search_lr_regex(gd,
&px, &sx, i, fx, gd->sx, cis); &px, &sx, i, fx, gd->sx, &reg);
else } else {
found = window_copy_search_lr(gd, sgd, found = window_copy_search_lr(gd, sgd,
&px, i, fx, gd->sx, cis); &px, i, fx, gd->sx, cis);
}
if (found) if (found)
break; break;
fx = 0; fx = 0;
} }
} else { } else {
for (i = fy + 1; endline < i; i--) { for (i = fy + 1; endline < i; i--) {
if (regex) if (regex) {
found = window_copy_search_rl_regex(gd, sgd, found = window_copy_search_rl_regex(gd,
&px, &sx, i - 1, 0, fx + 1, cis); &px, &sx, i - 1, 0, fx + 1, &reg);
else } else {
found = window_copy_search_rl(gd, sgd, found = window_copy_search_rl(gd, sgd,
&px, i - 1, 0, fx + 1, cis); &px, i - 1, 0, fx + 1, cis);
}
if (found) { if (found) {
i--; i--;
break; break;
@ -2639,6 +2654,10 @@ window_copy_search_jump(struct window_mode_entry *wme, struct grid *gd,
fx = gd->sx - 1; fx = gd->sx - 1;
} }
} }
if (regex) {
free(sbuf);
regfree(&reg);
}
if (found) { if (found) {
window_copy_scroll_to(wme, px, i); window_copy_scroll_to(wme, px, i);
@ -2710,7 +2729,11 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
struct screen_write_ctx ctx; struct screen_write_ctx ctx;
struct grid *gd = s->grid; struct grid *gd = s->grid;
int found, cis, which = -1; int found, cis, which = -1;
int cflags = REG_EXTENDED;
u_int px, py, b, nfound = 0, width; u_int px, py, b, nfound = 0, width;
u_int ssize = 1;
char *sbuf;
regex_t reg;
if (ssp == NULL) { if (ssp == NULL) {
width = screen_write_strlen("%s", data->searchstr); width = screen_write_strlen("%s", data->searchstr);
@ -2728,25 +2751,36 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
free(data->searchmark); free(data->searchmark);
data->searchmark = bit_alloc((gd->hsize + gd->sy) * gd->sx); data->searchmark = bit_alloc((gd->hsize + gd->sy) * gd->sx);
if (regex) {
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(ssp->grid, 0, 0, ssp->grid->sx,
sbuf, &ssize);
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
}
for (py = 0; py < gd->hsize + gd->sy; py++) { for (py = 0; py < gd->hsize + gd->sy; py++) {
px = 0; px = 0;
for (;;) { for (;;) {
if (regex) { if (regex) {
found = window_copy_search_lr_regex(gd, found = window_copy_search_lr_regex(gd,
ssp->grid, &px, &width, py, px, &px, &width, py, px, gd->sx, &reg);
gd->sx, cis);
if (!found) if (!found)
break; break;
} } else {
else {
found = window_copy_search_lr(gd, ssp->grid, found = window_copy_search_lr(gd, ssp->grid,
&px, py, px, gd->sx, cis); &px, py, px, gd->sx, cis);
if (!found) if (!found)
break; break;
} }
nfound++; nfound++;
if (px == data->cx && py == gd->hsize + data->cy - data->oy) if (px == data->cx &&
py == gd->hsize + data->cy - data->oy)
which = nfound; which = nfound;
b = (py * gd->sx) + px; b = (py * gd->sx) + px;
@ -2755,6 +2789,10 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
px++; px++;
} }
} }
if (regex) {
free(sbuf);
regfree(&reg);
}
if (which != -1) if (which != -1)
data->searchthis = 1 + nfound - which; data->searchthis = 1 + nfound - which;