Performance improvements for regex searching, most notably:

- Use the grid data directly instead of copying it.

- Special-case the most common cells, which hold a single byte, and use
  memcmp for multi-byte cells instead of a hand-rolled loop.

- Hoist regcomp out of the loop into the calling functions.

GitHub issue 2143.

Also a man page fix from jmc@.
This commit is contained in:
nicm 2020-04-01 07:35:10 +00:00
parent 38f1546a66
commit 46ed81fc45
2 changed files with 142 additions and 103 deletions

3
tmux.1
View File

@ -2312,7 +2312,8 @@ and unzoomed (its normal position in the layout).
.Fl M .Fl M
begins mouse resizing (only valid if bound to a mouse key binding, see begins mouse resizing (only valid if bound to a mouse key binding, see
.Sx MOUSE SUPPORT ) . .Sx MOUSE SUPPORT ) .
.Pp T .Pp
.Fl T
trims all lines below the current cursor position and moves lines out of the trims all lines below the current cursor position and moves lines out of the
history to replace them. history to replace them.
.It Xo Ic resize-window .It Xo Ic resize-window

View File

@ -58,10 +58,6 @@ static int window_copy_search_lr(struct grid *, struct grid *, u_int *,
u_int, u_int, u_int, int); u_int, u_int, u_int, int);
static int window_copy_search_rl(struct grid *, struct grid *, u_int *, static int window_copy_search_rl(struct grid *, struct grid *, u_int *,
u_int, u_int, u_int, int); u_int, u_int, u_int, int);
static int window_copy_search_lr_regex(struct grid *, struct grid *,
u_int *, u_int *, u_int, u_int, u_int, int);
static int window_copy_search_rl_regex(struct grid *, struct grid *,
u_int *, u_int *, u_int, u_int, u_int, int);
static int window_copy_last_regex(struct grid *gd, u_int py, u_int first, static int window_copy_last_regex(struct grid *gd, u_int py, u_int first,
u_int last, u_int len, u_int *ppx, u_int *psx, u_int last, u_int len, u_int *ppx, u_int *psx,
const char *buf, const regex_t *preg, int eflags); const char *buf, const regex_t *preg, int eflags);
@ -2292,14 +2288,12 @@ window_copy_search_rl(struct grid *gd,
} }
static int static int
window_copy_search_lr_regex(struct grid *gd, struct grid *sgd, window_copy_search_lr_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
u_int *ppx, u_int *psx, u_int py, u_int first, u_int last, int cis) u_int first, u_int last, regex_t *reg)
{ {
int cflags = REG_EXTENDED, eflags = 0; int eflags = 0;
u_int endline, foundx, foundy, len, pywrap, size = 1; u_int endline, foundx, foundy, len, pywrap, size = 1;
u_int ssize = 1; char *buf;
char *buf, *sbuf;
regex_t reg;
regmatch_t regmatch; regmatch_t regmatch;
struct grid_line *gl; struct grid_line *gl;
@ -2310,19 +2304,7 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
if (first >= last) if (first >= last)
return (0); return (0);
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
if (sbuf == NULL)
return (0);
/* Set flags for regex search. */ /* Set flags for regex search. */
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
if (first != 0) if (first != 0)
eflags |= REG_NOTBOL; eflags |= REG_NOTBOL;
@ -2342,7 +2324,7 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
len += gd->sx; len += gd->sx;
} }
if (regexec(&reg, buf, 1, &regmatch, eflags) == 0) { if (regexec(reg, buf, 1, &regmatch, eflags) == 0) {
foundx = first; foundx = first;
foundy = py; foundy = py;
window_copy_cstrtocellpos(gd, len, &foundx, &foundy, window_copy_cstrtocellpos(gd, len, &foundx, &foundy,
@ -2358,15 +2340,11 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
foundy--; foundy--;
} }
*psx -= *ppx; *psx -= *ppx;
regfree(&reg);
free(sbuf);
free(buf); free(buf);
return (1); return (1);
} }
} }
regfree(&reg);
free(sbuf);
free(buf); free(buf);
*ppx = 0; *ppx = 0;
*psx = 0; *psx = 0;
@ -2374,28 +2352,15 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
} }
static int static int
window_copy_search_rl_regex(struct grid *gd, struct grid *sgd, window_copy_search_rl_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
u_int *ppx, u_int *psx, u_int py, u_int first, u_int last, int cis) u_int first, u_int last, regex_t *reg)
{ {
int cflags = REG_EXTENDED, eflags = 0; int eflags = 0;
u_int endline, len, pywrap, size = 1, ssize = 1; u_int endline, len, pywrap, size = 1;
char *buf, *sbuf; char *buf;
regex_t reg;
struct grid_line *gl; struct grid_line *gl;
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
if (sbuf == NULL)
return (0);
/* Set flags for regex search. */ /* Set flags for regex search. */
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
if (first != 0) if (first != 0)
eflags |= REG_NOTBOL; eflags |= REG_NOTBOL;
@ -2416,22 +2381,38 @@ window_copy_search_rl_regex(struct grid *gd, struct grid *sgd,
} }
if (window_copy_last_regex(gd, py, first, last, len, ppx, psx, buf, if (window_copy_last_regex(gd, py, first, last, len, ppx, psx, buf,
&reg, eflags)) reg, eflags))
{ {
regfree(&reg);
free(sbuf);
free(buf); free(buf);
return (1); return (1);
} }
regfree(&reg);
free(sbuf);
free(buf); free(buf);
*ppx = 0; *ppx = 0;
*psx = 0; *psx = 0;
return (0); return (0);
} }
/*
 * Return a pointer to the bytes held by cell px of line gl and store the
 * number of bytes in *size. The result points either at a string literal or
 * into the line's own cell storage; nothing is allocated or copied here.
 */
static const char *
window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size)
{
	struct grid_cell_entry	*entry;

	/* Positions at or beyond cellsize are reported as one space. */
	if (px >= gl->cellsize) {
		*size = 1;
		return (" ");
	}

	entry = gl->celldata + px;

	/* Without the extended flag the cell is a single byte in the entry. */
	if (~entry->flags & GRID_FLAG_EXTENDED) {
		*size = 1;
		return (&entry->data.data);
	}

	/* Extended cell: size and bytes come from extddata at entry->offset. */
	*size = gl->extddata[entry->offset].data.size;
	return (gl->extddata[entry->offset].data.data);
}
/* Find last match in given range. */ /* Find last match in given range. */
static int static int
window_copy_last_regex(struct grid *gd, u_int py, u_int first, u_int last, window_copy_last_regex(struct grid *gd, u_int py, u_int first, u_int last,
@ -2486,20 +2467,33 @@ static char *
window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last, window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
char *buf, u_int *size) char *buf, u_int *size)
{ {
u_int ax, bx, newsize; u_int ax, bx, newsize = *size;
struct grid_cell gc; const struct grid_line *gl;
const char *d;
size_t bufsize = 1024, dlen;
while (bufsize < newsize)
bufsize *= 2;
buf = xrealloc(buf, bufsize);
gl = grid_peek_line(gd, py);
bx = *size - 1; bx = *size - 1;
newsize = *size;
for (ax = first; ax < last; ax++) { for (ax = first; ax < last; ax++) {
grid_get_cell(gd, ax, py, &gc); d = window_copy_cellstring(gl, ax, &dlen);
newsize += gc.data.size; newsize += dlen;
buf = xrealloc(buf, newsize); while (bufsize < newsize) {
memcpy(buf + bx, gc.data.data, gc.data.size); bufsize *= 2;
bx += gc.data.size; buf = xrealloc(buf, bufsize);
}
if (dlen == 1)
buf[bx++] = *d;
else {
memcpy(buf + bx, d, dlen);
bx += dlen;
}
} }
buf[newsize - 1] = '\0'; buf[newsize - 1] = '\0';
*size = newsize; *size = newsize;
return (buf); return (buf);
} }
@ -2509,57 +2503,64 @@ static void
window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy, window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
const char *str) const char *str)
{ {
u_int cell, ccell, px, pywrap; u_int cell, ccell, px, pywrap, pos, len;
int match; int match;
const char *cstr; const struct grid_line *gl;
char *celldata, **cells; const char *d;
struct grid_cell gc; size_t dlen;
struct {
/* Set up staggered array of cell contents. This speeds up search. */ const char *d;
cells = xreallocarray(NULL, ncells, sizeof cells[0]); size_t dlen;
} *cells;
/* Populate the array of cell data. */ /* Populate the array of cell data. */
cells = xreallocarray(NULL, ncells, sizeof cells[0]);
cell = 0; cell = 0;
px = *ppx; px = *ppx;
pywrap = *ppy; pywrap = *ppy;
gl = grid_peek_line(gd, pywrap);
while (cell < ncells) { while (cell < ncells) {
grid_get_cell(gd, px, pywrap, &gc); cells[cell].d = window_copy_cellstring(gl, px,
celldata = xmalloc(gc.data.size + 1); &cells[cell].dlen);
memcpy(celldata, gc.data.data, gc.data.size);
celldata[gc.data.size] = '\0';
cells[cell] = celldata;
cell++; cell++;
px = (px + 1) % gd->sx; px = (px + 1) % gd->sx;
if (px == 0) if (px == 0) {
pywrap++; pywrap++;
gl = grid_peek_line(gd, pywrap);
}
} }
/* Locate starting cell. */ /* Locate starting cell. */
cell = 0; cell = 0;
len = strlen(str);
while (cell < ncells) { while (cell < ncells) {
ccell = cell; ccell = cell;
cstr = str; pos = 0;
match = 1; match = 1;
while (ccell < ncells) { while (ccell < ncells) {
/* Anchor found to the end. */ if (str[pos] == '\0') {
if (*cstr == '\0') {
match = 0; match = 0;
break; break;
} }
d = cells[ccell].d;
celldata = cells[ccell]; dlen = cells[ccell].dlen;
while (*celldata != '\0' && *cstr != '\0') { if (dlen == 1) {
if (*celldata++ != *cstr++) { if (str[pos] != *d) {
match = 0; match = 0;
break; break;
} }
pos++;
} else {
if (dlen > len - pos)
dlen = len - pos;
if (memcmp(str + pos, d, dlen) != 0) {
match = 0;
break;
}
pos += dlen;
} }
if (!match)
break;
ccell++; ccell++;
} }
if (match) if (match)
break; break;
cell++; cell++;
@ -2577,8 +2578,6 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
*ppy = pywrap; *ppy = pywrap;
/* Free cell data. */ /* Free cell data. */
for (cell = 0; cell < ncells; cell++)
free(cells[cell]);
free(cells); free(cells);
} }
@ -2638,29 +2637,45 @@ window_copy_search_jump(struct window_mode_entry *wme, struct grid *gd,
struct grid *sgd, u_int fx, u_int fy, u_int endline, int cis, int wrap, struct grid *sgd, u_int fx, u_int fy, u_int endline, int cis, int wrap,
int direction, int regex) int direction, int regex)
{ {
u_int i, px, sx; u_int i, px, sx, ssize = 1;
int found = 0; int found = 0, cflags = REG_EXTENDED;
char *sbuf;
regex_t reg;
if (regex) {
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
}
if (direction) { if (direction) {
for (i = fy; i <= endline; i++) { for (i = fy; i <= endline; i++) {
if (regex) if (regex) {
found = window_copy_search_lr_regex(gd, sgd, found = window_copy_search_lr_regex(gd,
&px, &sx, i, fx, gd->sx, cis); &px, &sx, i, fx, gd->sx, &reg);
else } else {
found = window_copy_search_lr(gd, sgd, found = window_copy_search_lr(gd, sgd,
&px, i, fx, gd->sx, cis); &px, i, fx, gd->sx, cis);
}
if (found) if (found)
break; break;
fx = 0; fx = 0;
} }
} else { } else {
for (i = fy + 1; endline < i; i--) { for (i = fy + 1; endline < i; i--) {
if (regex) if (regex) {
found = window_copy_search_rl_regex(gd, sgd, found = window_copy_search_rl_regex(gd,
&px, &sx, i - 1, 0, fx + 1, cis); &px, &sx, i - 1, 0, fx + 1, &reg);
else } else {
found = window_copy_search_rl(gd, sgd, found = window_copy_search_rl(gd, sgd,
&px, i - 1, 0, fx + 1, cis); &px, i - 1, 0, fx + 1, cis);
}
if (found) { if (found) {
i--; i--;
break; break;
@ -2668,6 +2683,10 @@ window_copy_search_jump(struct window_mode_entry *wme, struct grid *gd,
fx = gd->sx - 1; fx = gd->sx - 1;
} }
} }
if (regex) {
free(sbuf);
regfree(&reg);
}
if (found) { if (found) {
window_copy_scroll_to(wme, px, i); window_copy_scroll_to(wme, px, i);
@ -2739,7 +2758,11 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
struct screen_write_ctx ctx; struct screen_write_ctx ctx;
struct grid *gd = s->grid; struct grid *gd = s->grid;
int found, cis, which = -1; int found, cis, which = -1;
int cflags = REG_EXTENDED;
u_int px, py, b, nfound = 0, width; u_int px, py, b, nfound = 0, width;
u_int ssize = 1;
char *sbuf;
regex_t reg;
if (ssp == NULL) { if (ssp == NULL) {
width = screen_write_strlen("%s", data->searchstr); width = screen_write_strlen("%s", data->searchstr);
@ -2757,25 +2780,36 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
free(data->searchmark); free(data->searchmark);
data->searchmark = bit_alloc((gd->hsize + gd->sy) * gd->sx); data->searchmark = bit_alloc((gd->hsize + gd->sy) * gd->sx);
if (regex) {
sbuf = xmalloc(ssize);
sbuf[0] = '\0';
sbuf = window_copy_stringify(ssp->grid, 0, 0, ssp->grid->sx,
sbuf, &ssize);
if (cis)
cflags |= REG_ICASE;
if (regcomp(&reg, sbuf, cflags) != 0) {
free(sbuf);
return (0);
}
}
for (py = 0; py < gd->hsize + gd->sy; py++) { for (py = 0; py < gd->hsize + gd->sy; py++) {
px = 0; px = 0;
for (;;) { for (;;) {
if (regex) { if (regex) {
found = window_copy_search_lr_regex(gd, found = window_copy_search_lr_regex(gd,
ssp->grid, &px, &width, py, px, &px, &width, py, px, gd->sx, &reg);
gd->sx, cis);
if (!found) if (!found)
break; break;
} } else {
else {
found = window_copy_search_lr(gd, ssp->grid, found = window_copy_search_lr(gd, ssp->grid,
&px, py, px, gd->sx, cis); &px, py, px, gd->sx, cis);
if (!found) if (!found)
break; break;
} }
nfound++; nfound++;
if (px == data->cx && py == gd->hsize + data->cy - data->oy) if (px == data->cx &&
py == gd->hsize + data->cy - data->oy)
which = nfound; which = nfound;
b = (py * gd->sx) + px; b = (py * gd->sx) + px;
@ -2784,6 +2818,10 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
px++; px++;
} }
} }
if (regex) {
free(sbuf);
regfree(&reg);
}
if (which != -1) if (which != -1)
data->searchthis = 1 + nfound - which; data->searchthis = 1 + nfound - which;