mirror of
https://github.com/tmux/tmux.git
synced 2024-12-12 17:38:48 +00:00
Instead of storing all UTF-8 characters in the extended cell, which means
that 14 bytes are wasted for each character in the BMP, only store characters of three bytes or less in the cell itself and store others (outside the BMP or with combining characters) in a separate global tree. This can reduce grid memory use for heavy Unicode users by around 30%.
This commit is contained in:
parent
14a9fd58d5
commit
3a5219c6d0
51
grid.c
51
grid.c
@ -100,11 +100,11 @@ grid_get_extended_cell(struct grid_line *gl, struct grid_cell_entry *gce,
|
||||
}
|
||||
|
||||
/* Set cell as extended. */
|
||||
static struct grid_cell *
|
||||
static struct grid_extd_entry *
|
||||
grid_extended_cell(struct grid_line *gl, struct grid_cell_entry *gce,
|
||||
const struct grid_cell *gc)
|
||||
{
|
||||
struct grid_cell *gcp;
|
||||
struct grid_extd_entry *gee;
|
||||
int flags = (gc->flags & ~GRID_FLAG_CLEARED);
|
||||
|
||||
if (~gce->flags & GRID_FLAG_EXTENDED)
|
||||
@ -113,10 +113,14 @@ grid_extended_cell(struct grid_line *gl, struct grid_cell_entry *gce,
|
||||
fatalx("offset too big");
|
||||
gl->flags |= GRID_LINE_EXTENDED;
|
||||
|
||||
gcp = &gl->extddata[gce->offset];
|
||||
memcpy(gcp, gc, sizeof *gcp);
|
||||
gcp->flags = flags;
|
||||
return (gcp);
|
||||
gee = &gl->extddata[gce->offset];
|
||||
gee->data = utf8_map_big(&gc->data);
|
||||
gee->attr = gc->attr;
|
||||
gee->flags = flags;
|
||||
gee->fg = gc->fg;
|
||||
gee->bg = gc->bg;
|
||||
gee->us = gc->us;
|
||||
return (gee);
|
||||
}
|
||||
|
||||
/* Free up unused extended cells. */
|
||||
@ -124,9 +128,9 @@ static void
|
||||
grid_compact_line(struct grid_line *gl)
|
||||
{
|
||||
int new_extdsize = 0;
|
||||
struct grid_cell *new_extddata;
|
||||
struct grid_extd_entry *new_extddata;
|
||||
struct grid_cell_entry *gce;
|
||||
struct grid_cell *gc;
|
||||
struct grid_extd_entry *gee;
|
||||
u_int px, idx;
|
||||
|
||||
if (gl->extdsize == 0)
|
||||
@ -150,8 +154,8 @@ grid_compact_line(struct grid_line *gl)
|
||||
for (px = 0; px < gl->cellsize; px++) {
|
||||
gce = &gl->celldata[px];
|
||||
if (gce->flags & GRID_FLAG_EXTENDED) {
|
||||
gc = &gl->extddata[gce->offset];
|
||||
memcpy(&new_extddata[idx], gc, sizeof *gc);
|
||||
gee = &gl->extddata[gce->offset];
|
||||
memcpy(&new_extddata[idx], gee, sizeof *gee);
|
||||
gce->offset = idx++;
|
||||
}
|
||||
}
|
||||
@ -181,17 +185,14 @@ grid_clear_cell(struct grid *gd, u_int px, u_int py, u_int bg)
|
||||
{
|
||||
struct grid_line *gl = &gd->linedata[py];
|
||||
struct grid_cell_entry *gce = &gl->celldata[px];
|
||||
struct grid_cell *gc;
|
||||
struct grid_extd_entry *gee;
|
||||
|
||||
memcpy(gce, &grid_cleared_entry, sizeof *gce);
|
||||
if (bg != 8) {
|
||||
if (bg & COLOUR_FLAG_RGB) {
|
||||
grid_get_extended_cell(gl, gce, gce->flags);
|
||||
gl->flags |= GRID_LINE_EXTENDED;
|
||||
|
||||
gc = &gl->extddata[gce->offset];
|
||||
memcpy(gc, &grid_cleared_cell, sizeof *gc);
|
||||
gc->bg = bg;
|
||||
gee = grid_extended_cell(gl, gce, &grid_cleared_cell);
|
||||
gee->bg = bg;
|
||||
} else {
|
||||
if (bg & COLOUR_FLAG_256)
|
||||
gce->flags |= GRID_FLAG_BG256;
|
||||
@ -483,12 +484,20 @@ static void
|
||||
grid_get_cell1(struct grid_line *gl, u_int px, struct grid_cell *gc)
|
||||
{
|
||||
struct grid_cell_entry *gce = &gl->celldata[px];
|
||||
struct grid_extd_entry *gee;
|
||||
|
||||
if (gce->flags & GRID_FLAG_EXTENDED) {
|
||||
if (gce->offset >= gl->extdsize)
|
||||
memcpy(gc, &grid_default_cell, sizeof *gc);
|
||||
else
|
||||
memcpy(gc, &gl->extddata[gce->offset], sizeof *gc);
|
||||
else {
|
||||
gee = &gl->extddata[gce->offset];
|
||||
gc->flags = gee->flags;
|
||||
gc->attr = gee->attr;
|
||||
gc->fg = gee->fg;
|
||||
gc->bg = gee->bg;
|
||||
gc->us = gee->us;
|
||||
utf8_get_big(gee->data, &gc->data);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -545,7 +554,7 @@ grid_set_cells(struct grid *gd, u_int px, u_int py, const struct grid_cell *gc,
|
||||
{
|
||||
struct grid_line *gl;
|
||||
struct grid_cell_entry *gce;
|
||||
struct grid_cell *gcp;
|
||||
struct grid_extd_entry *gee;
|
||||
u_int i;
|
||||
|
||||
if (grid_check_y(gd, __func__, py) != 0)
|
||||
@ -560,8 +569,8 @@ grid_set_cells(struct grid *gd, u_int px, u_int py, const struct grid_cell *gc,
|
||||
for (i = 0; i < slen; i++) {
|
||||
gce = &gl->celldata[px + i];
|
||||
if (grid_need_extended_cell(gce, gc)) {
|
||||
gcp = grid_extended_cell(gl, gce, gc);
|
||||
utf8_set(&gcp->data, s[i]);
|
||||
gee = grid_extended_cell(gl, gce, gc);
|
||||
gee->data = utf8_set_big(s[i], 1);
|
||||
} else
|
||||
grid_store_cell(gce, gc, s[i]);
|
||||
}
|
||||
|
31
tmux.h
31
tmux.h
@ -597,11 +597,11 @@ struct msg_write_close {
|
||||
#define MOTION_MOUSE_MODES (MODE_MOUSE_BUTTON|MODE_MOUSE_ALL)
|
||||
|
||||
/*
|
||||
* A single UTF-8 character. UTF8_SIZE must be big enough to hold
|
||||
* combining characters as well, currently at most five (of three
|
||||
* bytes) are supported.
|
||||
*/
|
||||
#define UTF8_SIZE 18
|
||||
* A single UTF-8 character. UTF8_SIZE must be big enough to hold combining
|
||||
* characters as well. It can't be more than 32 bytes without changes to how
|
||||
* big characters are stored.
|
||||
*/
|
||||
#define UTF8_SIZE 21
|
||||
struct utf8_data {
|
||||
u_char data[UTF8_SIZE];
|
||||
|
||||
@ -609,7 +609,7 @@ struct utf8_data {
|
||||
u_char size;
|
||||
|
||||
u_char width; /* 0xff if invalid */
|
||||
} __packed;
|
||||
};
|
||||
enum utf8_state {
|
||||
UTF8_MORE,
|
||||
UTF8_DONE,
|
||||
@ -663,13 +663,25 @@ enum utf8_state {
|
||||
|
||||
/* Grid cell data. */
|
||||
struct grid_cell {
|
||||
struct utf8_data data; /* 21 bytes */
|
||||
struct utf8_data data;
|
||||
u_short attr;
|
||||
u_char flags;
|
||||
int fg;
|
||||
int bg;
|
||||
int us;
|
||||
};
|
||||
|
||||
/* Grid extended cell entry. */
|
||||
struct grid_extd_entry {
|
||||
uint32_t data;
|
||||
u_short attr;
|
||||
u_char flags;
|
||||
int fg;
|
||||
int bg;
|
||||
int us;
|
||||
} __packed;
|
||||
|
||||
/* Grid cell entry. */
|
||||
struct grid_cell_entry {
|
||||
u_char flags;
|
||||
union {
|
||||
@ -690,7 +702,7 @@ struct grid_line {
|
||||
struct grid_cell_entry *celldata;
|
||||
|
||||
u_int extdsize;
|
||||
struct grid_cell *extddata;
|
||||
struct grid_extd_entry *extddata;
|
||||
|
||||
int flags;
|
||||
} __packed;
|
||||
@ -2877,6 +2889,9 @@ u_int session_group_attached_count(struct session_group *);
|
||||
void session_renumber_windows(struct session *);
|
||||
|
||||
/* utf8.c */
|
||||
uint32_t utf8_set_big(char, u_int);
|
||||
uint32_t utf8_map_big(const struct utf8_data *);
|
||||
void utf8_get_big(uint32_t, struct utf8_data *);
|
||||
void utf8_set(struct utf8_data *, u_char);
|
||||
void utf8_copy(struct utf8_data *, const struct utf8_data *);
|
||||
enum utf8_state utf8_open(struct utf8_data *, u_char);
|
||||
|
174
utf8.c
174
utf8.c
@ -29,6 +29,180 @@
|
||||
|
||||
static int utf8_width(wchar_t);
|
||||
|
||||
struct utf8_big_item {
|
||||
u_int index;
|
||||
RB_ENTRY(utf8_big_item) entry;
|
||||
|
||||
char data[UTF8_SIZE];
|
||||
u_char size;
|
||||
};
|
||||
RB_HEAD(utf8_big_tree, utf8_big_item);
|
||||
|
||||
static int
|
||||
utf8_big_cmp(struct utf8_big_item *bi1, struct utf8_big_item *bi2)
|
||||
{
|
||||
if (bi1->size < bi2->size)
|
||||
return (-1);
|
||||
if (bi1->size > bi2->size)
|
||||
return (1);
|
||||
return (memcmp(bi1->data, bi2->data, bi1->size));
|
||||
}
|
||||
RB_GENERATE_STATIC(utf8_big_tree, utf8_big_item, entry, utf8_big_cmp);
|
||||
static struct utf8_big_tree utf8_big_tree = RB_INITIALIZER(utf8_big_tree);
|
||||
|
||||
static struct utf8_big_item *utf8_big_list;
|
||||
static u_int utf8_big_list_size;
|
||||
static u_int utf8_big_list_used;
|
||||
|
||||
union utf8_big_map {
|
||||
uint32_t value;
|
||||
struct {
|
||||
u_char flags;
|
||||
#define UTF8_BIG_SIZE 0x1f
|
||||
#define UTF8_BIG_WIDTH2 0x20
|
||||
|
||||
u_char data[3];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
static const union utf8_big_map utf8_big_space1 = {
|
||||
.flags = 1,
|
||||
.data = " "
|
||||
};
|
||||
static const union utf8_big_map utf8_big_space2 = {
|
||||
.flags = UTF8_BIG_WIDTH2|2,
|
||||
.data = " "
|
||||
};
|
||||
|
||||
/* Get a big item by index. */
|
||||
static struct utf8_big_item *
|
||||
utf8_get_big_item(const char *data, size_t size)
|
||||
{
|
||||
struct utf8_big_item bi;
|
||||
|
||||
memcpy(bi.data, data, size);
|
||||
bi.size = size;
|
||||
|
||||
return (RB_FIND(utf8_big_tree, &utf8_big_tree, &bi));
|
||||
}
|
||||
|
||||
/* Add a big item. */
|
||||
static int
|
||||
utf8_put_big_item(const char *data, size_t size, u_int *index)
|
||||
{
|
||||
struct utf8_big_item *bi;
|
||||
|
||||
bi = utf8_get_big_item(data, size);
|
||||
if (bi != NULL) {
|
||||
*index = bi->index;
|
||||
log_debug("%s: have %.*s at %u", __func__, (int)size, data,
|
||||
*index);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (utf8_big_list_used == utf8_big_list_size) {
|
||||
if (utf8_big_list_size == 0xffffff)
|
||||
return (-1);
|
||||
if (utf8_big_list_size == 0)
|
||||
utf8_big_list_size = 256;
|
||||
else if (utf8_big_list_size > 0x7fffff)
|
||||
utf8_big_list_size = 0xffffff;
|
||||
else
|
||||
utf8_big_list_size *= 2;
|
||||
utf8_big_list = xreallocarray(utf8_big_list, utf8_big_list_size,
|
||||
sizeof *utf8_big_list);
|
||||
}
|
||||
*index = utf8_big_list_used++;
|
||||
|
||||
bi = &utf8_big_list[*index];
|
||||
bi->index = *index;
|
||||
memcpy(bi->data, data, size);
|
||||
bi->size = size;
|
||||
RB_INSERT(utf8_big_tree, &utf8_big_tree, bi);
|
||||
|
||||
log_debug("%s: added %.*s at %u", __func__, (int)size, data, *index);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Get UTF-8 as index into buffer. */
|
||||
uint32_t
|
||||
utf8_map_big(const struct utf8_data *ud)
|
||||
{
|
||||
union utf8_big_map m = { .value = 0 };
|
||||
u_int o;
|
||||
const char *data = ud->data;
|
||||
size_t size = ud->size;
|
||||
|
||||
if (ud->width != 1 && ud->width != 2)
|
||||
return (utf8_big_space1.value);
|
||||
|
||||
if (size > UTF8_BIG_SIZE)
|
||||
goto fail;
|
||||
if (size == 1)
|
||||
return (utf8_set_big(data[0], 1));
|
||||
|
||||
m.flags = size;
|
||||
if (ud->width == 2)
|
||||
m.flags |= UTF8_BIG_WIDTH2;
|
||||
|
||||
if (size <= 3) {
|
||||
memcpy(&m.data, data, size);
|
||||
return (m.value);
|
||||
}
|
||||
|
||||
if (utf8_put_big_item(data, size, &o) != 0)
|
||||
goto fail;
|
||||
m.data[0] = (o & 0xff);
|
||||
m.data[1] = (o >> 8) & 0xff;
|
||||
m.data[2] = (o >> 16);
|
||||
return (m.value);
|
||||
|
||||
fail:
|
||||
if (ud->width == 1)
|
||||
return (utf8_big_space1.value);
|
||||
return (utf8_big_space2.value);
|
||||
}
|
||||
|
||||
/* Get UTF-8 from index into buffer. */
|
||||
void
|
||||
utf8_get_big(uint32_t v, struct utf8_data *ud)
|
||||
{
|
||||
union utf8_big_map m = { .value = v };
|
||||
struct utf8_big_item *bi;
|
||||
u_int o;
|
||||
|
||||
memset(ud, 0, sizeof *ud);
|
||||
ud->size = ud->have = (m.flags & UTF8_BIG_SIZE);
|
||||
if (m.flags & UTF8_BIG_WIDTH2)
|
||||
ud->width = 2;
|
||||
else
|
||||
ud->width = 1;
|
||||
|
||||
if (ud->size <= 3) {
|
||||
memcpy(ud->data, m.data, ud->size);
|
||||
return;
|
||||
}
|
||||
|
||||
o = ((uint32_t)m.data[2] << 16)|((uint32_t)m.data[1] << 8)|m.data[0];
|
||||
if (o >= utf8_big_list_used)
|
||||
memset(ud->data, ' ', ud->size);
|
||||
else {
|
||||
bi = &utf8_big_list[o];
|
||||
memcpy(ud->data, bi->data, ud->size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Get big value for UTF-8 single character. */
|
||||
uint32_t
|
||||
utf8_set_big(char c, u_int width)
|
||||
{
|
||||
union utf8_big_map m = { .flags = 1, .data[0] = c };
|
||||
|
||||
if (width == 2)
|
||||
m.flags |= UTF8_BIG_WIDTH2;
|
||||
return (m.value);
|
||||
}
|
||||
|
||||
/* Set a single character. */
|
||||
void
|
||||
utf8_set(struct utf8_data *ud, u_char ch)
|
||||
|
@ -2551,23 +2551,33 @@ window_copy_search_rl_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
|
||||
}
|
||||
|
||||
static const char *
|
||||
window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size)
|
||||
window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size,
|
||||
int *allocated)
|
||||
{
|
||||
static struct utf8_data ud;
|
||||
struct grid_cell_entry *gce;
|
||||
char *copy;
|
||||
|
||||
if (px >= gl->cellsize) {
|
||||
*size = 1;
|
||||
*allocated = 0;
|
||||
return (" ");
|
||||
}
|
||||
|
||||
gce = &gl->celldata[px];
|
||||
if (~gce->flags & GRID_FLAG_EXTENDED) {
|
||||
*size = 1;
|
||||
*allocated = 0;
|
||||
return (&gce->data.data);
|
||||
}
|
||||
|
||||
*size = gl->extddata[gce->offset].data.size;
|
||||
return (gl->extddata[gce->offset].data.data);
|
||||
utf8_get_big(gl->extddata[gce->offset].data, &ud);
|
||||
*size = ud.size;
|
||||
*allocated = 1;
|
||||
|
||||
copy = xmalloc(ud.size);
|
||||
memcpy(copy, ud.data, ud.size);
|
||||
return (copy);
|
||||
}
|
||||
|
||||
/* Find last match in given range. */
|
||||
@ -2630,6 +2640,7 @@ window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
|
||||
const struct grid_line *gl;
|
||||
const char *d;
|
||||
size_t bufsize = 1024, dlen;
|
||||
int allocated;
|
||||
|
||||
while (bufsize < newsize)
|
||||
bufsize *= 2;
|
||||
@ -2638,7 +2649,7 @@ window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
|
||||
gl = grid_peek_line(gd, py);
|
||||
bx = *size - 1;
|
||||
for (ax = first; ax < last; ax++) {
|
||||
d = window_copy_cellstring(gl, ax, &dlen);
|
||||
d = window_copy_cellstring(gl, ax, &dlen, &allocated);
|
||||
newsize += dlen;
|
||||
while (bufsize < newsize) {
|
||||
bufsize *= 2;
|
||||
@ -2650,6 +2661,8 @@ window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
|
||||
memcpy(buf + bx, d, dlen);
|
||||
bx += dlen;
|
||||
}
|
||||
if (allocated)
|
||||
free((void *)d);
|
||||
}
|
||||
buf[newsize - 1] = '\0';
|
||||
|
||||
@ -2670,6 +2683,7 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
|
||||
struct {
|
||||
const char *d;
|
||||
size_t dlen;
|
||||
int allocated;
|
||||
} *cells;
|
||||
|
||||
/* Populate the array of cell data. */
|
||||
@ -2680,7 +2694,7 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
|
||||
gl = grid_peek_line(gd, pywrap);
|
||||
while (cell < ncells) {
|
||||
cells[cell].d = window_copy_cellstring(gl, px,
|
||||
&cells[cell].dlen);
|
||||
&cells[cell].dlen, &cells[cell].allocated);
|
||||
cell++;
|
||||
px++;
|
||||
if (px == gd->sx) {
|
||||
@ -2738,6 +2752,10 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
|
||||
*ppy = pywrap;
|
||||
|
||||
/* Free cell data. */
|
||||
for (cell = 0; cell < ncells; cell++) {
|
||||
if (cells[cell].allocated)
|
||||
free((void *)cells[cell].d);
|
||||
}
|
||||
free(cells);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user