Rewrite combined character handling to be more consistent and to support

newer Unicode combined characters (which we have to "know" are combined
since they are not width zero). GitHub issue 3600.
This commit is contained in:
nicm 2023-09-01 14:29:11 +00:00
parent c41d59f232
commit 9456258ccc
7 changed files with 1197 additions and 36 deletions

View File

@ -121,6 +121,7 @@ SRCS= alerts.c \
tty-term.c \ tty-term.c \
tty.c \ tty.c \
utf8.c \ utf8.c \
utf8-combined.c \
window-buffer.c \ window-buffer.c \
window-client.c \ window-client.c \
window-clock.c \ window-clock.c \

View File

@ -1840,46 +1840,37 @@ screen_write_cell(struct screen_write_ctx *ctx, const struct grid_cell *gc)
{ {
struct screen *s = ctx->s; struct screen *s = ctx->s;
struct grid *gd = s->grid; struct grid *gd = s->grid;
const struct utf8_data *ud = &gc->data; struct grid_cell copy;
const struct utf8_data zwj = { "\342\200\215", 0, 3, 0 }; const struct utf8_data *ud = &gc->data, *previous = NULL, *combine;
struct grid_line *gl; struct grid_line *gl;
struct grid_cell_entry *gce; struct grid_cell_entry *gce;
struct grid_cell tmp_gc, now_gc; struct grid_cell tmp_gc, now_gc;
struct tty_ctx ttyctx; struct tty_ctx ttyctx;
u_int sx = screen_size_x(s), sy = screen_size_y(s); u_int sx = screen_size_x(s), sy = screen_size_y(s);
u_int width = gc->data.width, xx, last, cx, cy; u_int width = ud->width, xx, last, cx, cy;
int selected, skip = 1; int selected, skip = 1;
/* Ignore padding cells. */ /* Ignore padding cells. */
if (gc->flags & GRID_FLAG_PADDING) if (gc->flags & GRID_FLAG_PADDING)
return; return;
/* /* Check if this cell needs to be combined with the previous cell. */
* If this is a zero width joiner, set the flag so the next character if (ctx->flags & SCREEN_WRITE_COMBINE)
* will be treated as zero width and appended. Note that we assume a previous = &ctx->previous;
* ZWJ will not change the width - the width of the first character is switch (utf8_try_combined(ud, previous, &combine, &width)) {
* used. case UTF8_DISCARD_NOW:
*/ log_debug("%s: UTF8_DISCARD_NOW (width %u)", __func__, width);
if (ud->size == 3 && memcmp(ud->data, "\342\200\215", 3) == 0) { ctx->flags &= ~SCREEN_WRITE_COMBINE;
log_debug("zero width joiner at %u,%u", s->cx, s->cy);
ctx->flags |= SCREEN_WRITE_ZWJ;
return; return;
} case UTF8_WRITE_NOW:
log_debug("%s: UTF8_WRITE_NOW (width %u)", __func__, width);
/* ctx->flags &= ~SCREEN_WRITE_COMBINE;
* If the width is zero, combine onto the previous character. We always break;
* combine with the cell to the left of the cursor position. In theory, case UTF8_COMBINE_NOW:
* the application could have moved the cursor somewhere else, but if log_debug("%s: UTF8_COMBINE_NOW (width %u)", __func__, width);
* they are silly enough to do that, who cares?
*/
if (ctx->flags & SCREEN_WRITE_ZWJ) {
screen_write_collect_flush(ctx, 0, __func__); screen_write_collect_flush(ctx, 0, __func__);
screen_write_combine(ctx, &zwj, &xx, &cx); gc = screen_write_combine(ctx, combine, &xx, &cx);
} if (gc != NULL) {
if (width == 0 || (ctx->flags & SCREEN_WRITE_ZWJ)) {
ctx->flags &= ~SCREEN_WRITE_ZWJ;
screen_write_collect_flush(ctx, 0, __func__);
if ((gc = screen_write_combine(ctx, ud, &xx, &cx)) != NULL) {
cy = s->cy; cy = s->cy;
screen_write_set_cursor(ctx, xx, s->cy); screen_write_set_cursor(ctx, xx, s->cy);
screen_write_initctx(ctx, &ttyctx, 0); screen_write_initctx(ctx, &ttyctx, 0);
@ -1887,8 +1878,27 @@ screen_write_cell(struct screen_write_ctx *ctx, const struct grid_cell *gc)
tty_write(tty_cmd_cell, &ttyctx); tty_write(tty_cmd_cell, &ttyctx);
s->cx = cx; s->cy = cy; s->cx = cx; s->cy = cy;
} }
ctx->flags &= ~SCREEN_WRITE_COMBINE;
return;
case UTF8_WRITE_MAYBE_COMBINE:
log_debug("%s: UTF8_WRITE_MAYBE_COMBINE (width %u)", __func__,
width);
utf8_copy(&ctx->previous, ud);
ctx->flags |= SCREEN_WRITE_COMBINE;
break;
case UTF8_DISCARD_MAYBE_COMBINE:
log_debug("%s: UTF8_DISCARD_MAYBE_COMBINE (width %u)", __func__,
width);
utf8_copy(&ctx->previous, ud);
ctx->flags |= SCREEN_WRITE_COMBINE;
return; return;
} }
if (width != ud->width) {
memcpy(&copy, gc, sizeof copy);
copy.data.width = width;
gc = &copy;
}
ud = NULL;
/* Flush any existing scrolling. */ /* Flush any existing scrolling. */
screen_write_collect_flush(ctx, 1, __func__); screen_write_collect_flush(ctx, 1, __func__);

View File

@ -205,6 +205,7 @@ server_start(struct tmuxproc *client, int flags, struct event_base *base,
fatal("pledge failed"); fatal("pledge failed");
input_key_build(); input_key_build();
utf8_build_combined();
RB_INIT(&windows); RB_INIT(&windows);
RB_INIT(&all_window_panes); RB_INIT(&all_window_panes);
TAILQ_INIT(&clients); TAILQ_INIT(&clients);

18
tmux.h
View File

@ -618,6 +618,15 @@ enum utf8_state {
UTF8_ERROR UTF8_ERROR
}; };
/*
 * UTF-8 combine state. These are the outcomes that screen_write_cell switches
 * on after calling utf8_try_combined for an incoming character.
 */
enum utf8_combine_state {
	/* Discard immediately. */
	UTF8_DISCARD_NOW = 0,

	/* Do not combine, write immediately. */
	UTF8_WRITE_NOW = 1,

	/* Combine immediately. */
	UTF8_COMBINE_NOW = 2,

	/* Write but try to combine the next. */
	UTF8_WRITE_MAYBE_COMBINE = 3,

	/* Discard but try to combine the next. */
	UTF8_DISCARD_MAYBE_COMBINE = 4
};
/* Colour flags. */ /* Colour flags. */
#define COLOUR_FLAG_256 0x01000000 #define COLOUR_FLAG_256 0x01000000
#define COLOUR_FLAG_RGB 0x02000000 #define COLOUR_FLAG_RGB 0x02000000
@ -890,7 +899,7 @@ struct screen_write_ctx {
int flags; int flags;
#define SCREEN_WRITE_SYNC 0x1 #define SCREEN_WRITE_SYNC 0x1
#define SCREEN_WRITE_ZWJ 0x2 #define SCREEN_WRITE_COMBINE 0x2
screen_write_init_ctx_cb init_ctx_cb; screen_write_init_ctx_cb init_ctx_cb;
void *arg; void *arg;
@ -898,6 +907,7 @@ struct screen_write_ctx {
struct screen_write_citem *item; struct screen_write_citem *item;
u_int scrolled; u_int scrolled;
u_int bg; u_int bg;
struct utf8_data previous;
}; };
/* Box border lines option. */ /* Box border lines option. */
@ -3285,6 +3295,12 @@ char *utf8_padcstr(const char *, u_int);
char *utf8_rpadcstr(const char *, u_int); char *utf8_rpadcstr(const char *, u_int);
int utf8_cstrhas(const char *, const struct utf8_data *); int utf8_cstrhas(const char *, const struct utf8_data *);
/* utf8-combined.c */
void utf8_build_combined(void);
int utf8_try_combined(const struct utf8_data *,
const struct utf8_data *, const struct utf8_data **,
u_int *width);
/* procname.c */ /* procname.c */
char *get_proc_name(int, char *); char *get_proc_name(int, char *);
char *get_proc_cwd(int); char *get_proc_cwd(int);

1135
utf8-combined.c Normal file

File diff suppressed because it is too large Load Diff

8
utf8.c
View File

@ -136,8 +136,8 @@ utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
goto fail; goto fail;
if (ud->size <= 3) { if (ud->size <= 3) {
index = (((utf8_char)ud->data[2] << 16)| index = (((utf8_char)ud->data[2] << 16)|
((utf8_char)ud->data[1] << 8)| ((utf8_char)ud->data[1] << 8)|
((utf8_char)ud->data[0])); ((utf8_char)ud->data[0]));
} else if (utf8_put_item(ud->data, ud->size, &index) != 0) } else if (utf8_put_item(ud->data, ud->size, &index) != 0)
goto fail; goto fail;
*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index; *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
@ -226,9 +226,9 @@ utf8_width(struct utf8_data *ud, int *width)
case 0: case 0:
return (UTF8_ERROR); return (UTF8_ERROR);
} }
log_debug("UTF-8 %.*s is %08X", (int)ud->size, ud->data, (u_int)wc); log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)wc);
*width = wcwidth(wc); *width = wcwidth(wc);
log_debug("wcwidth(%08X) returned %d", (u_int)wc, *width); log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
if (*width < 0) { if (*width < 0) {
/* /*
* C1 control characters are nonprintable, so they are always * C1 control characters are nonprintable, so they are always

View File

@ -3763,8 +3763,7 @@ window_copy_search(struct window_mode_entry *wme, int direction, int regex)
} }
} }
endline = gd->hsize + gd->sy - 1; endline = gd->hsize + gd->sy - 1;
} } else {
else {
window_copy_move_left(s, &fx, &fy, wrapflag); window_copy_move_left(s, &fx, &fy, wrapflag);
endline = 0; endline = 0;
} }
@ -3806,8 +3805,7 @@ window_copy_search(struct window_mode_entry *wme, int direction, int regex)
data->cy = fy - screen_hsize(data->backing) + data->cy = fy - screen_hsize(data->backing) +
data-> oy; data-> oy;
} }
} } else {
else {
/* /*
* When searching backward, position the cursor at the * When searching backward, position the cursor at the
* beginning of the mark. * beginning of the mark.