Rewrite combined character handling to be more consistent and to support
newer Unicode combined characters (which we have to "know" are combined
since they are not width zero). GitHub issue 3600.
This commit is contained in:
nicm 2023-09-01 14:29:11 +00:00
parent c41d59f232
commit 9456258ccc
7 changed files with 1197 additions and 36 deletions

View File

@ -121,6 +121,7 @@ SRCS= alerts.c \
tty-term.c \
tty.c \
utf8.c \
utf8-combined.c \
window-buffer.c \
window-client.c \
window-clock.c \

View File

@ -1840,46 +1840,37 @@ screen_write_cell(struct screen_write_ctx *ctx, const struct grid_cell *gc)
{
struct screen *s = ctx->s;
struct grid *gd = s->grid;
const struct utf8_data *ud = &gc->data;
const struct utf8_data zwj = { "\342\200\215", 0, 3, 0 };
struct grid_cell copy;
const struct utf8_data *ud = &gc->data, *previous = NULL, *combine;
struct grid_line *gl;
struct grid_cell_entry *gce;
struct grid_cell tmp_gc, now_gc;
struct tty_ctx ttyctx;
u_int sx = screen_size_x(s), sy = screen_size_y(s);
u_int width = gc->data.width, xx, last, cx, cy;
u_int width = ud->width, xx, last, cx, cy;
int selected, skip = 1;
/* Ignore padding cells. */
if (gc->flags & GRID_FLAG_PADDING)
return;
/*
* If this is a zero width joiner, set the flag so the next character
* will be treated as zero width and appended. Note that we assume a
* ZWJ will not change the width - the width of the first character is
* used.
*/
if (ud->size == 3 && memcmp(ud->data, "\342\200\215", 3) == 0) {
log_debug("zero width joiner at %u,%u", s->cx, s->cy);
ctx->flags |= SCREEN_WRITE_ZWJ;
/* Check if this cell needs to be combined with the previous cell. */
if (ctx->flags & SCREEN_WRITE_COMBINE)
previous = &ctx->previous;
switch (utf8_try_combined(ud, previous, &combine, &width)) {
case UTF8_DISCARD_NOW:
log_debug("%s: UTF8_DISCARD_NOW (width %u)", __func__, width);
ctx->flags &= ~SCREEN_WRITE_COMBINE;
return;
}
/*
* If the width is zero, combine onto the previous character. We always
* combine with the cell to the left of the cursor position. In theory,
* the application could have moved the cursor somewhere else, but if
* they are silly enough to do that, who cares?
*/
if (ctx->flags & SCREEN_WRITE_ZWJ) {
case UTF8_WRITE_NOW:
log_debug("%s: UTF8_WRITE_NOW (width %u)", __func__, width);
ctx->flags &= ~SCREEN_WRITE_COMBINE;
break;
case UTF8_COMBINE_NOW:
log_debug("%s: UTF8_COMBINE_NOW (width %u)", __func__, width);
screen_write_collect_flush(ctx, 0, __func__);
screen_write_combine(ctx, &zwj, &xx, &cx);
}
if (width == 0 || (ctx->flags & SCREEN_WRITE_ZWJ)) {
ctx->flags &= ~SCREEN_WRITE_ZWJ;
screen_write_collect_flush(ctx, 0, __func__);
if ((gc = screen_write_combine(ctx, ud, &xx, &cx)) != NULL) {
gc = screen_write_combine(ctx, combine, &xx, &cx);
if (gc != NULL) {
cy = s->cy;
screen_write_set_cursor(ctx, xx, s->cy);
screen_write_initctx(ctx, &ttyctx, 0);
@ -1887,8 +1878,27 @@ screen_write_cell(struct screen_write_ctx *ctx, const struct grid_cell *gc)
tty_write(tty_cmd_cell, &ttyctx);
s->cx = cx; s->cy = cy;
}
ctx->flags &= ~SCREEN_WRITE_COMBINE;
return;
case UTF8_WRITE_MAYBE_COMBINE:
log_debug("%s: UTF8_WRITE_MAYBE_COMBINE (width %u)", __func__,
width);
utf8_copy(&ctx->previous, ud);
ctx->flags |= SCREEN_WRITE_COMBINE;
break;
case UTF8_DISCARD_MAYBE_COMBINE:
log_debug("%s: UTF8_DISCARD_MAYBE_COMBINE (width %u)", __func__,
width);
utf8_copy(&ctx->previous, ud);
ctx->flags |= SCREEN_WRITE_COMBINE;
return;
}
if (width != ud->width) {
memcpy(&copy, gc, sizeof copy);
copy.data.width = width;
gc = &copy;
}
ud = NULL;
/* Flush any existing scrolling. */
screen_write_collect_flush(ctx, 1, __func__);

View File

@ -205,6 +205,7 @@ server_start(struct tmuxproc *client, int flags, struct event_base *base,
fatal("pledge failed");
input_key_build();
utf8_build_combined();
RB_INIT(&windows);
RB_INIT(&all_window_panes);
TAILQ_INIT(&clients);

18
tmux.h
View File

@ -618,6 +618,15 @@ enum utf8_state {
UTF8_ERROR
};
/*
 * UTF-8 combine state: tells the caller what to do with the character it
 * has just been handed and whether the following character may need to be
 * combined with it.
 */
enum utf8_combine_state {
	/* Drop this character straight away. */
	UTF8_DISCARD_NOW = 0,

	/* Not a combining character: write it out straight away. */
	UTF8_WRITE_NOW = 1,

	/* Combine this character straight away. */
	UTF8_COMBINE_NOW = 2,

	/* Write this character, then try to combine the next one. */
	UTF8_WRITE_MAYBE_COMBINE = 3,

	/* Drop this character, then try to combine the next one. */
	UTF8_DISCARD_MAYBE_COMBINE = 4
};
/* Colour flags. */
#define COLOUR_FLAG_256 0x01000000
#define COLOUR_FLAG_RGB 0x02000000
@ -890,7 +899,7 @@ struct screen_write_ctx {
int flags;
#define SCREEN_WRITE_SYNC 0x1
#define SCREEN_WRITE_ZWJ 0x2
#define SCREEN_WRITE_COMBINE 0x2
screen_write_init_ctx_cb init_ctx_cb;
void *arg;
@ -898,6 +907,7 @@ struct screen_write_ctx {
struct screen_write_citem *item;
u_int scrolled;
u_int bg;
struct utf8_data previous;
};
/* Box border lines option. */
@ -3285,6 +3295,12 @@ char *utf8_padcstr(const char *, u_int);
char *utf8_rpadcstr(const char *, u_int);
int utf8_cstrhas(const char *, const struct utf8_data *);
/* utf8-combined.c */
/*
 * Called once from server_start() before the window trees are initialised.
 * NOTE(review): presumably builds the lookup data that utf8_try_combined()
 * consults - the definition is not visible here, confirm in utf8-combined.c.
 */
void utf8_build_combined(void);
/*
 * Decide how a character is to be handled with respect to combining.
 *
 * As used by screen_write_cell(), the arguments are, in order: the character
 * being written; the previously remembered character, or NULL when no combine
 * is pending; an out pointer that receives the data later handed to
 * screen_write_combine(); and the width, which the caller seeds with the
 * character's own width and compares against it afterwards (so the callee
 * may presumably adjust it - confirm in utf8-combined.c).
 *
 * The declared return type is int, but the caller switches on the result
 * using the enum utf8_combine_state values.
 */
int utf8_try_combined(const struct utf8_data *,
const struct utf8_data *, const struct utf8_data **,
u_int *width);
/* procname.c */
char *get_proc_name(int, char *);
char *get_proc_cwd(int);

1135
utf8-combined.c Normal file

File diff suppressed because it is too large Load Diff

8
utf8.c
View File

@ -136,8 +136,8 @@ utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
goto fail;
if (ud->size <= 3) {
index = (((utf8_char)ud->data[2] << 16)|
((utf8_char)ud->data[1] << 8)|
((utf8_char)ud->data[0]));
((utf8_char)ud->data[1] << 8)|
((utf8_char)ud->data[0]));
} else if (utf8_put_item(ud->data, ud->size, &index) != 0)
goto fail;
*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
@ -226,9 +226,9 @@ utf8_width(struct utf8_data *ud, int *width)
case 0:
return (UTF8_ERROR);
}
log_debug("UTF-8 %.*s is %08X", (int)ud->size, ud->data, (u_int)wc);
log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)wc);
*width = wcwidth(wc);
log_debug("wcwidth(%08X) returned %d", (u_int)wc, *width);
log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
if (*width < 0) {
/*
* C1 control characters are nonprintable, so they are always

View File

@ -3763,8 +3763,7 @@ window_copy_search(struct window_mode_entry *wme, int direction, int regex)
}
}
endline = gd->hsize + gd->sy - 1;
}
else {
} else {
window_copy_move_left(s, &fx, &fy, wrapflag);
endline = 0;
}
@ -3806,8 +3805,7 @@ window_copy_search(struct window_mode_entry *wme, int direction, int regex)
data->cy = fy - screen_hsize(data->backing) +
data-> oy;
}
}
else {
} else {
/*
* When searching backward, position the cursor at the
* beginning of the mark.