Handle regional indicators and emoji modifiers in a better way, GitHub issue 3998.
This commit is contained in:
nicm
2025-11-01 16:44:24 +00:00
parent 8cb2805eb7
commit d90b414223
4 changed files with 117 additions and 68 deletions

View File

@@ -2050,11 +2050,9 @@ screen_write_combine(struct screen_write_ctx *ctx, const struct grid_cell *gc)
case HANGULJAMO_STATE_COMPOSABLE: case HANGULJAMO_STATE_COMPOSABLE:
break; break;
case HANGULJAMO_STATE_NOT_HANGULJAMO: case HANGULJAMO_STATE_NOT_HANGULJAMO:
if (utf8_is_modifier(ud)) { if (utf8_should_combine(&last.data, ud))
if (last.data.size < 2)
return (0);
force_wide = 1; force_wide = 1;
} else if (!utf8_has_zwj(&last.data)) else if (!utf8_has_zwj(&last.data))
return (0); return (0);
break; break;
} }

3
tmux.h
View File

@@ -3499,7 +3499,8 @@ int utf8_cstrhas(const char *, const struct utf8_data *);
int utf8_has_zwj(const struct utf8_data *); int utf8_has_zwj(const struct utf8_data *);
int utf8_is_zwj(const struct utf8_data *); int utf8_is_zwj(const struct utf8_data *);
int utf8_is_vs(const struct utf8_data *); int utf8_is_vs(const struct utf8_data *);
int utf8_is_modifier(const struct utf8_data *); int utf8_should_combine(const struct utf8_data *,
const struct utf8_data *);
enum hanguljamo_state hanguljamo_check_state(const struct utf8_data *, enum hanguljamo_state hanguljamo_check_state(const struct utf8_data *,
const struct utf8_data *); const struct utf8_data *);

View File

@@ -73,49 +73,99 @@ utf8_is_vs(const struct utf8_data *ud)
return (memcmp(ud->data, "\357\270\217", 3) == 0); return (memcmp(ud->data, "\357\270\217", 3) == 0);
} }
/* Is this in the modifier table? */ /* Should these two characters combine? */
int int
utf8_is_modifier(const struct utf8_data *ud) utf8_should_combine(const struct utf8_data *with, const struct utf8_data *add)
{ {
wchar_t wc; wchar_t w, a;
if (utf8_towc(ud, &wc) != UTF8_DONE) if (utf8_towc(with, &w) != UTF8_DONE)
return (0); return (0);
switch (wc) { if (utf8_towc(add, &a) != UTF8_DONE)
case 0x1F1E6: return (0);
case 0x1F1E7:
case 0x1F1E8: /* Regional indicators. */
case 0x1F1E9: if ((a >= 0x1F1E6 && a <= 0x1F1FF) && (w >= 0x1F1E6 && w <= 0x1F1FF))
case 0x1F1EA:
case 0x1F1EB:
case 0x1F1EC:
case 0x1F1ED:
case 0x1F1EE:
case 0x1F1EF:
case 0x1F1F0:
case 0x1F1F1:
case 0x1F1F2:
case 0x1F1F3:
case 0x1F1F4:
case 0x1F1F5:
case 0x1F1F6:
case 0x1F1F7:
case 0x1F1F8:
case 0x1F1F9:
case 0x1F1FA:
case 0x1F1FB:
case 0x1F1FC:
case 0x1F1FD:
case 0x1F1FE:
case 0x1F1FF:
case 0x1F3FB:
case 0x1F3FC:
case 0x1F3FD:
case 0x1F3FE:
case 0x1F3FF:
return (1); return (1);
/* Emoji skin tone modifiers. */
switch (a) {
case 0x1F44B:
case 0x1F44C:
case 0x1F44D:
case 0x1F44E:
case 0x1F44F:
case 0x1F450:
case 0x1F466:
case 0x1F467:
case 0x1F468:
case 0x1F469:
case 0x1F46E:
case 0x1F470:
case 0x1F471:
case 0x1F472:
case 0x1F473:
case 0x1F474:
case 0x1F475:
case 0x1F476:
case 0x1F477:
case 0x1F478:
case 0x1F47C:
case 0x1F481:
case 0x1F482:
case 0x1F485:
case 0x1F486:
case 0x1F487:
case 0x1F4AA:
case 0x1F575:
case 0x1F57A:
case 0x1F590:
case 0x1F595:
case 0x1F596:
case 0x1F645:
case 0x1F646:
case 0x1F647:
case 0x1F64B:
case 0x1F64C:
case 0x1F64D:
case 0x1F64E:
case 0x1F64F:
case 0x1F6B4:
case 0x1F6B5:
case 0x1F6B6:
case 0x1F926:
case 0x1F937:
case 0x1F938:
case 0x1F939:
case 0x1F93D:
case 0x1F93E:
case 0x1F9B5:
case 0x1F9B6:
case 0x1F9B8:
case 0x1F9B9:
case 0x1F9CD:
case 0x1F9CE:
case 0x1F9CF:
case 0x1F9D1:
case 0x1F9D2:
case 0x1F9D3:
case 0x1F9D4:
case 0x1F9D5:
case 0x1F9D6:
case 0x1F9D7:
case 0x1F9D8:
case 0x1F9D9:
case 0x1F9DA:
case 0x1F9DB:
case 0x1F9DC:
case 0x1F9DD:
case 0x1F9DE:
case 0x1F9DF:
if (w >= 0x1F3FB && w <= 0x1F3FF)
return (1);
break;
} }
return (0); return 0;
} }
static enum hanguljamo_subclass static enum hanguljamo_subclass

52
utf8.c
View File

@@ -56,32 +56,32 @@ static struct utf8_width_item utf8_default_width_cache[] = {
{ .wc = 0x0270B, .width = 2 }, { .wc = 0x0270B, .width = 2 },
{ .wc = 0x0270C, .width = 2 }, { .wc = 0x0270C, .width = 2 },
{ .wc = 0x0270D, .width = 2 }, { .wc = 0x0270D, .width = 2 },
{ .wc = 0x1F1E6, .width = 2 }, { .wc = 0x1F1E6, .width = 1 },
{ .wc = 0x1F1E7, .width = 2 }, { .wc = 0x1F1E7, .width = 1 },
{ .wc = 0x1F1E8, .width = 2 }, { .wc = 0x1F1E8, .width = 1 },
{ .wc = 0x1F1E9, .width = 2 }, { .wc = 0x1F1E9, .width = 1 },
{ .wc = 0x1F1EA, .width = 2 }, { .wc = 0x1F1EA, .width = 1 },
{ .wc = 0x1F1EB, .width = 2 }, { .wc = 0x1F1EB, .width = 1 },
{ .wc = 0x1F1EC, .width = 2 }, { .wc = 0x1F1EC, .width = 1 },
{ .wc = 0x1F1ED, .width = 2 }, { .wc = 0x1F1ED, .width = 1 },
{ .wc = 0x1F1EE, .width = 2 }, { .wc = 0x1F1EE, .width = 1 },
{ .wc = 0x1F1EF, .width = 2 }, { .wc = 0x1F1EF, .width = 1 },
{ .wc = 0x1F1F0, .width = 2 }, { .wc = 0x1F1F0, .width = 1 },
{ .wc = 0x1F1F1, .width = 2 }, { .wc = 0x1F1F1, .width = 1 },
{ .wc = 0x1F1F2, .width = 2 }, { .wc = 0x1F1F2, .width = 1 },
{ .wc = 0x1F1F3, .width = 2 }, { .wc = 0x1F1F3, .width = 1 },
{ .wc = 0x1F1F4, .width = 2 }, { .wc = 0x1F1F4, .width = 1 },
{ .wc = 0x1F1F5, .width = 2 }, { .wc = 0x1F1F5, .width = 1 },
{ .wc = 0x1F1F6, .width = 2 }, { .wc = 0x1F1F6, .width = 1 },
{ .wc = 0x1F1F7, .width = 2 }, { .wc = 0x1F1F7, .width = 1 },
{ .wc = 0x1F1F8, .width = 2 }, { .wc = 0x1F1F8, .width = 1 },
{ .wc = 0x1F1F9, .width = 2 }, { .wc = 0x1F1F9, .width = 1 },
{ .wc = 0x1F1FA, .width = 2 }, { .wc = 0x1F1FA, .width = 1 },
{ .wc = 0x1F1FB, .width = 2 }, { .wc = 0x1F1FB, .width = 1 },
{ .wc = 0x1F1FC, .width = 2 }, { .wc = 0x1F1FC, .width = 1 },
{ .wc = 0x1F1FD, .width = 2 }, { .wc = 0x1F1FD, .width = 1 },
{ .wc = 0x1F1FE, .width = 2 }, { .wc = 0x1F1FE, .width = 1 },
{ .wc = 0x1F1FF, .width = 2 }, { .wc = 0x1F1FF, .width = 1 },
{ .wc = 0x1F385, .width = 2 }, { .wc = 0x1F385, .width = 2 },
{ .wc = 0x1F3C2, .width = 2 }, { .wc = 0x1F3C2, .width = 2 },
{ .wc = 0x1F3C3, .width = 2 }, { .wc = 0x1F3C3, .width = 2 },