From 3051076dd16643a683a860c8ff5a24b47559261e Mon Sep 17 00:00:00 2001 From: nicm Date: Mon, 3 Nov 2025 09:27:06 +0000 Subject: [PATCH] Ignore Hangul filler character. There doesn't seem to be much agreement on what to do with this but ignoring it seems right and does improve things. GitHub issue 3998. --- screen-write.c | 4 ++++ tmux.h | 1 + utf8-combined.c | 13 +++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/screen-write.c b/screen-write.c index 1014ae80..8e04681a 100644 --- a/screen-write.c +++ b/screen-write.c @@ -2006,6 +2006,10 @@ screen_write_combine(struct screen_write_ctx *ctx, const struct grid_cell *gc) struct tty_ctx ttyctx; int force_wide = 0, zero_width = 0; + /* Ignore U+3164 HANGUL_FILLER entirely. */ + if (utf8_is_hangul_filler(ud)) + return (1); + /* * Is this character which makes no sense without being combined? If * this is true then flag it here and discard the character (return 1) diff --git a/tmux.h b/tmux.h index aa25a408..356d1157 100644 --- a/tmux.h +++ b/tmux.h @@ -3499,6 +3499,7 @@ int utf8_cstrhas(const char *, const struct utf8_data *); int utf8_has_zwj(const struct utf8_data *); int utf8_is_zwj(const struct utf8_data *); int utf8_is_vs(const struct utf8_data *); +int utf8_is_hangul_filler(const struct utf8_data *); int utf8_should_combine(const struct utf8_data *, const struct utf8_data *); enum hanguljamo_state hanguljamo_check_state(const struct utf8_data *, diff --git a/utf8-combined.c b/utf8-combined.c index 91ddaf75..635ae92c 100644 --- a/utf8-combined.c +++ b/utf8-combined.c @@ -55,7 +55,7 @@ utf8_has_zwj(const struct utf8_data *ud) return (memcmp(ud->data + ud->size - 3, "\342\200\215", 3) == 0); } -/* Is this a zero width joiner? */ +/* Is this zero width joiner U+200D? */ int utf8_is_zwj(const struct utf8_data *ud) { @@ -64,7 +64,7 @@ utf8_is_zwj(const struct utf8_data *ud) return (memcmp(ud->data, "\342\200\215", 3) == 0); } -/* Is this a variation selector? 
*/ +/* Is this variation selector U+FE0F? */ int utf8_is_vs(const struct utf8_data *ud) { @@ -73,6 +73,15 @@ utf8_is_vs(const struct utf8_data *ud) return (memcmp(ud->data, "\357\270\217", 3) == 0); } +/* Is this Hangul filler U+3164? */ +int +utf8_is_hangul_filler(const struct utf8_data *ud) +{ + if (ud->size != 3) + return (0); + return (memcmp(ud->data, "\343\205\244", 3) == 0); +} + /* Should these two characters combine? */ int utf8_should_combine(const struct utf8_data *with, const struct utf8_data *add)