From 3ae0d560bee1eb6ec8ccd89d87f5b0493c9f928c Mon Sep 17 00:00:00 2001 From: nicm Date: Tue, 12 Aug 2025 12:33:05 +0000 Subject: [PATCH] Replace invalid UTF-8 with the placeholder instead of ignoring them (this is what most terminals do), GitHub issue 4514. --- input.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/input.c b/input.c index 02bd5458..14aea52a 100644 --- a/input.c +++ b/input.c @@ -747,6 +747,20 @@ input_table_compare(const void *key, const void *value) return (strcmp(ictx->interm_buf, entry->interm)); } +/* Stop UTF-8 and enter an invalid character. */ +static void +input_stop_utf8(struct input_ctx *ictx) +{ + struct screen_write_ctx *sctx = &ictx->ctx; + static struct utf8_data rc = { "\357\277\275", 3, 3, 1 }; + + if (ictx->utf8started) { + utf8_copy(&ictx->cell.cell.data, &rc); + screen_write_collect_add(sctx, &ictx->cell.cell); + } + ictx->utf8started = 0; +} + /* * Timer - if this expires then have been waiting for a terminator for too * long, so reset to ground. @@ -1144,7 +1158,7 @@ input_print(struct input_ctx *ictx) struct screen_write_ctx *sctx = &ictx->ctx; int set; - ictx->utf8started = 0; /* can't be valid UTF-8 */ + input_stop_utf8(ictx); /* can't be valid UTF-8 */ set = ictx->cell.set == 0 ? ictx->cell.g0set : ictx->cell.g1set; if (set == 1) @@ -1224,7 +1238,7 @@ input_c0_dispatch(struct input_ctx *ictx) u_int width; int has_content = 0; - ictx->utf8started = 0; /* can't be valid UTF-8 */ + input_stop_utf8(ictx); /* can't be valid UTF-8 */ log_debug("%s: '%c'", __func__, ictx->ch); @@ -2551,9 +2565,9 @@ input_top_bit_set(struct input_ctx *ictx) ictx->flags &= ~INPUT_LAST; if (!ictx->utf8started) { - if (utf8_open(ud, ictx->ch) != UTF8_MORE) - return (0); ictx->utf8started = 1; + if (utf8_open(ud, ictx->ch) != UTF8_MORE) + input_stop_utf8(ictx); return (0); } @@ -2561,7 +2575,7 @@ input_top_bit_set(struct input_ctx *ictx) case UTF8_MORE: return (0); case UTF8_ERROR: - ictx->utf8started = 0; + input_stop_utf8(ictx); return (0); case UTF8_DONE: break;