From 7ced0a03d2ff51274d5fa5fb6eeaa6f4aac9f2f4 Mon Sep 17 00:00:00 2001 From: nicm Date: Sun, 8 Jan 2023 22:15:30 +0000 Subject: [PATCH] Restore code to handle wcwidth failure so that unknown codepoints still do the most likely right thing. GitHub issue 3427, patch based on a diff from Jesse Luehrs in GitHub issue 3003. --- utf8.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/utf8.c b/utf8.c index 03918cd2..3c6f88ff 100644 --- a/utf8.c +++ b/utf8.c @@ -226,9 +226,16 @@ utf8_width(struct utf8_data *ud, int *width) case 0: return (UTF8_ERROR); } + log_debug("UTF-8 %.*s is %08X", (int)ud->size, ud->data, (u_int)wc); *width = wcwidth(wc); - log_debug("UTF-8 %.*s %#x, wcwidth() %d", (int)ud->size, ud->data, - (u_int)wc, *width); + log_debug("wcwidth(%08X) returned %d", (u_int)wc, *width); + if (*width < 0) { + /* + * C1 control characters are nonprintable, so they are always + * zero width. + */ + *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1; + } if (*width >= 0 && *width <= 0xff) return (UTF8_DONE); return (UTF8_ERROR);