From 2a32565e0c882c9e78ef9c7d52476c3574331f62 Mon Sep 17 00:00:00 2001 From: Nicholas Marriott Date: Sun, 8 Jan 2023 22:15:38 +0000 Subject: [PATCH] Restore code to handle wcwidth failure so that unknown codepoints still do the most likely right thing. GitHub issue 3427, patch based on a diff from Jesse Luehrs in GitHub issue 3003. --- utf8.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/utf8.c b/utf8.c index 05ab9cfe..042ddf89 100644 --- a/utf8.c +++ b/utf8.c @@ -229,13 +229,21 @@ utf8_width(struct utf8_data *ud, int *width) case 0: return (UTF8_ERROR); } + log_debug("UTF-8 %.*s is %08X", (int)ud->size, ud->data, (u_int)wc); #ifdef HAVE_UTF8PROC *width = utf8proc_wcwidth(wc); + log_debug("utf8proc_wcwidth(%08X) returned %d", (u_int)wc, *width); #else *width = wcwidth(wc); + log_debug("wcwidth(%08X) returned %d", (u_int)wc, *width); + if (*width < 0) { + /* + * C1 control characters are nonprintable, so they are always + * zero width. + */ + *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1; + } #endif - log_debug("UTF-8 %.*s %#x, wcwidth() %d", (int)ud->size, ud->data, - (u_int)wc, *width); if (*width >= 0 && *width <= 0xff) return (UTF8_DONE); return (UTF8_ERROR);