Handle wcwidth() and mbtowc() failures in a better style, and drop characters whose width we cannot determine (wcwidth() fails) on input, the same way we drop invalid UTF-8. Suggested by schwarze@.
2025-12-22 07:16:06 +00:00 · 2016-03-02 15:36:02 +00:00
parent d980d965dd
commit b8a102d26f
5 changed files with 48 additions and 21 deletions
--- a/input.c
+++ b/input.c
@@ -1960,8 +1960,14 @@ input_utf8_close(struct input_ctx *ictx)
 {
 	struct utf8_data	*ud = &ictx->utf8data;
-	if (utf8_append(ud, ictx->ch) != UTF8_DONE)
+	if (utf8_append(ud, ictx->ch) != UTF8_DONE) {
-		fatalx("UTF-8 close invalid %#x", ictx->ch);
+		/*
 		 * An error here could be invalid UTF-8 or it could be a
 		 * nonprintable character for which we can't get the
 		 * width. Drop it.
 		 */
 		return (0);
 	}
 	log_debug("%s %hhu '%*s' (width %hhu)", __func__, ud->size,
 	    (int)ud->size, ud->data, ud->width);
--- a/key-string.c
+++ b/key-string.c
@@ -149,6 +149,7 @@ key_string_lookup_string(const char *string)
 	struct utf8_data	 ud;
 	u_int			 i;
 	enum utf8_state		 more;
 	wchar_t			 wc;
 	/* Is this no key? */
 	if (strcasecmp(string, "None") == 0)
@@ -185,8 +186,9 @@ key_string_lookup_string(const char *string)
 				more = utf8_append(&ud, (u_char)string[i]);
 			if (more != UTF8_DONE)
 				return (KEYC_UNKNOWN);
-			key = utf8_combine(&ud);
+			if (utf8_combine(&ud, &wc) != UTF8_DONE)
-			return (key | modifiers);
+				return (KEYC_UNKNOWN);
 			return (wc | modifiers);
 		}
 		/* Otherwise look the key up in the table. */
--- a/tmux.h
+++ b/tmux.h
@@ -2316,8 +2316,7 @@ void		 utf8_set(struct utf8_data *, u_char);
 void		 utf8_copy(struct utf8_data *, const struct utf8_data *);
 enum utf8_state	 utf8_open(struct utf8_data *, u_char);
 enum utf8_state	 utf8_append(struct utf8_data *, u_char);
-u_int		 utf8_width(wchar_t);
+enum utf8_state	 utf8_combine(const struct utf8_data *, wchar_t *);
 wchar_t		 utf8_combine(const struct utf8_data *);
 enum utf8_state	 utf8_split(wchar_t, struct utf8_data *);
 int		 utf8_strvis(char *, const char *, size_t, int);
 char		*utf8_sanitize(const char *);
--- a/tty-keys.c
+++ b/tty-keys.c
@@ -477,6 +477,7 @@ tty_keys_next(struct tty *tty)
 	struct utf8_data	 ud;
 	enum utf8_state		 more;
 	u_int			 i;
 	wchar_t			 wc;
 	/* Get key buffer. */
 	buf = EVBUFFER_DATA(tty->event->input);
@@ -552,7 +553,11 @@ first_key:
 			more = utf8_append(&ud, (u_char)buf[i]);
 		if (more != UTF8_DONE)
 			goto discard_key;
-		key = utf8_combine(&ud);
+
 		if (utf8_combine(&ud, &wc) != UTF8_DONE)
 			goto discard_key;
 		key = wc;
 		log_debug("UTF-8 key %.*s %#llx", (int)size, buf, key);
 		goto complete_key;
 	}
--- a/utf8.c
+++ b/utf8.c
@@ -25,6 +25,8 @@
 #include "tmux.h"
 static int	utf8_width(wchar_t);
 /* Set a single character. */
 void
 utf8_set(struct utf8_data *ud, u_char ch)
@@ -80,6 +82,9 @@ utf8_open(struct utf8_data *ud, u_char ch)
 enum utf8_state
 utf8_append(struct utf8_data *ud, u_char ch)
 {
 	wchar_t	wc;
 	int	width;
 	if (ud->have >= ud->size)
 		fatalx("UTF-8 character overflow");
 	if (ud->size > sizeof ud->data)
@@ -94,39 +99,49 @@ utf8_append(struct utf8_data *ud, u_char ch)
 	if (ud->width == 0xff)
 		return (UTF8_ERROR);
-	ud->width = utf8_width(utf8_combine(ud));
+
 	if (utf8_combine(ud, &wc) != UTF8_DONE)
 		return (UTF8_ERROR);
 	if ((width = utf8_width(wc)) < 0)
 		return (UTF8_ERROR);
 	ud->width = width;
 	return (UTF8_DONE);
 }
 /* Get width of Unicode character. */
-u_int
+static int
 utf8_width(wchar_t wc)
 {
-	int width;
+	int	width;
 	width = wcwidth(wc);
-	if (width < 0)
+	if (width < 0 || width > 0xff)
-		return (0);
+		return (-1);
 	return (width);
 }
 /* Combine UTF-8 into Unicode. */
-wchar_t
+enum utf8_state
-utf8_combine(const struct utf8_data *ud)
+utf8_combine(const struct utf8_data *ud, wchar_t *wc)
 {
-	wchar_t wc;
+	switch (mbtowc(wc, ud->data, ud->size)) {
-
+	case -1:
-	if (mbtowc(&wc, ud->data, ud->size) <= 0)
+		mbtowc(NULL, NULL, MB_CUR_MAX);
-		return (0xfffd);
+		return (UTF8_ERROR);
-	return (wc);
+	case 0:
 		return (UTF8_ERROR);
 	default:
 		return (UTF8_DONE);
 	}
 }
 /* Split Unicode into UTF-8. */
 enum utf8_state
 utf8_split(wchar_t wc, struct utf8_data *ud)
 {
-	char s[MB_CUR_MAX];
+	char	s[MB_LEN_MAX];
-	int  slen;
+	int	slen;
 	slen = wctomb(s, wc);
 	if (slen <= 0 || slen > (int)sizeof ud->data)