Use the internal representation for UTF-8 keys instead of wchar_t and
drop some code only needed for that.
2026-01-14 10:40:23 +00:00 · 2020-05-25 18:57:24 +00:00
parent 35779d655d
commit 6f03e49e68
9 changed files with 48 additions and 80 deletions
--- a/cmd-parse.y
+++ b/cmd-parse.y
@@ -26,6 +26,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>

 #include "tmux.h"

@@ -1251,10 +1252,9 @@ error:
 static int
 yylex_token_escape(char **buf, size_t *len)
 {
-	int			 ch, type, o2, o3;
-	u_int			 size, i, tmp;
-	char			 s[9];
-	struct utf8_data	 ud;
+	int	 ch, type, o2, o3, mlen;
+	u_int	 size, i, tmp;
+	char	 s[9], m[MB_LEN_MAX];

 	ch = yylex_getc();

@@ -1339,11 +1339,12 @@ unicode:
 		yyerror("invalid \\%c argument", type);
 		return (0);
 	}
-	if (utf8_split(tmp, &ud) != UTF8_DONE) {
+	mlen = wctomb(m, tmp);
+	if (mlen <= 0 || mlen > (int)sizeof m) {
 		yyerror("invalid \\%c argument", type);
 		return (0);
 	}
-	yylex_append(buf, len, ud.data, ud.size);
+	yylex_append(buf, len, m, mlen);
 	return (1);
 }

--- a/cmd-send-keys.c
+++ b/cmd-send-keys.c
@@ -91,8 +91,8 @@ cmd_send_keys_inject_string(struct cmdq_item *item, struct cmdq_item *after,
    struct args *args, int i)
 {
 	const char		*s = args->argv[i];
-	struct utf8_data	*ud, *uc;
-	wchar_t			 wc;
+	struct utf8_data	*ud, *loop;
+	utf8_char		 uc;
 	key_code		 key;
 	char			*endptr;
 	long			 n;
@@ -117,10 +117,10 @@ cmd_send_keys_inject_string(struct cmdq_item *item, struct cmdq_item *after,
 	}
 	if (literal) {
 		ud = utf8_fromcstr(s);
-		for (uc = ud; uc->size != 0; uc++) {
-			if (utf8_combine(uc, &wc) != UTF8_DONE)
+		for (loop = ud; loop->size != 0; loop++) {
+			if (utf8_from_data(loop, &uc) != UTF8_DONE)
 				continue;
-			after = cmd_send_keys_inject_key(item, after, wc);
+			after = cmd_send_keys_inject_key(item, after, uc);
 		}
 		free(ud);
 	}
--- a/grid.c
+++ b/grid.c
@@ -76,7 +76,7 @@ grid_need_extended_cell(const struct grid_cell_entry *gce,
 		return (1);
 	if (gc->attr > 0xff)
 		return (1);
-	if (gc->data.size != 1 || gc->data.width != 1)
+	if (gc->data.size > 1 || gc->data.width > 1)
 		return (1);
 	if ((gc->fg & COLOUR_FLAG_RGB) || (gc->bg & COLOUR_FLAG_RGB))
 		return (1);
@@ -496,6 +496,7 @@ grid_get_cell1(struct grid_line *gl, u_int px, struct grid_cell *gc)
 			gc->fg = gee->fg;
 			gc->bg = gee->bg;
 			gc->us = gee->us;
+			log_debug("!!! %x", gc->flags);
 			utf8_to_data(gee->data, &gc->data);
 		}
 		return;
@@ -541,6 +542,7 @@ grid_set_cell(struct grid *gd, u_int px, u_int py, const struct grid_cell *gc)
 		gl->cellused = px + 1;

 	gce = &gl->celldata[px];
+	if (gc->flags & GRID_FLAG_PADDING) log_debug("!!! padding %d\n", grid_need_extended_cell(gce, gc));
 	if (grid_need_extended_cell(gce, gc))
 		grid_extended_cell(gl, gce, gc);
 	else
--- a/input-keys.c
+++ b/input-keys.c
@@ -469,10 +469,9 @@ input_key(struct screen *s, struct bufferevent *bev, key_code key)
 		return (0);
 	}
 	if (justkey > 0x7f && justkey < KEYC_BASE) {
-		if (utf8_split(justkey, &ud) != UTF8_DONE)
-			return (-1);
 		if (key & KEYC_META)
 			bufferevent_write(bev, "\033", 1);
+		utf8_to_data(justkey, &ud);
 		bufferevent_write(bev, ud.data, ud.size);
 		return (0);
 	}
--- a/key-string.c
+++ b/key-string.c
@@ -169,7 +169,7 @@ key_string_lookup_string(const char *string)
 	struct utf8_data	 ud;
 	u_int			 i;
 	enum utf8_state		 more;
-	wchar_t			 wc;
+	utf8_char		 uc;

 	/* Is this no key or any key? */
 	if (strcasecmp(string, "None") == 0)
@@ -210,9 +210,9 @@ key_string_lookup_string(const char *string)
 				more = utf8_append(&ud, (u_char)string[i]);
 			if (more != UTF8_DONE)
 				return (KEYC_UNKNOWN);
-			if (utf8_combine(&ud, &wc) != UTF8_DONE)
+			if (utf8_from_data(&ud, &uc) != UTF8_DONE)
 				return (KEYC_UNKNOWN);
-			return (wc|modifiers);
+			return (uc|modifiers);
 		}

 		/* Otherwise look the key up in the table. */
@@ -349,12 +349,11 @@ key_string_lookup_key(key_code key, int with_flags)

 	/* Is this a UTF-8 key? */
 	if (key > 127 && key < KEYC_BASE) {
-		if (utf8_split(key, &ud) == UTF8_DONE) {
-			off = strlen(out);
-			memcpy(out + off, ud.data, ud.size);
-			out[off + ud.size] = '\0';
-			goto out;
-		}
+		utf8_to_data(key, &ud);
+		off = strlen(out);
+		memcpy(out + off, ud.data, ud.size);
+		out[off + ud.size] = '\0';
+		goto out;
 	}

 	/* Invalid keys are errors. */
--- a/status.c
+++ b/status.c
@@ -1275,8 +1275,7 @@ process_key:
 append_key:
 	if (key <= 0x1f || key >= KEYC_BASE)
 		return (0);
-	if (utf8_split(key, &tmp) != UTF8_DONE)
-		return (0);
+	utf8_to_data(key, &tmp);

 	c->prompt_buffer = xreallocarray(c->prompt_buffer, size + 2,
 	    sizeof *c->prompt_buffer);
--- a/tmux.h
+++ b/tmux.h
@@ -30,7 +30,6 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <termios.h>
-#include <wchar.h>

 #include "xmalloc.h"

@@ -2899,8 +2898,6 @@ void		 utf8_set(struct utf8_data *, u_char);
 void		 utf8_copy(struct utf8_data *, const struct utf8_data *);
 enum utf8_state	 utf8_open(struct utf8_data *, u_char);
 enum utf8_state	 utf8_append(struct utf8_data *, u_char);
-enum utf8_state	 utf8_combine(const struct utf8_data *, wchar_t *);
-enum utf8_state	 utf8_split(wchar_t, struct utf8_data *);
 int		 utf8_isvalid(const char *);
 int		 utf8_strvis(char *, const char *, size_t, int);
 int		 utf8_stravis(char **, const char *, int);
--- a/tty-keys.c
+++ b/tty-keys.c
@@ -578,8 +578,8 @@ tty_keys_next1(struct tty *tty, const char *buf, size_t len, key_code *key,
 	struct tty_key		*tk, *tk1;
 	struct utf8_data	 ud;
 	enum utf8_state		 more;
+	utf8_char		 uc;
 	u_int			 i;
-	wchar_t			 wc;

 	log_debug("%s: next key is %zu (%.*s) (expired=%d)", c->name, len,
 	    (int)len, buf, expired);
@@ -611,12 +611,12 @@ tty_keys_next1(struct tty *tty, const char *buf, size_t len, key_code *key,
 		if (more != UTF8_DONE)
 			return (-1);

-		if (utf8_combine(&ud, &wc) != UTF8_DONE)
+		if (utf8_from_data(&ud, &uc) != UTF8_DONE)
 			return (-1);
-		*key = wc;
+		*key = uc;

 		log_debug("%s: UTF-8 key %.*s %#llx", c->name, (int)ud.size,
-		    buf, *key);
+		    ud.data, *key);
 		return (0);
 	}

--- a/utf8.c
+++ b/utf8.c
@@ -230,17 +230,27 @@ utf8_copy(struct utf8_data *to, const struct utf8_data *from)
 }

 /* Get width of Unicode character. */
-static int
-utf8_width(wchar_t wc)
+static enum utf8_state
+utf8_width(struct utf8_data *ud, int *width)
 {
-	int	width;
+	wchar_t	wc;

-	width = wcwidth(wc);
-	if (width < 0 || width > 0xff) {
-		log_debug("Unicode %04lx, wcwidth() %d", (long)wc, width);
-		return (-1);
+	switch (mbtowc(&wc, ud->data, ud->size)) {
+	case -1:
+		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
+		    errno);
+		mbtowc(NULL, NULL, MB_CUR_MAX);
+		return (UTF8_ERROR);
+	case 0:
+		return (UTF8_ERROR);
 	}
-	return (width);
+	*width = wcwidth(wc);
+	if (*width < 0 || *width > 0xff) {
+		log_debug("UTF-8 %.*s, wcwidth() %d", (int)ud->size, ud->data,
+		    *width);
+		return (UTF8_ERROR);
+	}
+	return (UTF8_DONE);
 }

 /*
@@ -270,7 +280,6 @@ utf8_open(struct utf8_data *ud, u_char ch)
 enum utf8_state
 utf8_append(struct utf8_data *ud, u_char ch)
 {
-	wchar_t	wc;
 	int	width;

 	if (ud->have >= ud->size)
@@ -287,51 +296,13 @@ utf8_append(struct utf8_data *ud, u_char ch)

 	if (ud->width == 0xff)
 		return (UTF8_ERROR);
-
-	if (utf8_combine(ud, &wc) != UTF8_DONE)
-		return (UTF8_ERROR);
-	if ((width = utf8_width(wc)) < 0)
+	if (utf8_width(ud, &width) != UTF8_DONE)
 		return (UTF8_ERROR);
 	ud->width = width;

 	return (UTF8_DONE);
 }

-/* Combine UTF-8 into Unicode. */
-enum utf8_state
-utf8_combine(const struct utf8_data *ud, wchar_t *wc)
-{
-	switch (mbtowc(wc, ud->data, ud->size)) {
-	case -1:
-		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
-		    errno);
-		mbtowc(NULL, NULL, MB_CUR_MAX);
-		return (UTF8_ERROR);
-	case 0:
-		return (UTF8_ERROR);
-	default:
-		return (UTF8_DONE);
-	}
-}
-
-/* Split Unicode into UTF-8. */
-enum utf8_state
-utf8_split(wchar_t wc, struct utf8_data *ud)
-{
-	char	s[MB_LEN_MAX];
-	int	slen;
-
-	slen = wctomb(s, wc);
-	if (slen <= 0 || slen > (int)sizeof ud->data)
-		return (UTF8_ERROR);
-
-	memcpy(ud->data, s, slen);
-	ud->size = slen;
-
-	ud->width = utf8_width(wc);
-	return (UTF8_DONE);
-}
-
 /*
 * Encode len characters from src into dst, which is guaranteed to have four
 * bytes available for each character from src (for \abc or UTF-8) plus space