From d6daf37df4ccd7589d2d8f6911bf7270f12d1672 Mon Sep 17 00:00:00 2001
From: nicm <nicm>
Date: Thu, 12 Nov 2015 12:19:57 +0000
Subject: [PATCH 1/3] Tidy utf8.c a little: build table on first use, and make
 utf8_width take a u_int rather than splitting and then combining again in
 utf8_split.

---
 server.c |  1 -
 tmux.h   |  2 +-
 utf8.c   | 81 ++++++++++++++++++++------------------------------------
 3 files changed, 30 insertions(+), 54 deletions(-)
diff --git a/server.c b/server.c
index bc3fa51d..2808c0cc 100644
--- a/server.c
+++ b/server.c
@@ -186,7 +186,6 @@ server_start(struct event_base *base, int lockfd, char *lockfile)
 	TAILQ_INIT(&session_groups);
 	mode_key_init_trees();
 	key_bindings_init();
-	utf8_build();
 
 	start_time = time(NULL);
 
diff --git a/tmux.h b/tmux.h
index de19159d..0d2d773d 100644
--- a/tmux.h
+++ b/tmux.h
@@ -2181,7 +2181,7 @@ void		 session_group_synchronize1(struct session *, struct session *);
 void		 session_renumber_windows(struct session *);
 
 /* utf8.c */
-void		 utf8_build(void);
+u_int		 utf8_width(u_int);
 void		 utf8_set(struct utf8_data *, u_char);
 int		 utf8_open(struct utf8_data *, u_char);
 int		 utf8_append(struct utf8_data *, u_char);
diff --git a/utf8.c b/utf8.c
index cb20ea6d..0926f4bc 100644
--- a/utf8.c
+++ b/utf8.c
@@ -35,7 +35,7 @@ struct utf8_width_entry {
 };
 
 /* Sorted, then repeatedly split in the middle to balance the tree. */
-struct utf8_width_entry utf8_width_table[] = {
+static struct utf8_width_entry utf8_width_table[] = {
 	{ 0x00b41, 0x00b44, 0, NULL, NULL },
 	{ 0x008e4, 0x00902, 0, NULL, NULL },
 	{ 0x006d6, 0x006dd, 0, NULL, NULL },
@@ -344,12 +344,9 @@ struct utf8_width_entry utf8_width_table[] = {
 	{ 0xe0100, 0xe01ef, 0, NULL, NULL },
 	{ 0x100000, 0x10fffd, 0, NULL, NULL },
 };
+static struct utf8_width_entry	*utf8_width_root = NULL;
 
-struct utf8_width_entry	*utf8_width_root = NULL;
-
-int	utf8_overlap(struct utf8_width_entry *, struct utf8_width_entry *);
-u_int	utf8_combine(const struct utf8_data *);
-u_int	utf8_width(const struct utf8_data *);
+static void	utf8_build(void);
 
 /* Set a single character. */
 void
@@ -405,40 +402,20 @@ utf8_append(struct utf8_data *utf8data, u_char ch)
 	if (utf8data->have != utf8data->size)
 		return (1);
 
-	utf8data->width = utf8_width(utf8data);
-	return (0);
-}
-
-/* Check if two width tree entries overlap. */
-int
-utf8_overlap(struct utf8_width_entry *item1, struct utf8_width_entry *item2)
-{
-	if (item1->first >= item2->first && item1->first <= item2->last)
-		return (1);
-	if (item1->last >= item2->first && item1->last <= item2->last)
-		return (1);
-	if (item2->first >= item1->first && item2->first <= item1->last)
-		return (1);
-	if (item2->last >= item1->first && item2->last <= item1->last)
-		return (1);
+	utf8data->width = utf8_width(utf8_combine(utf8data));
 	return (0);
 }
 
 /* Build UTF-8 width tree. */
-void
+static void
 utf8_build(void)
 {
 	struct utf8_width_entry	**ptr, *item, *node;
-	u_int			  i, j;
+	u_int			  i;
 
 	for (i = 0; i < nitems(utf8_width_table); i++) {
 		item = &utf8_width_table[i];
 
-		for (j = 0; j < nitems(utf8_width_table); j++) {
-			if (i != j && utf8_overlap(item, &utf8_width_table[j]))
-				log_fatalx("utf8 overlap: %u %u", i, j);
-		}
-
 		ptr = &utf8_width_root;
 		while (*ptr != NULL) {
 			node = *ptr;
@@ -451,6 +428,27 @@ utf8_build(void)
 	}
 }
 
+/* Look up width of Unicode character uc in the tree; 1 if no range matches. */
+u_int
+utf8_width(u_int uc)
+{
+	struct utf8_width_entry	*item;
+
+	if (utf8_width_root == NULL)
+		utf8_build();	/* tree is built on first lookup */
+
+	item = utf8_width_root;
+	while (item != NULL) {
+		if (uc < item->first)
+			item = item->left;
+		else if (uc > item->last)
+			item = item->right;
+		else
+			return (item->width);
+	}
+	return (1);	/* not covered by any table range */
+}
+
 /* Combine UTF-8 into 32-bit Unicode. */
 u_int
 utf8_combine(const struct utf8_data *utf8data)
@@ -481,7 +479,7 @@ utf8_combine(const struct utf8_data *utf8data)
 	return (value);
 }
 
-/* Split a UTF-8 character. */
+/* Split 32-bit Unicode into UTF-8. */
 int
 utf8_split(u_int uc, struct utf8_data *utf8data)
 {
@@ -505,7 +503,7 @@ utf8_split(u_int uc, struct utf8_data *utf8data)
 		utf8data->data[3] = 0x80 | (uc & 0x3f);
 	} else
 		return (-1);
-	utf8data->width = utf8_width(utf8data);
+	utf8data->width = utf8_width(uc);
 	return (0);
 }
 
@@ -522,27 +520,6 @@ utf8_split2(u_int uc, u_char *ptr)
 	return (1);
 }
 
-/* Lookup width of UTF-8 data in tree. */
-u_int
-utf8_width(const struct utf8_data *utf8data)
-{
-	struct utf8_width_entry	*item;
-	u_int			 value;
-
-	value = utf8_combine(utf8data);
-
-	item = utf8_width_root;
-	while (item != NULL) {
-		if (value < item->first)
-			item = item->left;
-		else if (value > item->last)
-			item = item->right;
-		else
-			return (item->width);
-	}
-	return (1);
-}
-
 /*
  * Encode len characters from src into dst, which is guaranteed to have four
  * bytes available for each character from src (for \abc or UTF-8) plus space

From 1da7475d0e2cbfb8b301fcad5cbcfb3ee4c087bb Mon Sep 17 00:00:00 2001
From: jmc <jmc>
Date: Thu, 12 Nov 2015 12:36:34 +0000
Subject: [PATCH 2/3] tweak previous; ok nicm

---
 tmux.1 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tmux.1 b/tmux.1
index d84265c6..e0646697 100644
--- a/tmux.1
+++ b/tmux.1
@@ -192,8 +192,8 @@ that UTF-8 is supported.
 .Pp
 Note that
 .Nm
-itself always accepts UTF-8, this controls whether it will send UTF-8
-characters to the terminal it is running it (if not, they are replaced by
+itself always accepts UTF-8; this controls whether it will send UTF-8
+characters to the terminal it is running in (if not, they are replaced by
 .Ql _ ) .
 .It Fl v
 Request verbose logging.
@@ -2796,8 +2796,8 @@ As with
 .Ic status-left ,
 .Ar string
 will be passed to
-.Xr strftime 3 ,
-character pairs are replaced.
+.Xr strftime 3
+and character pairs are replaced.
 .It Ic status-right-length Ar length
 Set the maximum
 .Ar length

From a209ea3953ba16742f6f6bb19b76ffdb1200960e Mon Sep 17 00:00:00 2001
From: nicm <nicm>
Date: Thu, 12 Nov 2015 12:43:36 +0000
Subject: [PATCH 3/3] Add utf8_padcstr and use it to align columns in
 list-keys.

---
 cmd-list-keys.c | 27 +++++++++++++++++----------
 tmux.h          |  1 +
 utf8.c          | 21 +++++++++++++++++++++
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/cmd-list-keys.c b/cmd-list-keys.c
index c76f9f47..3b6afa3e 100644
--- a/cmd-list-keys.c
+++ b/cmd-list-keys.c
@@ -18,6 +18,7 @@
 
 #include <sys/types.h>
 
+#include <stdlib.h>
 #include <string.h>
 
 #include "tmux.h"
@@ -54,10 +55,9 @@ cmd_list_keys_exec(struct cmd *self, struct cmd_q *cmdq)
 	struct key_table	*table;
 	struct key_binding	*bd;
 	const char		*key, *tablename, *r;
-	char			 tmp[BUFSIZ];
+	char			*cp, tmp[BUFSIZ];
 	size_t			 used;
 	int			 repeat, width, tablewidth, keywidth;
-	u_int			 i;
 
 	if (self->entry == &cmd_list_commands_entry)
 		return (cmd_list_keys_commands(self, cmdq));
@@ -82,7 +82,7 @@ cmd_list_keys_exec(struct cmd *self, struct cmd_q *cmdq)
 			if (bd->can_repeat)
 				repeat = 1;
 
-			width = strlen(table->name);
+			width = utf8_cstrwidth(table->name);
 			if (width > tablewidth)
 				tablewidth = width;
 			width = utf8_cstrwidth(key);
@@ -103,13 +103,20 @@ cmd_list_keys_exec(struct cmd *self, struct cmd_q *cmdq)
 				r = "-r ";
 			else
 				r = "   ";
-			used = xsnprintf(tmp, sizeof tmp, "%s-T %-*s %s", r,
-			    (int)tablewidth, table->name, key);
-			for (i = 0; i < keywidth - utf8_cstrwidth(key); i++) {
-				if (strlcat(tmp, " ", sizeof tmp) < sizeof tmp)
-					used++;
-			}
-			if (used < sizeof tmp) {
+			xsnprintf(tmp, sizeof tmp, "%s-T ", r);
+
+			cp = utf8_padcstr(table->name, tablewidth);
+			strlcat(tmp, cp, sizeof tmp);
+			strlcat(tmp, " ", sizeof tmp);
+			free(cp);
+
+			cp = utf8_padcstr(key, keywidth);
+			strlcat(tmp, cp, sizeof tmp);
+			strlcat(tmp, " ", sizeof tmp);
+			free(cp);
+
+			used = strlen(tmp);
+			if (used < (sizeof tmp) - 1) {
 				cmd_list_print(bd->cmdlist, tmp + used,
 				    (sizeof tmp) - used);
 			}
diff --git a/tmux.h b/tmux.h
index 0d2d773d..cb7ed2c3 100644
--- a/tmux.h
+++ b/tmux.h
@@ -2194,6 +2194,7 @@ struct utf8_data *utf8_fromcstr(const char *);
 char		*utf8_tocstr(struct utf8_data *);
 u_int		 utf8_cstrwidth(const char *);
 char		*utf8_trimcstr(const char *, u_int);
+char		*utf8_padcstr(const char *, u_int);
 
 /* procname.c */
 char   *get_proc_name(int, char *);
diff --git a/utf8.c b/utf8.c
index 0926f4bc..db738020 100644
--- a/utf8.c
+++ b/utf8.c
@@ -713,3 +713,24 @@ utf8_trimcstr(const char *s, u_int width)
 	free(tmp);
 	return (out);
 }
+
+/* Pad UTF-8 string to width with trailing spaces. Caller frees result. */
+char *
+utf8_padcstr(const char *s, u_int width)
+{
+	size_t	 slen;
+	char	*out;
+	u_int	  n, i;
+
+	n = utf8_cstrwidth(s);
+	if (n >= width)
+		return (xstrdup(s));	/* already wide enough: plain copy */
+
+	slen = strlen(s);
+	out = xmalloc(slen + 1 + (width - n));	/* bytes + padding + NUL */
+	memcpy(out, s, slen);
+	for (i = n; i < width; i++)
+		out[slen++] = ' ';
+	out[slen] = '\0';
+	return (out);
+}