From d6daf37df4ccd7589d2d8f6911bf7270f12d1672 Mon Sep 17 00:00:00 2001 From: nicm Date: Thu, 12 Nov 2015 12:19:57 +0000 Subject: [PATCH 1/3] Tidy utf8.c a little: build table on first use, and make utf8_width take a u_int rather than splitting and then combining again in utf8_split. --- server.c | 1 - tmux.h | 2 +- utf8.c | 81 ++++++++++++++++++++------------------------------------ 3 files changed, 30 insertions(+), 54 deletions(-) diff --git a/server.c b/server.c index bc3fa51d..2808c0cc 100644 --- a/server.c +++ b/server.c @@ -186,7 +186,6 @@ server_start(struct event_base *base, int lockfd, char *lockfile) TAILQ_INIT(&session_groups); mode_key_init_trees(); key_bindings_init(); - utf8_build(); start_time = time(NULL); diff --git a/tmux.h b/tmux.h index de19159d..0d2d773d 100644 --- a/tmux.h +++ b/tmux.h @@ -2181,7 +2181,7 @@ void session_group_synchronize1(struct session *, struct session *); void session_renumber_windows(struct session *); /* utf8.c */ -void utf8_build(void); +u_int utf8_width(u_int); void utf8_set(struct utf8_data *, u_char); int utf8_open(struct utf8_data *, u_char); int utf8_append(struct utf8_data *, u_char); diff --git a/utf8.c b/utf8.c index cb20ea6d..0926f4bc 100644 --- a/utf8.c +++ b/utf8.c @@ -35,7 +35,7 @@ struct utf8_width_entry { }; /* Sorted, then repeatedly split in the middle to balance the tree. 
*/ -struct utf8_width_entry utf8_width_table[] = { +static struct utf8_width_entry utf8_width_table[] = { { 0x00b41, 0x00b44, 0, NULL, NULL }, { 0x008e4, 0x00902, 0, NULL, NULL }, { 0x006d6, 0x006dd, 0, NULL, NULL }, @@ -344,12 +344,9 @@ struct utf8_width_entry utf8_width_table[] = { { 0xe0100, 0xe01ef, 0, NULL, NULL }, { 0x100000, 0x10fffd, 0, NULL, NULL }, }; +static struct utf8_width_entry *utf8_width_root = NULL; -struct utf8_width_entry *utf8_width_root = NULL; - -int utf8_overlap(struct utf8_width_entry *, struct utf8_width_entry *); -u_int utf8_combine(const struct utf8_data *); -u_int utf8_width(const struct utf8_data *); +static void utf8_build(void); /* Set a single character. */ void @@ -405,40 +402,20 @@ utf8_append(struct utf8_data *utf8data, u_char ch) if (utf8data->have != utf8data->size) return (1); - utf8data->width = utf8_width(utf8data); - return (0); -} - -/* Check if two width tree entries overlap. */ -int -utf8_overlap(struct utf8_width_entry *item1, struct utf8_width_entry *item2) -{ - if (item1->first >= item2->first && item1->first <= item2->last) - return (1); - if (item1->last >= item2->first && item1->last <= item2->last) - return (1); - if (item2->first >= item1->first && item2->first <= item1->last) - return (1); - if (item2->last >= item1->first && item2->last <= item1->last) - return (1); + utf8data->width = utf8_width(utf8_combine(utf8data)); return (0); } /* Build UTF-8 width tree. */ -void +static void utf8_build(void) { struct utf8_width_entry **ptr, *item, *node; - u_int i, j; + u_int i; for (i = 0; i < nitems(utf8_width_table); i++) { item = &utf8_width_table[i]; - for (j = 0; j < nitems(utf8_width_table); j++) { - if (i != j && utf8_overlap(item, &utf8_width_table[j])) - log_fatalx("utf8 overlap: %u %u", i, j); - } - ptr = &utf8_width_root; while (*ptr != NULL) { node = *ptr; @@ -451,6 +428,27 @@ utf8_build(void) } } +/* Lookup width of UTF-8 data in tree. 
*/ +u_int +utf8_width(u_int uc) +{ + struct utf8_width_entry *item; + + if (utf8_width_root == NULL) + utf8_build(); + + item = utf8_width_root; + while (item != NULL) { + if (uc < item->first) + item = item->left; + else if (uc > item->last) + item = item->right; + else + return (item->width); + } + return (1); +} + /* Combine UTF-8 into 32-bit Unicode. */ u_int utf8_combine(const struct utf8_data *utf8data) @@ -481,7 +479,7 @@ utf8_combine(const struct utf8_data *utf8data) return (value); } -/* Split a UTF-8 character. */ +/* Split 32-bit Unicode into UTF-8. */ int utf8_split(u_int uc, struct utf8_data *utf8data) { @@ -505,7 +503,7 @@ utf8_split(u_int uc, struct utf8_data *utf8data) utf8data->data[3] = 0x80 | (uc & 0x3f); } else return (-1); - utf8data->width = utf8_width(utf8data); + utf8data->width = utf8_width(uc); return (0); } @@ -522,27 +520,6 @@ utf8_split2(u_int uc, u_char *ptr) return (1); } -/* Lookup width of UTF-8 data in tree. */ -u_int -utf8_width(const struct utf8_data *utf8data) -{ - struct utf8_width_entry *item; - u_int value; - - value = utf8_combine(utf8data); - - item = utf8_width_root; - while (item != NULL) { - if (value < item->first) - item = item->left; - else if (value > item->last) - item = item->right; - else - return (item->width); - } - return (1); -} - /* * Encode len characters from src into dst, which is guaranteed to have four * bytes available for each character from src (for \abc or UTF-8) plus space From 1da7475d0e2cbfb8b301fcad5cbcfb3ee4c087bb Mon Sep 17 00:00:00 2001 From: jmc Date: Thu, 12 Nov 2015 12:36:34 +0000 Subject: [PATCH 2/3] tweak previous; ok nicm --- tmux.1 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tmux.1 b/tmux.1 index d84265c6..e0646697 100644 --- a/tmux.1 +++ b/tmux.1 @@ -192,8 +192,8 @@ that UTF-8 is supported. 
.Pp Note that .Nm -itself always accepts UTF-8, this controls whether it will send UTF-8 -characters to the terminal it is running it (if not, they are replaced by +itself always accepts UTF-8; this controls whether it will send UTF-8 +characters to the terminal it is running (if not, they are replaced by .Ql _ ) . .It Fl v Request verbose logging. @@ -2796,8 +2796,8 @@ As with .Ic status-left , .Ar string will be passed to -.Xr strftime 3 , -character pairs are replaced. +.Xr strftime 3 +and character pairs are replaced. .It Ic status-right-length Ar length Set the maximum .Ar length From a209ea3953ba16742f6f6bb19b76ffdb1200960e Mon Sep 17 00:00:00 2001 From: nicm Date: Thu, 12 Nov 2015 12:43:36 +0000 Subject: [PATCH 3/3] Add utf8_padcstr and use it to align columns in list-keys. --- cmd-list-keys.c | 27 +++++++++++++++++---------- tmux.h | 1 + utf8.c | 21 +++++++++++++++++++++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/cmd-list-keys.c b/cmd-list-keys.c index c76f9f47..3b6afa3e 100644 --- a/cmd-list-keys.c +++ b/cmd-list-keys.c @@ -18,6 +18,7 @@ #include <sys/types.h> +#include <stdlib.h> #include <string.h> #include "tmux.h" @@ -54,10 +55,9 @@ cmd_list_keys_exec(struct cmd *self, struct cmd_q *cmdq) struct key_table *table; struct key_binding *bd; const char *key, *tablename, *r; - char tmp[BUFSIZ]; + char *cp, tmp[BUFSIZ]; size_t used; int repeat, width, tablewidth, keywidth; - u_int i; if (self->entry == &cmd_list_commands_entry) return (cmd_list_keys_commands(self, cmdq)); @@ -82,7 +82,7 @@ cmd_list_keys_exec(struct cmd *self, struct cmd_q *cmdq) if (bd->can_repeat) repeat = 1; - width = strlen(table->name); + width = utf8_cstrwidth(table->name); if (width > tablewidth) tablewidth = width; width = utf8_cstrwidth(key); @@ -103,13 +103,20 @@ cmd_list_keys_exec(struct cmd *self, struct cmd_q *cmdq) r = "-r "; else r = " "; - used = xsnprintf(tmp, sizeof tmp, "%s-T %-*s %s", r, - (int)tablewidth, table->name, key); - for (i = 0; i < keywidth - utf8_cstrwidth(key); i++) { - if 
(strlcat(tmp, " ", sizeof tmp) < sizeof tmp) - used++; - } - if (used < sizeof tmp) { + xsnprintf(tmp, sizeof tmp, "%s-T ", r); + + cp = utf8_padcstr(table->name, tablewidth); + strlcat(tmp, cp, sizeof tmp); + strlcat(tmp, " ", sizeof tmp); + free(cp); + + cp = utf8_padcstr(key, keywidth); + strlcat(tmp, cp, sizeof tmp); + strlcat(tmp, " ", sizeof tmp); + free(cp); + + used = strlen(tmp); + if (used < (sizeof tmp) - 1) { cmd_list_print(bd->cmdlist, tmp + used, (sizeof tmp) - used); } diff --git a/tmux.h b/tmux.h index 0d2d773d..cb7ed2c3 100644 --- a/tmux.h +++ b/tmux.h @@ -2194,6 +2194,7 @@ struct utf8_data *utf8_fromcstr(const char *); char *utf8_tocstr(struct utf8_data *); u_int utf8_cstrwidth(const char *); char *utf8_trimcstr(const char *, u_int); +char *utf8_padcstr(const char *, u_int); /* procname.c */ char *get_proc_name(int, char *); diff --git a/utf8.c b/utf8.c index 0926f4bc..db738020 100644 --- a/utf8.c +++ b/utf8.c @@ -713,3 +713,24 @@ utf8_trimcstr(const char *s, u_int width) free(tmp); return (out); } + +/* Pad UTF-8 string to width. Caller frees. */ +char * +utf8_padcstr(const char *s, u_int width) +{ + size_t slen; + char *out; + u_int n, i; + + n = utf8_cstrwidth(s); + if (n >= width) + return (xstrdup(s)); + + slen = strlen(s); + out = xmalloc(slen + 1 + (width - n)); + memcpy(out, s, slen); + for (i = n; i < width; i++) + out[slen++] = ' '; + out[slen] = '\0'; + return (out); +}