From 806520f025dccdcbf266bb9c7cbd984bef00d733 Mon Sep 17 00:00:00 2001 From: nicm Date: Thu, 17 Apr 2014 15:37:55 +0000 Subject: [PATCH] Add some UTF-8 utility functions and use them to prevent the width limit on formats from splitting UTF-8 characters improperly. --- format.c | 13 +++--- tmux.h | 19 +++++---- utf8.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 12 deletions(-) diff --git a/format.c b/format.c index f462a2a5..7c8ce779 100644 --- a/format.c +++ b/format.c @@ -194,10 +194,10 @@ int format_replace(struct format_tree *ft, const char *key, size_t keylen, char **buf, size_t *len, size_t *off) { - char *copy, *copy0, *endptr, *ptr, *saved; + char *copy, *copy0, *endptr, *ptr, *saved, *trimmed; const char *value; size_t valuelen; - u_long limit = ULONG_MAX; + u_long limit = 0; /* Make a copy of the key. */ copy0 = copy = xmalloc(keylen + 1); @@ -256,11 +256,14 @@ format_replace(struct format_tree *ft, const char *key, size_t keylen, value = ""; saved = NULL; } - valuelen = strlen(value); /* Truncate the value if needed. */ - if (valuelen > limit) - valuelen = limit; + if (limit != 0) { + value = trimmed = utf8_trimcstr(value, limit); + free(saved); + saved = trimmed; + } + valuelen = strlen(value); /* Expand the buffer and copy in the value. 
*/ while (*len - *off < valuelen + 1) { diff --git a/tmux.h b/tmux.h index 6787783e..ed0f2e44 100644 --- a/tmux.h +++ b/tmux.h @@ -2306,7 +2306,7 @@ struct winlink *session_new(struct session *, const char *, const char *, struct winlink *session_attach( struct session *, struct window *, int, char **); int session_detach(struct session *, struct winlink *); -struct winlink* session_has(struct session *, struct window *); +struct winlink *session_has(struct session *, struct window *); int session_next(struct session *, int); int session_previous(struct session *, int); int session_select(struct session *, int); @@ -2322,12 +2322,17 @@ void session_group_synchronize1(struct session *, struct session *); void session_renumber_windows(struct session *); /* utf8.c */ -void utf8_build(void); -int utf8_open(struct utf8_data *, u_char); -int utf8_append(struct utf8_data *, u_char); -u_int utf8_combine(const struct utf8_data *); -u_int utf8_split2(u_int, u_char *); -int utf8_strvis(char *, const char *, size_t, int); +void utf8_build(void); +void utf8_set(struct utf8_data *, u_char); +int utf8_open(struct utf8_data *, u_char); +int utf8_append(struct utf8_data *, u_char); +u_int utf8_combine(const struct utf8_data *); +u_int utf8_split2(u_int, u_char *); +int utf8_strvis(char *, const char *, size_t, int); +struct utf8_data *utf8_fromcstr(const char *); +char *utf8_tocstr(struct utf8_data *); +u_int utf8_cstrwidth(const char *); +char *utf8_trimcstr(const char *, u_int); /* procname.c */ char *get_proc_name(int, char *); diff --git a/utf8.c b/utf8.c index 945715dd..78ed1675 100644 --- a/utf8.c +++ b/utf8.c @@ -18,6 +18,7 @@ #include +#include #include #include @@ -199,6 +200,16 @@ int utf8_overlap(struct utf8_width_entry *, struct utf8_width_entry *); u_int utf8_combine(const struct utf8_data *); u_int utf8_width(const struct utf8_data *); +/* Set a single character. 
*/ +void +utf8_set(struct utf8_data *utf8data, u_char ch) +{ + *utf8data->data = ch; /* a lone byte: size and width are both fixed at 1 below */ + utf8data->size = 1; + + utf8data->width = 1; +} + /* * Open UTF-8 sequence. * @@ -392,3 +403,111 @@ utf8_strvis(char *dst, const char *src, size_t len, int flag) *dst = '\0'; return (dst - start); } + +/* + * Convert a string into a buffer of UTF-8 characters, one utf8_data per character. Terminated by size == 0. + * A byte utf8_open() rejects, or the first byte of a sequence cut off by the end of the string, is stored alone with utf8_set(). Caller frees. + */ +struct utf8_data * +utf8_fromcstr(const char *src) +{ + struct utf8_data *dst; + size_t n; + int more; + + dst = NULL; + + n = 0; + while (*src != '\0') { + dst = xrealloc(dst, n + 1, sizeof *dst); /* make room for entry n */ + if (utf8_open(&dst[n], *src)) { + more = 1; + while (*++src != '\0' && more) + more = utf8_append(&dst[n], *src); + if (!more) { /* utf8_append() returned 0; the loop test has already moved src to the next unread byte */ + n++; + continue; + } + src -= dst[n].have; /* string ended mid-sequence: step back - presumably have is the number of bytes taken so far, TODO confirm */ + } + utf8_set(&dst[n], *src); /* fall back to storing this one byte by itself */ + src++; + + n++; + } + + dst = xrealloc(dst, n + 1, sizeof *dst); + dst[n].size = 0; /* terminating entry */ + return (dst); +} + +/* Convert from a buffer of UTF-8 characters (ending at size == 0) into a NUL-terminated string. Caller frees. */ +char * +utf8_tocstr(struct utf8_data *src) +{ + char *dst; + size_t n; + + dst = NULL; + + n = 0; + for(; src->size != 0; src++) { + dst = xrealloc(dst, n + src->size, 1); + memcpy(dst + n, src->data, src->size); /* append this entry's bytes */ + n += src->size; + } + + dst = xrealloc(dst, n + 1, 1); /* one more byte for the terminator */ + dst[n] = '\0'; + return (dst); +} + +/* Get width of UTF-8 string: each sequence read through utf8_append() adds its width, any other byte adds 1. */ +u_int +utf8_cstrwidth(const char *s) +{ + struct utf8_data tmp; + u_int width; + int more; + + width = 0; + while (*s != '\0') { + if (utf8_open(&tmp, *s)) { + more = 1; + while (*++s != '\0' && more) + more = utf8_append(&tmp, *s); + if (!more) { /* utf8_append() returned 0; s is already on the next unread byte */ + width += tmp.width; + continue; + } + s -= tmp.have; /* string ended mid-sequence: step back - presumably have is the number of bytes taken so far, TODO confirm */ + } + width++; /* this byte is counted on its own as width 1 */ + s++; + } + return (width); +} + +/* Trim UTF-8 string to width. Caller frees.
*/ +char * +utf8_trimcstr(const char *s, u_int width) +{ + struct utf8_data *tmp, *next; + char *out; + u_int at; + + tmp = utf8_fromcstr(s); /* one entry per character, ended by size == 0 */ + + at = 0; /* total width of the entries kept so far */ + for (next = tmp; next->size != 0; next++) { + if (at + next->width > width) { /* this entry would not fit */ + next->size = 0; /* make it the terminator so utf8_tocstr() stops here */ + break; + } + at += next->width; + } + + out = utf8_tocstr(tmp); /* fresh string holding only the kept entries */ + free(tmp); + return (out); +}