Add some UTF-8 utility functions and use them to prevent the width limit

on formats from splitting UTF-8 characters improperly.
2025-12-24 17:56:02 +00:00 · 2014-04-17 15:37:55 +00:00
parent a5d4b7f3d9
commit 806520f025
3 changed files with 139 additions and 12 deletions
--- a/format.c
+++ b/format.c
@@ -194,10 +194,10 @@ int
 format_replace(struct format_tree *ft, const char *key, size_t keylen,
    char **buf, size_t *len, size_t *off)
 {
-	char		*copy, *copy0, *endptr, *ptr, *saved;
+	char		*copy, *copy0, *endptr, *ptr, *saved, *trimmed;
 	const char	*value;
 	size_t		 valuelen;
-	u_long		 limit = ULONG_MAX;
+	u_long		 limit = 0;
 	/* Make a copy of the key. */
 	copy0 = copy = xmalloc(keylen + 1);
@@ -256,11 +256,14 @@ format_replace(struct format_tree *ft, const char *key, size_t keylen,
 			value = "";
 		saved = NULL;
 	}
 	valuelen = strlen(value);
 	/* Truncate the value if needed. */
-	if (valuelen > limit)
+	if (limit != 0) {
-		valuelen = limit;
+		value = trimmed = utf8_trimcstr(value, limit);
 		free(saved);
 		saved = trimmed;
 	}
 	valuelen = strlen(value);
 	/* Expand the buffer and copy in the value. */
 	while (*len - *off < valuelen + 1) {
--- a/tmux.h
+++ b/tmux.h
@@ -2306,7 +2306,7 @@ struct winlink	*session_new(struct session *, const char *, const char *,
 struct winlink	*session_attach(
 		     struct session *, struct window *, int, char **);
 int		 session_detach(struct session *, struct winlink *);
-struct winlink*	 session_has(struct session *, struct window *);
+struct winlink	*session_has(struct session *, struct window *);
 int		 session_next(struct session *, int);
 int		 session_previous(struct session *, int);
 int		 session_select(struct session *, int);
@@ -2322,12 +2322,17 @@ void		 session_group_synchronize1(struct session *, struct session *);
 void		 session_renumber_windows(struct session *);
 /* utf8.c */
-void	utf8_build(void);
+void		 utf8_build(void);
-int	utf8_open(struct utf8_data *, u_char);
+void		 utf8_set(struct utf8_data *, u_char);
-int	utf8_append(struct utf8_data *, u_char);
+int		 utf8_open(struct utf8_data *, u_char);
-u_int	utf8_combine(const struct utf8_data *);
+int		 utf8_append(struct utf8_data *, u_char);
-u_int	utf8_split2(u_int, u_char *);
+u_int		 utf8_combine(const struct utf8_data *);
-int	utf8_strvis(char *, const char *, size_t, int);
+u_int		 utf8_split2(u_int, u_char *);
 int		 utf8_strvis(char *, const char *, size_t, int);
 struct utf8_data *utf8_fromcstr(const char *);
 char		*utf8_tocstr(struct utf8_data *);
 u_int		 utf8_cstrwidth(const char *);
 char		*utf8_trimcstr(const char *, u_int);
 /* procname.c */
 char   *get_proc_name(int, char *);
--- a/utf8.c
+++ b/utf8.c
@@ -18,6 +18,7 @@
 #include <sys/types.h>
 #include <stdlib.h>
 #include <string.h>
 #include <vis.h>
@@ -199,6 +200,16 @@ int	utf8_overlap(struct utf8_width_entry *, struct utf8_width_entry *);
 u_int	utf8_combine(const struct utf8_data *);
 u_int	utf8_width(const struct utf8_data *);
 /* Store the byte ch in utf8data as a character of size one and width one. */
 void
 utf8_set(struct utf8_data *utf8data, u_char ch)
 {
 	utf8data->width = 1;
 	utf8data->size = 1;
 	utf8data->data[0] = ch;
 }
 /*
 * Open UTF-8 sequence.
 *
@@ -392,3 +403,111 @@ utf8_strvis(char *dst, const char *src, size_t len, int flag)
 	*dst = '\0';
 	return (dst - start);
 }
 /*
  * Break a NUL-terminated string up into an array of UTF-8 characters. The
  * final array entry has size == 0 and marks the end. Caller frees.
  */
 struct utf8_data *
 utf8_fromcstr(const char *src)
 {
 	struct utf8_data	*chars = NULL;
 	size_t			 count = 0;
 	int			 pending;
 	/* Each pass through the loop fills exactly one entry, chars[count]. */
 	for (; *src != '\0'; count++) {
 		chars = xrealloc(chars, count + 1, sizeof *chars);
 		if (utf8_open(&chars[count], *src)) {
 			/* Feed following bytes until done or out of input. */
 			pending = 1;
 			for (src++; *src != '\0' && pending; src++)
 				pending = utf8_append(&chars[count], *src);
 			if (!pending)
 				continue;
 			/*
 			 * The string ended while more bytes were wanted; step
 			 * back by the entry's have count and fall through.
 			 */
 			src -= chars[count].have;
 		}
 		/* Keep the current byte as a single character on its own. */
 		utf8_set(&chars[count], *src);
 		src++;
 	}
 	/* Append the size == 0 terminator entry. */
 	chars = xrealloc(chars, count + 1, sizeof *chars);
 	chars[count].size = 0;
 	return (chars);
 }
 /*
  * Concatenate the bytes of an array of UTF-8 characters (ended by an entry
  * with size == 0) into a NUL-terminated string. Caller frees.
  */
 char *
 utf8_tocstr(struct utf8_data *src)
 {
 	char	*buf = NULL;
 	size_t	 used = 0;
 	while (src->size != 0) {
 		/* Grow by exactly this character's bytes, then copy them in. */
 		buf = xrealloc(buf, used + src->size, 1);
 		memcpy(&buf[used], src->data, src->size);
 		used += src->size;
 		src++;
 	}
 	/* One more byte for the terminating NUL. */
 	buf = xrealloc(buf, used + 1, 1);
 	buf[used] = '\0';
 	return (buf);
 }
 /* Get width of UTF-8 string. */
 u_int
 utf8_cstrwidth(const char *s)
 {
 	struct utf8_data	tmp;
 	u_int			width;
 	int			more;
 	width = 0;
 	while (*s != '\0') {
 		if (utf8_open(&tmp, *s)) {
 			more = 1;
 			while (*++s != '\0' && more)
 				more = utf8_append(&tmp, *s);
 			if (!more) {
 				width += tmp.width;
 				continue;
 			}
 			s -= tmp.have;
 		}
 		width++;
 		s++;
 	}
 	return (width);
 }
 /*
  * Return a copy of s cut at a character boundary so that the summed width of
  * the kept characters does not exceed width. Caller frees.
  */
 char *
 utf8_trimcstr(const char *s, u_int width)
 {
 	struct utf8_data	*chars, *cur;
 	char			*result;
 	u_int			 used = 0;
 	chars = cur = utf8_fromcstr(s);
 	/* Walk forward while the next character still fits. */
 	while (cur->size != 0 && used + cur->width <= width) {
 		used += cur->width;
 		cur++;
 	}
 	/* End the array here: first character that does not fit, or the end. */
 	cur->size = 0;
 	result = utf8_tocstr(chars);
 	free(chars);
 	return (result);
 }