Add some UTF-8 utility functions and use them to prevent the width limit on
formats from splitting UTF-8 characters improperly.
This commit is contained in:
nicm 2014-04-17 15:37:55 +00:00
parent a5d4b7f3d9
commit 806520f025
3 changed files with 139 additions and 12 deletions

View File

@ -194,10 +194,10 @@ int
format_replace(struct format_tree *ft, const char *key, size_t keylen, format_replace(struct format_tree *ft, const char *key, size_t keylen,
char **buf, size_t *len, size_t *off) char **buf, size_t *len, size_t *off)
{ {
char *copy, *copy0, *endptr, *ptr, *saved; char *copy, *copy0, *endptr, *ptr, *saved, *trimmed;
const char *value; const char *value;
size_t valuelen; size_t valuelen;
u_long limit = ULONG_MAX; u_long limit = 0;
/* Make a copy of the key. */ /* Make a copy of the key. */
copy0 = copy = xmalloc(keylen + 1); copy0 = copy = xmalloc(keylen + 1);
@ -256,11 +256,14 @@ format_replace(struct format_tree *ft, const char *key, size_t keylen,
value = ""; value = "";
saved = NULL; saved = NULL;
} }
valuelen = strlen(value);
/* Truncate the value if needed. */ /* Truncate the value if needed. */
if (valuelen > limit) if (limit != 0) {
valuelen = limit; value = trimmed = utf8_trimcstr(value, limit);
free(saved);
saved = trimmed;
}
valuelen = strlen(value);
/* Expand the buffer and copy in the value. */ /* Expand the buffer and copy in the value. */
while (*len - *off < valuelen + 1) { while (*len - *off < valuelen + 1) {

19
tmux.h
View File

@ -2306,7 +2306,7 @@ struct winlink *session_new(struct session *, const char *, const char *,
struct winlink *session_attach( struct winlink *session_attach(
struct session *, struct window *, int, char **); struct session *, struct window *, int, char **);
int session_detach(struct session *, struct winlink *); int session_detach(struct session *, struct winlink *);
struct winlink* session_has(struct session *, struct window *); struct winlink *session_has(struct session *, struct window *);
int session_next(struct session *, int); int session_next(struct session *, int);
int session_previous(struct session *, int); int session_previous(struct session *, int);
int session_select(struct session *, int); int session_select(struct session *, int);
@ -2322,12 +2322,17 @@ void session_group_synchronize1(struct session *, struct session *);
void session_renumber_windows(struct session *); void session_renumber_windows(struct session *);
/* utf8.c */ /* utf8.c */
void utf8_build(void); void utf8_build(void);
int utf8_open(struct utf8_data *, u_char); void utf8_set(struct utf8_data *, u_char);
int utf8_append(struct utf8_data *, u_char); int utf8_open(struct utf8_data *, u_char);
u_int utf8_combine(const struct utf8_data *); int utf8_append(struct utf8_data *, u_char);
u_int utf8_split2(u_int, u_char *); u_int utf8_combine(const struct utf8_data *);
int utf8_strvis(char *, const char *, size_t, int); u_int utf8_split2(u_int, u_char *);
int utf8_strvis(char *, const char *, size_t, int);
struct utf8_data *utf8_fromcstr(const char *);
char *utf8_tocstr(struct utf8_data *);
u_int utf8_cstrwidth(const char *);
char *utf8_trimcstr(const char *, u_int);
/* procname.c */ /* procname.c */
char *get_proc_name(int, char *); char *get_proc_name(int, char *);

119
utf8.c
View File

@ -18,6 +18,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <vis.h> #include <vis.h>
@ -199,6 +200,16 @@ int utf8_overlap(struct utf8_width_entry *, struct utf8_width_entry *);
u_int utf8_combine(const struct utf8_data *); u_int utf8_combine(const struct utf8_data *);
u_int utf8_width(const struct utf8_data *); u_int utf8_width(const struct utf8_data *);
/*
 * Store the single byte ch in utf8data as a character whose size and width
 * are both one.
 */
void
utf8_set(struct utf8_data *utf8data, u_char ch)
{
	utf8data->width = 1;
	utf8data->size = 1;
	utf8data->data[0] = ch;
}
/* /*
* Open UTF-8 sequence. * Open UTF-8 sequence.
* *
@ -392,3 +403,111 @@ utf8_strvis(char *dst, const char *src, size_t len, int flag)
*dst = '\0'; *dst = '\0';
return (dst - start); return (dst - start);
} }
/*
 * Split a C string into an array of UTF-8 characters, one entry per
 * character. The array ends with an entry whose size is 0. Caller frees.
 */
struct utf8_data *
utf8_fromcstr(const char *src)
{
	struct utf8_data	*out, *cur;
	size_t			 count;
	int			 pending;

	out = NULL;
	count = 0;

	while (*src != '\0') {
		/* Make room for one more entry and claim it. */
		out = xrealloc(out, count + 1, sizeof *out);
		cur = &out[count++];

		if (utf8_open(cur, *src)) {
			/*
			 * Keep feeding bytes while utf8_append returns nonzero
			 * and the string has not ended.
			 */
			pending = 1;
			for (src++; *src != '\0' && pending; src++)
				pending = utf8_append(cur, *src);
			if (!pending)
				continue;

			/*
			 * The string ended first: rewind over the bytes
			 * already taken and store the first one on its own.
			 */
			src -= cur->have;
		}

		utf8_set(cur, *src);
		src++;
	}

	/* Append the size == 0 terminator entry. */
	out = xrealloc(out, count + 1, sizeof *out);
	out[count].size = 0;

	return (out);
}
/*
 * Convert from a buffer of UTF-8 characters into a string. The buffer must be
 * terminated by an entry with size == 0. The result is NUL-terminated. Caller
 * frees.
 */
char *
utf8_tocstr(struct utf8_data *src)
{
	struct utf8_data	*ud;
	char			*dst;
	size_t			 len, off;

	/* Measure first so the string is allocated once, not once per entry. */
	len = 0;
	for (ud = src; ud->size != 0; ud++)
		len += ud->size;

	/* One extra byte for the terminating NUL. */
	dst = xrealloc(NULL, len + 1, 1);

	off = 0;
	for (ud = src; ud->size != 0; ud++) {
		memcpy(dst + off, ud->data, ud->size);
		off += ud->size;
	}
	dst[off] = '\0';

	return (dst);
}
/* Get width of UTF-8 string. */
u_int
utf8_cstrwidth(const char *s)
{
struct utf8_data tmp;
u_int width;
int more;
width = 0;
while (*s != '\0') {
if (utf8_open(&tmp, *s)) {
more = 1;
while (*++s != '\0' && more)
more = utf8_append(&tmp, *s);
if (!more) {
width += tmp.width;
continue;
}
s -= tmp.have;
}
width++;
s++;
}
return (width);
}
/*
 * Return a copy of s cut off before the first character that would push the
 * accumulated width past the given width. Caller frees.
 */
char *
utf8_trimcstr(const char *s, u_int width)
{
	struct utf8_data	*chars, *cur;
	char			*result;
	u_int			 used;

	chars = utf8_fromcstr(s);

	/* Advance while the next character still fits. */
	used = 0;
	cur = chars;
	while (cur->size != 0 && used + cur->width <= width) {
		used += cur->width;
		cur++;
	}

	/* Terminate here; this changes nothing if already at the end. */
	cur->size = 0;

	result = utf8_tocstr(chars);
	free(chars);

	return (result);
}