Handle wcwidth() and mbtowc() failures in better style and drop
characters where we can't find the width (wcwidth() fails) on input, the
same as we drop invalid UTF-8. Suggested by schwarze@.
This commit is contained in:
nicm 2016-03-02 15:36:02 +00:00
parent d980d965dd
commit b8a102d26f
5 changed files with 48 additions and 21 deletions

10
input.c
View File

@ -1960,8 +1960,14 @@ input_utf8_close(struct input_ctx *ictx)
{ {
struct utf8_data *ud = &ictx->utf8data; struct utf8_data *ud = &ictx->utf8data;
if (utf8_append(ud, ictx->ch) != UTF8_DONE) if (utf8_append(ud, ictx->ch) != UTF8_DONE) {
fatalx("UTF-8 close invalid %#x", ictx->ch); /*
* An error here could be invalid UTF-8 or it could be a
* nonprintable character for which we can't get the
* width. Drop it.
*/
return (0);
}
log_debug("%s %hhu '%*s' (width %hhu)", __func__, ud->size, log_debug("%s %hhu '%*s' (width %hhu)", __func__, ud->size,
(int)ud->size, ud->data, ud->width); (int)ud->size, ud->data, ud->width);

View File

@ -149,6 +149,7 @@ key_string_lookup_string(const char *string)
struct utf8_data ud; struct utf8_data ud;
u_int i; u_int i;
enum utf8_state more; enum utf8_state more;
wchar_t wc;
/* Is this no key? */ /* Is this no key? */
if (strcasecmp(string, "None") == 0) if (strcasecmp(string, "None") == 0)
@ -185,8 +186,9 @@ key_string_lookup_string(const char *string)
more = utf8_append(&ud, (u_char)string[i]); more = utf8_append(&ud, (u_char)string[i]);
if (more != UTF8_DONE) if (more != UTF8_DONE)
return (KEYC_UNKNOWN); return (KEYC_UNKNOWN);
key = utf8_combine(&ud); if (utf8_combine(&ud, &wc) != UTF8_DONE)
return (key | modifiers); return (KEYC_UNKNOWN);
return (wc | modifiers);
} }
/* Otherwise look the key up in the table. */ /* Otherwise look the key up in the table. */

3
tmux.h
View File

@ -2316,8 +2316,7 @@ void utf8_set(struct utf8_data *, u_char);
void utf8_copy(struct utf8_data *, const struct utf8_data *); void utf8_copy(struct utf8_data *, const struct utf8_data *);
enum utf8_state utf8_open(struct utf8_data *, u_char); enum utf8_state utf8_open(struct utf8_data *, u_char);
enum utf8_state utf8_append(struct utf8_data *, u_char); enum utf8_state utf8_append(struct utf8_data *, u_char);
u_int utf8_width(wchar_t); enum utf8_state utf8_combine(const struct utf8_data *, wchar_t *);
wchar_t utf8_combine(const struct utf8_data *);
enum utf8_state utf8_split(wchar_t, struct utf8_data *); enum utf8_state utf8_split(wchar_t, struct utf8_data *);
int utf8_strvis(char *, const char *, size_t, int); int utf8_strvis(char *, const char *, size_t, int);
char *utf8_sanitize(const char *); char *utf8_sanitize(const char *);

View File

@ -477,6 +477,7 @@ tty_keys_next(struct tty *tty)
struct utf8_data ud; struct utf8_data ud;
enum utf8_state more; enum utf8_state more;
u_int i; u_int i;
wchar_t wc;
/* Get key buffer. */ /* Get key buffer. */
buf = EVBUFFER_DATA(tty->event->input); buf = EVBUFFER_DATA(tty->event->input);
@ -552,7 +553,11 @@ first_key:
more = utf8_append(&ud, (u_char)buf[i]); more = utf8_append(&ud, (u_char)buf[i]);
if (more != UTF8_DONE) if (more != UTF8_DONE)
goto discard_key; goto discard_key;
key = utf8_combine(&ud);
if (utf8_combine(&ud, &wc) != UTF8_DONE)
goto discard_key;
key = wc;
log_debug("UTF-8 key %.*s %#llx", (int)size, buf, key); log_debug("UTF-8 key %.*s %#llx", (int)size, buf, key);
goto complete_key; goto complete_key;
} }

43
utf8.c
View File

@ -25,6 +25,8 @@
#include "tmux.h" #include "tmux.h"
static int utf8_width(wchar_t);
/* Set a single character. */ /* Set a single character. */
void void
utf8_set(struct utf8_data *ud, u_char ch) utf8_set(struct utf8_data *ud, u_char ch)
@ -80,6 +82,9 @@ utf8_open(struct utf8_data *ud, u_char ch)
enum utf8_state enum utf8_state
utf8_append(struct utf8_data *ud, u_char ch) utf8_append(struct utf8_data *ud, u_char ch)
{ {
wchar_t wc;
int width;
if (ud->have >= ud->size) if (ud->have >= ud->size)
fatalx("UTF-8 character overflow"); fatalx("UTF-8 character overflow");
if (ud->size > sizeof ud->data) if (ud->size > sizeof ud->data)
@ -94,39 +99,49 @@ utf8_append(struct utf8_data *ud, u_char ch)
if (ud->width == 0xff) if (ud->width == 0xff)
return (UTF8_ERROR); return (UTF8_ERROR);
ud->width = utf8_width(utf8_combine(ud));
if (utf8_combine(ud, &wc) != UTF8_DONE)
return (UTF8_ERROR);
if ((width = utf8_width(wc)) < 0)
return (UTF8_ERROR);
ud->width = width;
return (UTF8_DONE); return (UTF8_DONE);
} }
/* Get width of Unicode character. */ /* Get width of Unicode character. */
u_int static int
utf8_width(wchar_t wc) utf8_width(wchar_t wc)
{ {
int width; int width;
width = wcwidth(wc); width = wcwidth(wc);
if (width < 0) if (width < 0 || width > 0xff)
return (0); return (-1);
return (width); return (width);
} }
/* Combine UTF-8 into Unicode. */ /* Combine UTF-8 into Unicode. */
wchar_t enum utf8_state
utf8_combine(const struct utf8_data *ud) utf8_combine(const struct utf8_data *ud, wchar_t *wc)
{ {
wchar_t wc; switch (mbtowc(wc, ud->data, ud->size)) {
case -1:
if (mbtowc(&wc, ud->data, ud->size) <= 0) mbtowc(NULL, NULL, MB_CUR_MAX);
return (0xfffd); return (UTF8_ERROR);
return (wc); case 0:
return (UTF8_ERROR);
default:
return (UTF8_DONE);
}
} }
/* Split Unicode into UTF-8. */ /* Split Unicode into UTF-8. */
enum utf8_state enum utf8_state
utf8_split(wchar_t wc, struct utf8_data *ud) utf8_split(wchar_t wc, struct utf8_data *ud)
{ {
char s[MB_CUR_MAX]; char s[MB_LEN_MAX];
int slen; int slen;
slen = wctomb(s, wc); slen = wctomb(s, wc);
if (slen <= 0 || slen > (int)sizeof ud->data) if (slen <= 0 || slen > (int)sizeof ud->data)