mirror of
				https://github.com/tmux/tmux.git
				synced 2025-11-04 09:26:05 +00:00 
			
		
		
		
	Be more strict about invalid UTF-8.
This commit is contained in:
		
							
								
								
									
										13
									
								
								input.c
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								input.c
									
									
									
									
									
								
							@@ -446,11 +446,11 @@ const struct input_transition input_state_ground_table[] = {
 | 
			
		||||
	{ 0x1c, 0x1f, input_c0_dispatch, NULL },
 | 
			
		||||
	{ 0x20, 0x7e, input_print,	 NULL },
 | 
			
		||||
	{ 0x7f, 0x7f, NULL,		 NULL },
 | 
			
		||||
	{ 0x80, 0xc1, input_print,	 NULL },
 | 
			
		||||
	{ 0x80, 0xc1, NULL,		 NULL },
 | 
			
		||||
	{ 0xc2, 0xdf, input_utf8_open,	 &input_state_utf8_one },
 | 
			
		||||
	{ 0xe0, 0xef, input_utf8_open,	 &input_state_utf8_two },
 | 
			
		||||
	{ 0xf0, 0xf4, input_utf8_open,	 &input_state_utf8_three },
 | 
			
		||||
	{ 0xf5, 0xff, input_print,	 NULL },
 | 
			
		||||
	{ 0xf5, 0xff, NULL,		 NULL },
 | 
			
		||||
 | 
			
		||||
	{ -1, -1, NULL, NULL }
 | 
			
		||||
};
 | 
			
		||||
@@ -1923,7 +1923,8 @@ input_utf8_open(struct input_ctx *ictx)
 | 
			
		||||
{
 | 
			
		||||
	struct utf8_data	*ud = &ictx->utf8data;
 | 
			
		||||
 | 
			
		||||
	utf8_open(ud, ictx->ch);
 | 
			
		||||
	if (!utf8_open(ud, ictx->ch))
 | 
			
		||||
		log_fatalx("UTF-8 open invalid %#hhx", ictx->ch);
 | 
			
		||||
 | 
			
		||||
	log_debug("%s %hhu", __func__, ud->size);
 | 
			
		||||
 | 
			
		||||
@@ -1936,7 +1937,8 @@ input_utf8_add(struct input_ctx *ictx)
 | 
			
		||||
{
 | 
			
		||||
	struct utf8_data	*ud = &ictx->utf8data;
 | 
			
		||||
 | 
			
		||||
	utf8_append(ud, ictx->ch);
 | 
			
		||||
	if (utf8_append(ud, ictx->ch) != 1)
 | 
			
		||||
		log_fatalx("UTF-8 add invalid %#hhx", ictx->ch);
 | 
			
		||||
 | 
			
		||||
	log_debug("%s", __func__);
 | 
			
		||||
 | 
			
		||||
@@ -1949,7 +1951,8 @@ input_utf8_close(struct input_ctx *ictx)
 | 
			
		||||
{
 | 
			
		||||
	struct utf8_data	*ud = &ictx->utf8data;
 | 
			
		||||
 | 
			
		||||
	utf8_append(ud, ictx->ch);
 | 
			
		||||
	if (utf8_append(ud, ictx->ch) != 0)
 | 
			
		||||
		log_fatalx("UTF-8 close invalid %#hhx", ictx->ch);
 | 
			
		||||
 | 
			
		||||
	log_debug("%s %hhu '%*s' (width %hhu)", __func__, ud->size,
 | 
			
		||||
	    (int)ud->size, ud->data, ud->width);
 | 
			
		||||
 
 | 
			
		||||
@@ -144,7 +144,7 @@ key_string_lookup_string(const char *string)
 | 
			
		||||
	static const char	*other = "!#()+,-.0123456789:;<=>?'\r\t";
 | 
			
		||||
	key_code		 key;
 | 
			
		||||
	u_short			 u;
 | 
			
		||||
	int			 size;
 | 
			
		||||
	int			 size, more;
 | 
			
		||||
	key_code		 modifiers;
 | 
			
		||||
	struct utf8_data	 ud;
 | 
			
		||||
	u_int			 i;
 | 
			
		||||
@@ -177,7 +177,9 @@ key_string_lookup_string(const char *string)
 | 
			
		||||
			if (strlen(string) != ud.size)
 | 
			
		||||
				return (KEYC_NONE);
 | 
			
		||||
			for (i = 1; i < ud.size; i++)
 | 
			
		||||
				utf8_append(&ud, (u_char)string[i]);
 | 
			
		||||
				more = utf8_append(&ud, (u_char)string[i]);
 | 
			
		||||
			if (more != 0)
 | 
			
		||||
				return (KEYC_NONE);
 | 
			
		||||
			key = utf8_combine(&ud);
 | 
			
		||||
			return (key | modifiers);
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
@@ -115,6 +115,7 @@ screen_write_strlen(const char *fmt, ...)
 | 
			
		||||
	struct utf8_data	ud;
 | 
			
		||||
	u_char 	      	       *ptr;
 | 
			
		||||
	size_t			left, size = 0;
 | 
			
		||||
	int			more;
 | 
			
		||||
 | 
			
		||||
	va_start(ap, fmt);
 | 
			
		||||
	xvasprintf(&msg, fmt, ap);
 | 
			
		||||
@@ -128,11 +129,12 @@ screen_write_strlen(const char *fmt, ...)
 | 
			
		||||
			left = strlen(ptr);
 | 
			
		||||
			if (left < (size_t)ud.size - 1)
 | 
			
		||||
				break;
 | 
			
		||||
			while (utf8_append(&ud, *ptr))
 | 
			
		||||
			while ((more = utf8_append(&ud, *ptr)) == 1)
 | 
			
		||||
				ptr++;
 | 
			
		||||
			ptr++;
 | 
			
		||||
 | 
			
		||||
			size += ud.width;
 | 
			
		||||
			if (more == 0)
 | 
			
		||||
				size += ud.width;
 | 
			
		||||
		} else {
 | 
			
		||||
			if (*ptr > 0x1f && *ptr < 0x7f)
 | 
			
		||||
				size++;
 | 
			
		||||
@@ -176,6 +178,7 @@ screen_write_vnputs(struct screen_write_ctx *ctx, ssize_t maxlen,
 | 
			
		||||
	struct utf8_data	ud;
 | 
			
		||||
	u_char 		       *ptr;
 | 
			
		||||
	size_t		 	left, size = 0;
 | 
			
		||||
	int			more;
 | 
			
		||||
 | 
			
		||||
	xvasprintf(&msg, fmt, ap);
 | 
			
		||||
 | 
			
		||||
@@ -187,22 +190,24 @@ screen_write_vnputs(struct screen_write_ctx *ctx, ssize_t maxlen,
 | 
			
		||||
			left = strlen(ptr);
 | 
			
		||||
			if (left < (size_t)ud.size - 1)
 | 
			
		||||
				break;
 | 
			
		||||
			while (utf8_append(&ud, *ptr))
 | 
			
		||||
			while ((more = utf8_append(&ud, *ptr)) == 1)
 | 
			
		||||
				ptr++;
 | 
			
		||||
			ptr++;
 | 
			
		||||
 | 
			
		||||
			if (maxlen > 0 &&
 | 
			
		||||
			    size + ud.width > (size_t) maxlen) {
 | 
			
		||||
				while (size < (size_t) maxlen) {
 | 
			
		||||
					screen_write_putc(ctx, gc, ' ');
 | 
			
		||||
					size++;
 | 
			
		||||
			if (more == 0) {
 | 
			
		||||
				if (maxlen > 0 &&
 | 
			
		||||
				    size + ud.width > (size_t) maxlen) {
 | 
			
		||||
					while (size < (size_t) maxlen) {
 | 
			
		||||
						screen_write_putc(ctx, gc, ' ');
 | 
			
		||||
						size++;
 | 
			
		||||
					}
 | 
			
		||||
					break;
 | 
			
		||||
				}
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
			size += ud.width;
 | 
			
		||||
				size += ud.width;
 | 
			
		||||
 | 
			
		||||
			utf8_copy(&gc->data, &ud);
 | 
			
		||||
			screen_write_cell(ctx, gc);
 | 
			
		||||
				utf8_copy(&gc->data, &ud);
 | 
			
		||||
				screen_write_cell(ctx, gc);
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			if (maxlen > 0 && size + 1 > (size_t) maxlen)
 | 
			
		||||
				break;
 | 
			
		||||
@@ -231,6 +236,7 @@ screen_write_cnputs(struct screen_write_ctx *ctx, ssize_t maxlen,
 | 
			
		||||
	char			*msg;
 | 
			
		||||
	u_char 			*ptr, *last;
 | 
			
		||||
	size_t			 left, size = 0;
 | 
			
		||||
	int			 more;
 | 
			
		||||
 | 
			
		||||
	va_start(ap, fmt);
 | 
			
		||||
	xvasprintf(&msg, fmt, ap);
 | 
			
		||||
@@ -260,22 +266,24 @@ screen_write_cnputs(struct screen_write_ctx *ctx, ssize_t maxlen,
 | 
			
		||||
			left = strlen(ptr);
 | 
			
		||||
			if (left < (size_t)ud.size - 1)
 | 
			
		||||
				break;
 | 
			
		||||
			while (utf8_append(&ud, *ptr))
 | 
			
		||||
			while ((more = utf8_append(&ud, *ptr)) == 1)
 | 
			
		||||
				ptr++;
 | 
			
		||||
			ptr++;
 | 
			
		||||
 | 
			
		||||
			if (maxlen > 0 &&
 | 
			
		||||
			    size + ud.width > (size_t) maxlen) {
 | 
			
		||||
				while (size < (size_t) maxlen) {
 | 
			
		||||
					screen_write_putc(ctx, gc, ' ');
 | 
			
		||||
					size++;
 | 
			
		||||
			if (more == 0) {
 | 
			
		||||
				if (maxlen > 0 &&
 | 
			
		||||
				    size + ud.width > (size_t) maxlen) {
 | 
			
		||||
					while (size < (size_t) maxlen) {
 | 
			
		||||
						screen_write_putc(ctx, gc, ' ');
 | 
			
		||||
						size++;
 | 
			
		||||
					}
 | 
			
		||||
					break;
 | 
			
		||||
				}
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
			size += ud.width;
 | 
			
		||||
				size += ud.width;
 | 
			
		||||
 | 
			
		||||
			utf8_copy(&lgc.data, &ud);
 | 
			
		||||
			screen_write_cell(ctx, &lgc);
 | 
			
		||||
				utf8_copy(&lgc.data, &ud);
 | 
			
		||||
				screen_write_cell(ctx, &lgc);
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			if (maxlen > 0 && size + 1 > (size_t) maxlen)
 | 
			
		||||
				break;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								tmux.h
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								tmux.h
									
									
									
									
									
								
							@@ -624,7 +624,7 @@ struct utf8_data {
 | 
			
		||||
	u_char	have;
 | 
			
		||||
	u_char	size;
 | 
			
		||||
 | 
			
		||||
	u_char	width;
 | 
			
		||||
	u_char	width;	/* 0xff if invalid */
 | 
			
		||||
} __packed;
 | 
			
		||||
 | 
			
		||||
/* Grid attributes. */
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										11
									
								
								tty-keys.c
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								tty-keys.c
									
									
									
									
									
								
							@@ -472,7 +472,7 @@ tty_keys_next(struct tty *tty)
 | 
			
		||||
	const char		*buf;
 | 
			
		||||
	size_t			 len, size;
 | 
			
		||||
	cc_t			 bspace;
 | 
			
		||||
	int			 delay, expired = 0;
 | 
			
		||||
	int			 delay, expired = 0, more;
 | 
			
		||||
	key_code		 key;
 | 
			
		||||
	struct utf8_data	 ud;
 | 
			
		||||
	u_int			 i;
 | 
			
		||||
@@ -547,7 +547,9 @@ first_key:
 | 
			
		||||
			goto partial_key;
 | 
			
		||||
		}
 | 
			
		||||
		for (i = 1; i < size; i++)
 | 
			
		||||
			utf8_append(&ud, (u_char)buf[i]);
 | 
			
		||||
			more = utf8_append(&ud, (u_char)buf[i]);
 | 
			
		||||
		if (more != 0)
 | 
			
		||||
			goto discard_key;
 | 
			
		||||
		key = utf8_combine(&ud);
 | 
			
		||||
		log_debug("UTF-8 key %.*s %#llx", (int)size, buf, key);
 | 
			
		||||
		goto complete_key;
 | 
			
		||||
@@ -653,6 +655,7 @@ tty_keys_mouse(struct tty *tty, const char *buf, size_t len, size_t *size)
 | 
			
		||||
	struct utf8_data	 ud;
 | 
			
		||||
	u_int			 i, value, x, y, b, sgr_b;
 | 
			
		||||
	u_char			 sgr_type, c;
 | 
			
		||||
	int			 more;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Standard mouse sequences are \033[M followed by three characters
 | 
			
		||||
@@ -699,7 +702,9 @@ tty_keys_mouse(struct tty *tty, const char *buf, size_t len, size_t *size)
 | 
			
		||||
					(*size)++;
 | 
			
		||||
					if (len <= *size)
 | 
			
		||||
						return (1);
 | 
			
		||||
					utf8_append(&ud, buf[*size]);
 | 
			
		||||
					more = utf8_append(&ud, buf[*size]);
 | 
			
		||||
					if (more != 0)
 | 
			
		||||
						return (-1);
 | 
			
		||||
					value = utf8_combine(&ud);
 | 
			
		||||
				} else
 | 
			
		||||
					value = (u_char)buf[*size];
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										42
									
								
								utf8.c
									
									
									
									
									
								
							
							
						
						
									
										42
									
								
								utf8.c
									
									
									
									
									
								
							@@ -403,22 +403,26 @@ utf8_open(struct utf8_data *ud, u_char ch)
 | 
			
		||||
/*
 | 
			
		||||
 * Append character to UTF-8, closing if finished.
 | 
			
		||||
 *
 | 
			
		||||
 * Returns 1 if more UTF-8 data to come, 0 if finished.
 | 
			
		||||
 * Returns 1 if more UTF-8 data to come, 0 if finished and valid, -1 if
 | 
			
		||||
 * finished and invalid.
 | 
			
		||||
 */
 | 
			
		||||
int
 | 
			
		||||
utf8_append(struct utf8_data *ud, u_char ch)
 | 
			
		||||
{
 | 
			
		||||
	/*
	 * XXX the only validity check made in this function is the
	 * continuation-byte form test further down; the first byte
	 * (ud->have == 0) is stored without being inspected here.
	 */
 | 
			
		||||
 | 
			
		||||
	/*
	 * ud->size is the expected length of the sequence; it is not set in
	 * this function (presumably by utf8_open() - confirm). Appending once
	 * that many bytes are already held is reported through fatalx().
	 */
	if (ud->have >= ud->size)
 | 
			
		||||
		fatalx("UTF-8 character overflow");
 | 
			
		||||
	/* The expected length must also fit in the data buffer. */
	if (ud->size > sizeof ud->data)
 | 
			
		||||
		fatalx("UTF-8 character size too large");
 | 
			
		||||
 | 
			
		||||
	/*
	 * Every byte after the first must look like 10xxxxxx. If it does not,
	 * flag the sequence by setting width to 0xff. The byte is still stored
	 * and counted below, so the caller keeps feeding bytes until size is
	 * reached and only then sees the -1 result.
	 */
	if (ud->have != 0 && (ch & 0xc0) != 0x80)
 | 
			
		||||
		ud->width = 0xff;
 | 
			
		||||
 | 
			
		||||
	ud->data[ud->have++] = ch;
 | 
			
		||||
	/* Fewer bytes than expected so far: tell the caller to continue. */
	if (ud->have != ud->size)
 | 
			
		||||
		return (1);
 | 
			
		||||
 | 
			
		||||
	/*
	 * All expected bytes are present. A sequence flagged above is reported
	 * as invalid and width is left at 0xff.
	 */
	if (ud->width == 0xff)
 | 
			
		||||
		return (-1);
 | 
			
		||||
	/* Not flagged: take width from utf8_width() on the combined value. */
	ud->width = utf8_width(utf8_combine(ud));
 | 
			
		||||
	return (0);
 | 
			
		||||
}
 | 
			
		||||
@@ -556,15 +560,15 @@ utf8_strvis(char *dst, const char *src, size_t len, int flag)
 | 
			
		||||
	while (src < end) {
 | 
			
		||||
		if (utf8_open(&ud, *src)) {
 | 
			
		||||
			more = 1;
 | 
			
		||||
			while (++src < end && more)
 | 
			
		||||
			while (++src < end && more == 1)
 | 
			
		||||
				more = utf8_append(&ud, *src);
 | 
			
		||||
			if (!more) {
 | 
			
		||||
			if (more == 0) {
 | 
			
		||||
				/* UTF-8 character finished. */
 | 
			
		||||
				for (i = 0; i < ud.size; i++)
 | 
			
		||||
					*dst++ = ud.data[i];
 | 
			
		||||
				continue;
 | 
			
		||||
			} else if (ud.have > 0) {
 | 
			
		||||
				/* Not a complete UTF-8 character. */
 | 
			
		||||
				/* Not a complete, valid UTF-8 character. */
 | 
			
		||||
				src -= ud.have;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
@@ -600,9 +604,9 @@ utf8_sanitize(const char *src)
 | 
			
		||||
		dst = xreallocarray(dst, n + 1, sizeof *dst);
 | 
			
		||||
		if (utf8_open(&ud, *src)) {
 | 
			
		||||
			more = 1;
 | 
			
		||||
			while (*++src != '\0' && more)
 | 
			
		||||
			while (*++src != '\0' && more == 1)
 | 
			
		||||
				more = utf8_append(&ud, *src);
 | 
			
		||||
			if (!more) {
 | 
			
		||||
			if (more != 1) {
 | 
			
		||||
				dst = xreallocarray(dst, n + ud.width,
 | 
			
		||||
				    sizeof *dst);
 | 
			
		||||
				for (i = 0; i < ud.width; i++)
 | 
			
		||||
@@ -612,10 +616,8 @@ utf8_sanitize(const char *src)
 | 
			
		||||
			src -= ud.have;
 | 
			
		||||
		}
 | 
			
		||||
		if (*src > 0x1f && *src < 0x7f)
 | 
			
		||||
			dst[n] = *src;
 | 
			
		||||
			dst[n++] = *src;
 | 
			
		||||
		src++;
 | 
			
		||||
 | 
			
		||||
		n++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	dst = xreallocarray(dst, n + 1, sizeof *dst);
 | 
			
		||||
@@ -641,18 +643,19 @@ utf8_fromcstr(const char *src)
 | 
			
		||||
		dst = xreallocarray(dst, n + 1, sizeof *dst);
 | 
			
		||||
		if (utf8_open(&dst[n], *src)) {
 | 
			
		||||
			more = 1;
 | 
			
		||||
			while (*++src != '\0' && more)
 | 
			
		||||
			while (*++src != '\0' && more == 1)
 | 
			
		||||
				more = utf8_append(&dst[n], *src);
 | 
			
		||||
			if (!more) {
 | 
			
		||||
			if (more != 1) {
 | 
			
		||||
				n++;
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
			src -= dst[n].have;
 | 
			
		||||
		}
 | 
			
		||||
		utf8_set(&dst[n], *src);
 | 
			
		||||
		if (*src > 0x1f && *src < 0x7f) {
 | 
			
		||||
			utf8_set(&dst[n], *src);
 | 
			
		||||
			n++;
 | 
			
		||||
		}
 | 
			
		||||
		src++;
 | 
			
		||||
 | 
			
		||||
		n++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	dst = xreallocarray(dst, n + 1, sizeof *dst);
 | 
			
		||||
@@ -693,15 +696,16 @@ utf8_cstrwidth(const char *s)
 | 
			
		||||
	while (*s != '\0') {
 | 
			
		||||
		if (utf8_open(&tmp, *s)) {
 | 
			
		||||
			more = 1;
 | 
			
		||||
			while (*++s != '\0' && more)
 | 
			
		||||
			while (*++s != '\0' && more == 1)
 | 
			
		||||
				more = utf8_append(&tmp, *s);
 | 
			
		||||
			if (!more) {
 | 
			
		||||
			if (more != 1) {
 | 
			
		||||
				width += tmp.width;
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
			s -= tmp.have;
 | 
			
		||||
		}
 | 
			
		||||
		width++;
 | 
			
		||||
		if (*s > 0x1f && *s < 0x7f)
 | 
			
		||||
			width++;
 | 
			
		||||
		s++;
 | 
			
		||||
	}
 | 
			
		||||
	return (width);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user