From ee19d304ff366741f6f94334bbc82124a4c44dbc Mon Sep 17 00:00:00 2001
From: nicm
Date: Mon, 31 Mar 2014 21:43:35 +0000
Subject: [PATCH] In four byte UTF-8 sequences, only three bits of the first byte should be used. Fix from Koga Osamu.

---
 utf8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utf8.c b/utf8.c
index 1c81392b..85889dcb 100644
--- a/utf8.c
+++ b/utf8.c
@@ -311,7 +311,7 @@ utf8_combine(const struct utf8_data *utf8data)
 		value = utf8data->data[3] & 0x3f;
 		value |= (utf8data->data[2] & 0x3f) << 6;
 		value |= (utf8data->data[1] & 0x3f) << 12;
-		value |= (utf8data->data[0] & 0x3f) << 18;
+		value |= (utf8data->data[0] & 0x07) << 18;
 		break;
 	}
 	return (value);