From 7019f77c05f45ea9267f7e768d0abb0b6a928a25 Mon Sep 17 00:00:00 2001
From: Nicholas Marriott
Date: Sat, 8 Mar 2014 16:27:45 +0000
Subject: [PATCH] In four byte UTF-8 sequences, only three bits of the first byte should be used. Fix from Koga Osamu.

---
 utf8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utf8.c b/utf8.c
index 63723d7f..5babcb3b 100644
--- a/utf8.c
+++ b/utf8.c
@@ -313,7 +313,7 @@ utf8_combine(const struct utf8_data *utf8data)
 		value = utf8data->data[3] & 0x3f;
 		value |= (utf8data->data[2] & 0x3f) << 6;
 		value |= (utf8data->data[1] & 0x3f) << 12;
-		value |= (utf8data->data[0] & 0x3f) << 18;
+		value |= (utf8data->data[0] & 0x07) << 18;
 		break;
 	}
 	return (value);