mirror of
https://github.com/tmux/tmux.git
synced 2025-04-25 06:08:53 +00:00
Use bitshifts instead of a union for encoding UTF-8 into 32 bits, which
is more friendly to GCC3. Reported by and ok aoyama@.
This commit is contained in:
parent
d919fa1ed0
commit
a4a3d89598
91
utf8.c
91
utf8.c
@ -52,32 +52,11 @@ static struct utf8_item *utf8_list;
|
|||||||
static u_int utf8_list_size;
|
static u_int utf8_list_size;
|
||||||
static u_int utf8_list_used;
|
static u_int utf8_list_used;
|
||||||
|
|
||||||
union utf8_map {
|
/* Extract the 5-bit size field held in bits 24-28 of a packed character. */
#define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
|
||||||
utf8_char uc;
|
/*
 * Extract the width from the top three bits (29-31) of a packed character.
 * The field is stored with a bias of one, so subtract it again here. The
 * parameter must be the name used in the expansion; otherwise the macro
 * silently reads whatever variable called "uc" is in scope at the call site.
 */
#define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1)
|
||||||
struct {
|
|
||||||
u_char flags;
|
|
||||||
u_char data[3];
|
|
||||||
};
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
#define UTF8_GET_SIZE(flags) ((flags) & 0x1f)
|
/* Widen size to utf8_char and move it up to bit 24 of the packed character. */
#define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
|
||||||
#define UTF8_GET_WIDTH(flags) (((flags) >> 5) - 1)
|
/* Store width biased by one, shifted up to bit 29 of the packed character. */
#define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
|
||||||
|
|
||||||
#define UTF8_SET_SIZE(size) (size)
|
|
||||||
#define UTF8_SET_WIDTH(width) ((width + 1) << 5)
|
|
||||||
|
|
||||||
static const union utf8_map utf8_space0 = {
|
|
||||||
.flags = UTF8_SET_WIDTH(0)|UTF8_SET_SIZE(0),
|
|
||||||
.data = ""
|
|
||||||
};
|
|
||||||
static const union utf8_map utf8_space1 = {
|
|
||||||
.flags = UTF8_SET_WIDTH(1)|UTF8_SET_SIZE(1),
|
|
||||||
.data = " "
|
|
||||||
};
|
|
||||||
static const union utf8_map utf8_space2 = {
|
|
||||||
.flags = UTF8_SET_WIDTH(2)|UTF8_SET_SIZE(2),
|
|
||||||
.data = " "
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Get a UTF-8 item by offset. */
|
/* Get a UTF-8 item by offset. */
|
||||||
static struct utf8_item *
|
static struct utf8_item *
|
||||||
@ -139,34 +118,31 @@ utf8_put_item(const char *data, size_t size, u_int *offset)
|
|||||||
enum utf8_state
|
enum utf8_state
|
||||||
utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
|
utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
|
||||||
{
|
{
|
||||||
union utf8_map m = { .uc = 0 };
|
u_int offset;
|
||||||
u_int offset;
|
|
||||||
|
|
||||||
if (ud->width > 2)
|
if (ud->width > 2)
|
||||||
fatalx("invalid UTF-8 width");
|
fatalx("invalid UTF-8 width");
|
||||||
|
|
||||||
if (ud->size > UTF8_SIZE)
|
if (ud->size > UTF8_SIZE)
|
||||||
goto fail;
|
goto fail;
|
||||||
m.flags = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width);
|
if (ud->size <= 3) {
|
||||||
if (ud->size <= 3)
|
offset = (((utf8_char)ud->data[2] << 16)|
|
||||||
memcpy(m.data, ud->data, ud->size);
|
((utf8_char)ud->data[1] << 8)|
|
||||||
else {
|
((utf8_char)ud->data[0]));
|
||||||
if (utf8_put_item(ud->data, ud->size, &offset) != 0)
|
} else if (utf8_put_item(ud->data, ud->size, &offset) != 0)
|
||||||
goto fail;
|
goto fail;
|
||||||
m.data[0] = (offset & 0xff);
|
*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset;
|
||||||
m.data[1] = (offset >> 8) & 0xff;
|
log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
|
||||||
m.data[2] = (offset >> 16);
|
(int)ud->size, ud->data, *uc);
|
||||||
}
|
|
||||||
*uc = htonl(m.uc);
|
|
||||||
return (UTF8_DONE);
|
return (UTF8_DONE);
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
if (ud->width == 0)
|
if (ud->width == 0)
|
||||||
*uc = htonl(utf8_space0.uc);
|
*uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0);
|
||||||
else if (ud->width == 1)
|
else if (ud->width == 1)
|
||||||
*uc = htonl(utf8_space1.uc);
|
*uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20;
|
||||||
else
|
else
|
||||||
*uc = htonl(utf8_space2.uc);
|
/* Two-cell fallback: two space bytes, so size and width are both 2. */
*uc = UTF8_SET_SIZE(2)|UTF8_SET_WIDTH(2)|0x2020;
|
||||||
return (UTF8_ERROR);
|
return (UTF8_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,37 +150,36 @@ fail:
|
|||||||
void
|
void
|
||||||
utf8_to_data(utf8_char uc, struct utf8_data *ud)
|
utf8_to_data(utf8_char uc, struct utf8_data *ud)
|
||||||
{
|
{
|
||||||
union utf8_map m = { .uc = ntohl(uc) };
|
|
||||||
struct utf8_item *ui;
|
struct utf8_item *ui;
|
||||||
u_int offset;
|
u_int offset;
|
||||||
|
|
||||||
memset(ud, 0, sizeof *ud);
|
memset(ud, 0, sizeof *ud);
|
||||||
ud->size = ud->have = UTF8_GET_SIZE(m.flags);
|
ud->size = ud->have = UTF8_GET_SIZE(uc);
|
||||||
ud->width = UTF8_GET_WIDTH(m.flags);
|
ud->width = UTF8_GET_WIDTH(uc);
|
||||||
|
|
||||||
if (ud->size <= 3) {
|
if (ud->size <= 3) {
|
||||||
memcpy(ud->data, m.data, ud->size);
|
ud->data[2] = (uc >> 16);
|
||||||
return;
|
ud->data[1] = ((uc >> 8) & 0xff);
|
||||||
|
ud->data[0] = (uc & 0xff);
|
||||||
|
} else {
|
||||||
|
offset = (uc & 0xffffff);
|
||||||
|
if (offset >= utf8_list_used)
|
||||||
|
memset(ud->data, ' ', ud->size);
|
||||||
|
else {
|
||||||
|
ui = &utf8_list[offset];
|
||||||
|
memcpy(ud->data, ui->data, ud->size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = ((u_int)m.data[2] << 16)|((u_int)m.data[1] << 8)|m.data[0];
|
log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,
|
||||||
if (offset >= utf8_list_used)
|
(int)ud->size, ud->data);
|
||||||
memset(ud->data, ' ', ud->size);
|
|
||||||
else {
|
|
||||||
ui = &utf8_list[offset];
|
|
||||||
memcpy(ud->data, ui->data, ud->size);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get UTF-8 character from a single ASCII character. */
|
/* Get UTF-8 character from a single ASCII character. */
|
||||||
u_int
|
u_int
|
||||||
utf8_build_one(u_char ch)
|
utf8_build_one(u_char ch)
|
||||||
{
|
{
|
||||||
union utf8_map m;
|
return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch);
|
||||||
|
|
||||||
m.flags = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1);
|
|
||||||
m.data[0] = ch;
|
|
||||||
return (htonl(m.uc));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set a single character. */
|
/* Set a single character. */
|
||||||
|
Loading…
Reference in New Issue
Block a user