diff --git a/screen-write.c b/screen-write.c
index a8211c0d..2dcbe809 100644
--- a/screen-write.c
+++ b/screen-write.c
@@ -2133,17 +2133,27 @@ screen_write_combine(struct screen_write_ctx *ctx, const struct grid_cell *gc)
 		return (zero_width);
 
 	/*
-	 * Check if we need to combine characters. This could be zero width
-	 * (set above), a modifier character (with an existing Unicode
-	 * character) or a previous ZWJ.
+	 * Check if we need to combine characters. This could be a Korean
+	 * Hangul Jamo character, zero width (set above), a modifier character
+	 * (with an existing Unicode character) or a previous ZWJ.
 	 */
 	if (!zero_width) {
-		if (utf8_is_modifier(ud)) {
-			if (last.data.size < 2)
-				return (0);
-			force_wide = 1;
-		} else if (!utf8_has_zwj(&last.data))
+		switch (hanguljamo_check_state(&last.data, ud)) {
+		case HANGULJAMO_STATE_NOT_COMPOSABLE:
+			return (1);
+		case HANGULJAMO_STATE_CHOSEONG:
 			return (0);
+		case HANGULJAMO_STATE_COMPOSABLE:
+			break;
+		case HANGULJAMO_STATE_NOT_HANGULJAMO:
+			if (utf8_is_modifier(ud)) {
+				if (last.data.size < 2)
+					return (0);
+				force_wide = 1;
+			} else if (!utf8_has_zwj(&last.data))
+				return (0);
+			break;
+		}
 	}
 
 	/* Check if this combined character would be too long. */
diff --git a/tmux.h b/tmux.h
index ed314c6d..c996d5b9 100644
--- a/tmux.h
+++ b/tmux.h
@@ -686,6 +686,14 @@ enum utf8_state {
 	UTF8_ERROR
 };
 
+/* State for processing of Korean characters. */
+enum hanguljamo_state {
+	HANGULJAMO_STATE_NOT_HANGULJAMO,
+	HANGULJAMO_STATE_CHOSEONG,
+	HANGULJAMO_STATE_COMPOSABLE,
+	HANGULJAMO_STATE_NOT_COMPOSABLE
+};
+
 /* Colour flags. */
 #define COLOUR_FLAG_256 0x01000000
 #define COLOUR_FLAG_RGB 0x02000000
@@ -3518,6 +3526,8 @@ int		 utf8_has_zwj(const struct utf8_data *);
 int		 utf8_is_zwj(const struct utf8_data *);
 int		 utf8_is_vs(const struct utf8_data *);
 int		 utf8_is_modifier(const struct utf8_data *);
+enum hanguljamo_state hanguljamo_check_state(const struct utf8_data *,
+		     const struct utf8_data *);
 
 /* procname.c */
 char   *get_proc_name(int, char *);
diff --git a/utf8-combined.c b/utf8-combined.c
index 16ecb453..885dd6a4 100644
--- a/utf8-combined.c
+++ b/utf8-combined.c
@@ -24,6 +24,38 @@
 
 #include "tmux.h"
 
+/*
+ * Fine-grained kind of a Hangul Jamo code point. Every value except
+ * NOT_HANGULJAMO covers the code point range noted beside it.
+ */
+enum hanguljamo_subclass {
+	HANGULJAMO_SUBCLASS_NOT_HANGULJAMO,
+	HANGULJAMO_SUBCLASS_CHOSEONG,			/* U+1100 - U+1112 */
+	HANGULJAMO_SUBCLASS_OLD_CHOSEONG,		/* U+1113 - U+115E */
+	HANGULJAMO_SUBCLASS_CHOSEONG_FILLER,		/* U+115F */
+	HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER,		/* U+1160 */
+	HANGULJAMO_SUBCLASS_JUNGSEONG,			/* U+1161 - U+1175 */
+	HANGULJAMO_SUBCLASS_OLD_JUNGSEONG,		/* U+1176 - U+11A7 */
+	HANGULJAMO_SUBCLASS_JONGSEONG,			/* U+11A8 - U+11C2 */
+	HANGULJAMO_SUBCLASS_OLD_JONGSEONG,		/* U+11C3 - U+11FF */
+	HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG,	/* U+A960 - U+A97C */
+	HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG,	/* U+D7B0 - U+D7C6 */
+	HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG	/* U+D7CB - U+D7FB */
+};
+
+/*
+ * Coarse class that hanguljamo_get_class() folds the subclasses into.
+ * NOTE(review): choseong, jungseong and jongseong are presumably the leading
+ * consonant, vowel and trailing consonant of a syllable; confirm against the
+ * Unicode Hangul Jamo code charts.
+ */
+enum hanguljamo_class {
+	HANGULJAMO_CLASS_NOT_HANGULJAMO,
+	HANGULJAMO_CLASS_CHOSEONG,
+	HANGULJAMO_CLASS_JUNGSEONG,
+	HANGULJAMO_CLASS_JONGSEONG
+};
+
 /* Has this got a zero width joiner at the end? */
 int
 utf8_has_zwj(const struct utf8_data *ud)
@@ -95,3 +127,128 @@ utf8_is_modifier(const struct utf8_data *ud)
 	}
 	return (0);
 }
+
+/*
+ * Map the UTF-8 sequence at s to its Hangul Jamo subclass. Up to three bytes
+ * are read and no length is checked, so the caller must supply at least three.
+ */
+static enum hanguljamo_subclass
+hanguljamo_get_subclass(const u_char *s)
+{
+	switch (s[0]) {
+	case 0xE1:
+		switch (s[1]) {
+		case 0x84:
+			if (s[2] >= 0x80 && s[2] <= 0x92)
+				return (HANGULJAMO_SUBCLASS_CHOSEONG);
+			if (s[2] >= 0x93 && s[2] <= 0xBF)
+				return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG);
+			break;
+		case 0x85:
+			if (s[2] == 0x9F)
+				return (HANGULJAMO_SUBCLASS_CHOSEONG_FILLER);
+			if (s[2] == 0xA0)
+				return (HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER);
+			if (s[2] >= 0x80 && s[2] <= 0x9E)
+				return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG);
+			if (s[2] >= 0xA1 && s[2] <= 0xB5)
+				return (HANGULJAMO_SUBCLASS_JUNGSEONG);
+			if (s[2] >= 0xB6 && s[2] <= 0xBF)
+				return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG);
+			break;
+		case 0x86:
+			if (s[2] >= 0x80 && s[2] <= 0xA7)
+				return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG);
+			if (s[2] >= 0xA8 && s[2] <= 0xBF)
+				return (HANGULJAMO_SUBCLASS_JONGSEONG);
+			break;
+		case 0x87:
+			if (s[2] >= 0x80 && s[2] <= 0x82)
+				return (HANGULJAMO_SUBCLASS_JONGSEONG);
+			if (s[2] >= 0x83 && s[2] <= 0xBF)
+				return (HANGULJAMO_SUBCLASS_OLD_JONGSEONG);
+			break;
+		}
+		break;
+	case 0xEA:
+		if (s[1] == 0xA5 && s[2] >= 0xA0 && s[2] <= 0xBC)
+			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG);
+		break;
+	case 0xED:
+		if (s[1] == 0x9E && s[2] >= 0xB0 && s[2] <= 0xBF)
+			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG);
+		if (s[1] != 0x9F)
+			break;
+		if (s[2] >= 0x80 && s[2] <= 0x86)
+			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG);
+		if (s[2] >= 0x8B && s[2] <= 0xBB)
+			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG);
+		break;
+	}
+	return (HANGULJAMO_SUBCLASS_NOT_HANGULJAMO);
+}
+
+/* Reduce the Hangul Jamo subclass of the sequence at s to its coarse class. */
+static enum hanguljamo_class
+hanguljamo_get_class(const u_char *s)
+{
+	switch (hanguljamo_get_subclass(s)) {
+	case HANGULJAMO_SUBCLASS_CHOSEONG:
+	case HANGULJAMO_SUBCLASS_CHOSEONG_FILLER:
+	case HANGULJAMO_SUBCLASS_OLD_CHOSEONG:
+	case HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG:
+		return (HANGULJAMO_CLASS_CHOSEONG);
+	case HANGULJAMO_SUBCLASS_JUNGSEONG:
+	case HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER:
+	case HANGULJAMO_SUBCLASS_OLD_JUNGSEONG:
+	case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG:
+		return (HANGULJAMO_CLASS_JUNGSEONG);
+	case HANGULJAMO_SUBCLASS_JONGSEONG:
+	case HANGULJAMO_SUBCLASS_OLD_JONGSEONG:
+	case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG:
+		return (HANGULJAMO_CLASS_JONGSEONG);
+	case HANGULJAMO_SUBCLASS_NOT_HANGULJAMO:
+		return (HANGULJAMO_CLASS_NOT_HANGULJAMO);
+	}
+	return (HANGULJAMO_CLASS_NOT_HANGULJAMO);
+}
+
+/*
+ * Decide how the new character ud relates to the existing data p_ud. Returns
+ * NOT_HANGULJAMO if ud is not a three-byte Hangul Jamo, CHOSEONG if it is a
+ * choseong, COMPOSABLE if it is a jungseong and p_ud ends with a choseong or
+ * it is a jongseong and p_ud ends with a jungseong, and NOT_COMPOSABLE for any
+ * other jungseong or jongseong.
+ */
+enum hanguljamo_state
+hanguljamo_check_state(const struct utf8_data *p_ud, const struct utf8_data *ud)
+{
+	const u_char *s;
+
+	if (ud->size != 3)
+		return (HANGULJAMO_STATE_NOT_HANGULJAMO);
+
+	switch (hanguljamo_get_class(ud->data)) {
+	case HANGULJAMO_CLASS_CHOSEONG:
+		return (HANGULJAMO_STATE_CHOSEONG);
+	case HANGULJAMO_CLASS_JUNGSEONG:
+		/* Composable only if p_ud ends with a choseong. */
+		if (p_ud->size < 3)
+			return (HANGULJAMO_STATE_NOT_COMPOSABLE);
+		s = p_ud->data + p_ud->size - 3;
+		if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_CHOSEONG)
+			return (HANGULJAMO_STATE_COMPOSABLE);
+		return (HANGULJAMO_STATE_NOT_COMPOSABLE);
+	case HANGULJAMO_CLASS_JONGSEONG:
+		/* Composable only if p_ud ends with a jungseong. */
+		if (p_ud->size < 3)
+			return (HANGULJAMO_STATE_NOT_COMPOSABLE);
+		s = p_ud->data + p_ud->size - 3;
+		if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_JUNGSEONG)
+			return (HANGULJAMO_STATE_COMPOSABLE);
+		return (HANGULJAMO_STATE_NOT_COMPOSABLE);
+	case HANGULJAMO_CLASS_NOT_HANGULJAMO:
+		return (HANGULJAMO_STATE_NOT_HANGULJAMO);
+	}
+	return (HANGULJAMO_STATE_NOT_HANGULJAMO);
+}