Ignore Hangul filler character. There doesn't seem to be much agreement

on what to do with this but ignoring it seems right and does improve
things. GitHub issue 3998.
This commit is contained in:
nicm
2025-11-03 09:27:06 +00:00
parent d90b414223
commit 3051076dd1
3 changed files with 16 additions and 2 deletions

View File

@@ -55,7 +55,7 @@ utf8_has_zwj(const struct utf8_data *ud)
return (memcmp(ud->data + ud->size - 3, "\342\200\215", 3) == 0);
}
/* Is this a zero width joiner? */
/* Is this zero width joiner U+200D? */
int
utf8_is_zwj(const struct utf8_data *ud)
{
@@ -64,7 +64,7 @@ utf8_is_zwj(const struct utf8_data *ud)
return (memcmp(ud->data, "\342\200\215", 3) == 0);
}
/* Is this a variation selector? */
/* Is this variation selector U+FE0F? */
int
utf8_is_vs(const struct utf8_data *ud)
{
@@ -73,6 +73,15 @@ utf8_is_vs(const struct utf8_data *ud)
return (memcmp(ud->data, "\357\270\217", 3) == 0);
}
/*
 * Check whether this character is the Hangul filler, U+3164. Its UTF-8
 * encoding is the three bytes "\343\205\244", so anything with a different
 * size cannot match. Returns 1 if it is the filler and 0 otherwise.
 */
int
utf8_is_hangul_filler(const struct utf8_data *ud)
{
	static const char	filler[] = "\343\205\244";

	/* The size test comes first so only three-byte data is compared. */
	return (ud->size == 3 && memcmp(ud->data, filler, 3) == 0);
}
/* Should these two characters combine? */
int
utf8_should_combine(const struct utf8_data *with, const struct utf8_data *add)