diff --git a/Makefile.am b/Makefile.am
index 1b411afd..81dc4c71 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -185,7 +185,12 @@ dist_tmux_SOURCES = \
 	xterm-keys.c
 nodist_tmux_SOURCES = osdep-@PLATFORM@.c
 
-# Pile in all the compat/ stuff that is needed.
+# Add compat file for utf8proc.
+if HAVE_UTF8PROC
+nodist_tmux_SOURCES += compat/utf8proc.c
+endif
+
+# Add compat for missing or broken functions.
 if NO_FORKPTY
 nodist_tmux_SOURCES += compat/forkpty-@PLATFORM@.c
 endif
diff --git a/compat.h b/compat.h
index 7f17e193..28ca9c61 100644
--- a/compat.h
+++ b/compat.h
@@ -279,7 +279,14 @@ int openat(int, const char *, int, ...);
 
 #ifndef HAVE_REALLOCARRAY
 /* reallocarray.c */
-void		*reallocarray(void *, size_t, size_t size);
+void		*reallocarray(void *, size_t, size_t);
+#endif
+
+#ifdef HAVE_UTF8PROC
+/* utf8proc.c */
+int		 utf8proc_wcwidth(wchar_t);
+int		 utf8proc_mbtowc(wchar_t *, const char *, size_t);
+int		 utf8proc_wctomb(char *, wchar_t);
 #endif
 
 #ifdef HAVE_GETOPT
diff --git a/compat/utf8proc.c b/compat/utf8proc.c
new file mode 100644
index 00000000..023d762a
--- /dev/null
+++ b/compat/utf8proc.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Joshua Rubin
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <utf8proc.h>
+
+#include "tmux.h"
+
+/*
+ * Replacement for wcwidth() built on utf8proc. Private-use and "other
+ * symbol" codepoints are forced to width 1; everything else uses the width
+ * reported by utf8proc_charwidth().
+ */
+int
+utf8proc_wcwidth(wchar_t wc)
+{
+	int	cat;
+
+	cat = utf8proc_category(wc);
+	if (cat == UTF8PROC_CATEGORY_CO) {
+		/*
+		 * The private use category is where powerline and similar
+		 * codepoints are stored, they have "ambiguous" width - use 1.
+		 */
+		return (1);
+	}
+	if (cat == UTF8PROC_CATEGORY_SO) {
+		/* Symbols, like emoji, should always use width 1. */
+		return (1);
+	}
+	return (utf8proc_charwidth(wc));
+}
+
+/*
+ * Replacement for mbtowc() built on utf8proc_iterate(). Returns 0 if s is
+ * NULL, -1 if no valid codepoint can be decoded from the first n bytes of s,
+ * otherwise the number of bytes consumed. The codepoint is stored in *pwc
+ * when pwc is not NULL.
+ */
+int
+utf8proc_mbtowc(wchar_t *pwc, const char *s, size_t n)
+{
+	utf8proc_int32_t	cp;
+	utf8proc_ssize_t	slen;
+
+	if (s == NULL)
+		return (0);
+
+	/*
+	 * Decode into a utf8proc_int32_t rather than straight into *pwc:
+	 * wchar_t is not guaranteed to be the same type. On failure
+	 * utf8proc_iterate() returns a negative length and sets cp to -1.
+	 */
+	slen = utf8proc_iterate((const utf8proc_uint8_t *)s,
+	    (utf8proc_ssize_t)n, &cp);
+	if (slen < 0 || cp == -1)
+		return (-1);
+	if (pwc != NULL)
+		*pwc = (wchar_t)cp;
+	return ((int)slen);
+}
+
+/*
+ * Replacement for wctomb() built on utf8proc_encode_char(). Returns 0 if s
+ * is NULL, -1 if wc is not a valid codepoint, otherwise the value returned
+ * by utf8proc_encode_char().
+ */
+int
+utf8proc_wctomb(char *s, wchar_t wc)
+{
+	if (s == NULL)
+		return (0);
+
+	if (!utf8proc_codepoint_valid(wc))
+		return (-1);
+	return ((int)utf8proc_encode_char(wc, (utf8proc_uint8_t *)s));
+}
diff --git a/configure.ac b/configure.ac
index 636bfcc1..c14a6e61 100644
--- a/configure.ac
+++ b/configure.ac
@@ -152,7 +152,7 @@ if test "x$found_libevent" = xno; then
 	AC_MSG_ERROR("libevent not found")
 fi
 
-# Look for ncurses
+# Look for ncurses.
 PKG_CHECK_MODULES(
 	LIBNCURSES,
 	ncurses,
@@ -196,6 +196,29 @@ if test "x$found_utempter" = xyes; then
 	fi
 fi
 
+# Look for utf8proc.
+AC_ARG_ENABLE(
+	utf8proc,
+	AC_HELP_STRING(--enable-utf8proc, use utf8proc if it is installed),
+	found_utf8proc=$enable_utf8proc,
+	found_utf8proc=yes
+)
+if test "x$found_utf8proc" = xyes; then
+	AC_CHECK_HEADER(utf8proc.h, found_utf8proc=yes, found_utf8proc=no)
+	if test "x$found_utf8proc" = xyes; then
+		AC_SEARCH_LIBS(
+			utf8proc_charwidth,
+			utf8proc,
+			found_utf8proc=yes,
+			found_utf8proc=no
+		)
+		if test "x$found_utf8proc" = xyes; then
+			AC_DEFINE(HAVE_UTF8PROC)
+		fi
+	fi
+fi
+AM_CONDITIONAL(HAVE_UTF8PROC, [test "x$found_utf8proc" = xyes])
+
 # Check for b64_ntop.
 AC_MSG_CHECKING(for b64_ntop)
 AC_TRY_LINK(
diff --git a/utf8.c b/utf8.c
index bb0be34f..eb9b47a9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -109,7 +109,11 @@ utf8_width(wchar_t wc)
 {
 	int	width;
 
+#ifdef HAVE_UTF8PROC
+	width = utf8proc_wcwidth(wc);
+#else
 	width = wcwidth(wc);
+#endif
 	if (width < 0 || width > 0xff) {
 		log_debug("Unicode %04x, wcwidth() %d", wc, width);
 
@@ -135,7 +139,11 @@ utf8_width(wchar_t wc)
 enum utf8_state
 utf8_combine(const struct utf8_data *ud, wchar_t *wc)
 {
+#ifdef HAVE_UTF8PROC
+	switch (utf8proc_mbtowc(wc, ud->data, ud->size)) {
+#else
 	switch (mbtowc(wc, ud->data, ud->size)) {
+#endif
 	case -1:
 		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
 		    errno);
@@ -155,7 +163,11 @@ utf8_split(wchar_t wc, struct utf8_data *ud)
 	char	s[MB_LEN_MAX];
 	int	slen;
 
+#ifdef HAVE_UTF8PROC
+	slen = utf8proc_wctomb(s, wc);
+#else
 	slen = wctomb(s, wc);
+#endif
 	if (slen <= 0 || slen > (int)sizeof ud->data)
 		return (UTF8_ERROR);
 