Add support for using utf8proc with --enable-utf8proc, useful for platforms
(like OS X) where the system implementation is crap. From Joshua Rubin.
2026-06-23 00:08:01 +00:00 · 2016-09-01 20:40:03 +01:00
parent ae297cb487
commit 6c94774b70
5 changed files with 120 additions and 3 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -185,7 +185,12 @@ dist_tmux_SOURCES = \
 	xterm-keys.c
 nodist_tmux_SOURCES = osdep-@PLATFORM@.c

-# Pile in all the compat/ stuff that is needed.
+# Add compat file for utf8proc.
+if HAVE_UTF8PROC
+nodist_tmux_SOURCES += compat/utf8proc.c
+endif
+
+# Add compat for missing or broken functions.
 if NO_FORKPTY
 nodist_tmux_SOURCES += compat/forkpty-@PLATFORM@.c
 endif
--- a/compat.h
+++ b/compat.h
@@ -279,7 +279,14 @@ int		 openat(int, const char *, int, ...);

 #ifndef HAVE_REALLOCARRAY
 /* reallocarray.c */
-void		*reallocarray(void *, size_t, size_t size);
+void		*reallocarray(void *, size_t, size_t);
+#endif
+
+#ifdef HAVE_UTF8PROC
+/* utf8proc.c */
+int		 utf8proc_wcwidth(wchar_t);
+int		 utf8proc_mbtowc(wchar_t *, const char *, size_t);
+int		 utf8proc_wctomb(char *, wchar_t);
 #endif

 #ifdef HAVE_GETOPT
--- a/compat/utf8proc.c
+++ b/compat/utf8proc.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Joshua Rubin <joshua@rubixconsulting.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <utf8proc.h>
+
+#include "tmux.h"
+
+int
+utf8proc_wcwidth(wchar_t wc)
+{
+	/*
+	 * Some categories get a fixed width; for everything else the answer
+	 * comes straight from utf8proc_charwidth().
+	 */
+	switch (utf8proc_category(wc)) {
+	case UTF8PROC_CATEGORY_CO:
+		/* Private use (powerline and similar): ambiguous, use 1. */
+		return (1);
+	case UTF8PROC_CATEGORY_SO:
+		/* Symbols, such as emoji, always use width 1. */
+		return (1);
+	default:
+		break;
+	}
+	return (utf8proc_charwidth(wc));
+}
+
+int
+utf8proc_mbtowc(wchar_t *pwc, const char *s, size_t n)
+{
+	utf8proc_ssize_t	slen;
+	utf8proc_int32_t	cp;
+
+	if (s == NULL)
+		return (0);
+	/* Decode into a utf8proc_int32_t: pwc may be NULL or another type. */
+	slen = utf8proc_iterate((const utf8proc_uint8_t *)s, n, &cp);
+	/* slen < 0 indicates an error, cp == -1 an invalid codepoint. */
+	if (slen < 0 || cp == -1)
+		return (-1);
+	if (pwc != NULL)
+		*pwc = (wchar_t)cp;
+	return (slen);
+}
+
+int
+utf8proc_wctomb(char *s, wchar_t wc)
+{
+	if (s == NULL)
+		return (0);
+	/* Encode only valid codepoints, anything else is reported as -1. */
+	if (utf8proc_codepoint_valid(wc))
+		return (utf8proc_encode_char(wc, s));
+	return (-1);
+}
--- a/configure.ac
+++ b/configure.ac
@@ -152,7 +152,7 @@ if test "x$found_libevent" = xno; then
 	AC_MSG_ERROR("libevent not found")
 fi

-# Look for ncurses
+# Look for ncurses.
 PKG_CHECK_MODULES(
 	LIBNCURSES,
 	ncurses,
@@ -196,6 +196,29 @@ if test "x$found_utempter" = xyes; then
 	fi
 fi

+# Look for utf8proc.
+AC_ARG_ENABLE(
+	utf8proc,
+	AC_HELP_STRING(--enable-utf8proc, use utf8proc if it is installed),
+	found_utf8proc=$enable_utf8proc,
+	found_utf8proc=yes
+)
+if test "x$found_utf8proc" = xyes; then
+	AC_CHECK_HEADER(utf8proc.h, found_utf8proc=yes, found_utf8proc=no)
+	if test "x$found_utf8proc" = xyes; then
+		AC_SEARCH_LIBS(
+			utf8proc_charwidth,
+			utf8proc,
+			found_utf8proc=yes,
+			found_utf8proc=no
+		)
+		if test "x$found_utf8proc" = xyes; then
+			AC_DEFINE(HAVE_UTF8PROC)
+		fi
+	fi
+fi
+AM_CONDITIONAL(HAVE_UTF8PROC, [test "x$found_utf8proc" = xyes])
+
 # Check for b64_ntop.
 AC_MSG_CHECKING(for b64_ntop)
 AC_TRY_LINK(
--- a/utf8.c
+++ b/utf8.c
@@ -109,7 +109,11 @@ utf8_width(wchar_t wc)
 {
 	int	width;

+#ifdef HAVE_UTF8PROC
+	width = utf8proc_wcwidth(wc);
+#else
 	width = wcwidth(wc);
+#endif
 	if (width < 0 || width > 0xff) {
 		log_debug("Unicode %04x, wcwidth() %d", wc, width);

@@ -135,7 +139,11 @@ utf8_width(wchar_t wc)
 enum utf8_state
 utf8_combine(const struct utf8_data *ud, wchar_t *wc)
 {
+#ifdef HAVE_UTF8PROC
+	switch (utf8proc_mbtowc(wc, ud->data, ud->size)) {
+#else
 	switch (mbtowc(wc, ud->data, ud->size)) {
+#endif
 	case -1:
 		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
 		    errno);
@@ -155,7 +163,11 @@ utf8_split(wchar_t wc, struct utf8_data *ud)
 	char	s[MB_LEN_MAX];
 	int	slen;

+#ifdef HAVE_UTF8PROC
+	slen = utf8proc_wctomb(s, wc);
+#else
 	slen = wctomb(s, wc);
+#endif
 	if (slen <= 0 || slen > (int)sizeof ud->data)
 		return (UTF8_ERROR);