Add support for using utf8proc with --enable-utf8proc, useful for platforms (like OS X) where the system implementation is crap. From Joshua Rubin.
This commit is contained in:
Nicholas Marriott 2016-09-01 20:40:03 +01:00
parent ae297cb487
commit 6c94774b70
5 changed files with 120 additions and 3 deletions

View File

@ -185,7 +185,12 @@ dist_tmux_SOURCES = \
xterm-keys.c xterm-keys.c
nodist_tmux_SOURCES = osdep-@PLATFORM@.c nodist_tmux_SOURCES = osdep-@PLATFORM@.c
# Pile in all the compat/ stuff that is needed. # Add compat file for utf8proc.
if HAVE_UTF8PROC
nodist_tmux_SOURCES += compat/utf8proc.c
endif
# Add compat for missing or broken functions.
if NO_FORKPTY if NO_FORKPTY
nodist_tmux_SOURCES += compat/forkpty-@PLATFORM@.c nodist_tmux_SOURCES += compat/forkpty-@PLATFORM@.c
endif endif

View File

@ -279,7 +279,14 @@ int openat(int, const char *, int, ...);
#ifndef HAVE_REALLOCARRAY #ifndef HAVE_REALLOCARRAY
/* reallocarray.c */ /* reallocarray.c */
void *reallocarray(void *, size_t, size_t size); void *reallocarray(void *, size_t, size_t);
#endif
#ifdef HAVE_UTF8PROC
/* utf8proc.c */
int utf8proc_wcwidth(wchar_t);
int utf8proc_mbtowc(wchar_t *, const char *, size_t);
int utf8proc_wctomb(char *, wchar_t);
#endif #endif
#ifdef HAVE_GETOPT #ifdef HAVE_GETOPT

70
compat/utf8proc.c Normal file
View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2016 Joshua Rubin <joshua@rubixconsulting.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
* IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/types.h>
#include <utf8proc.h>
#include "tmux.h"
/*
 * wcwidth(3)-style replacement built on utf8proc.
 *
 * Codepoints in the private use (Co) and other symbol (So) categories are
 * always reported as width 1; every other codepoint gets whatever
 * utf8proc_charwidth() returns for it.
 */
int
utf8proc_wcwidth(wchar_t wc)
{
	const int	category = utf8proc_category(wc);

	switch (category) {
	case UTF8PROC_CATEGORY_CO:
		/*
		 * Private use: powerline and similar codepoints are stored
		 * here and have "ambiguous" width, so treat them as 1.
		 */
		return (1);
	case UTF8PROC_CATEGORY_SO:
		/* Symbols such as emoji are forced to width 1 as well. */
		return (1);
	default:
		break;
	}
	return (utf8proc_charwidth(wc));
}
int
utf8proc_mbtowc(wchar_t *pwc, const char *s, size_t n)
{
utf8proc_ssize_t slen;
if (s == NULL)
return (0);
/*
* *pwc == -1 indicates invalid codepoint
* slen < 0 indicates an error
*/
slen = utf8proc_iterate(s, n, pwc);
if (*pwc == (wchar_t)-1 || slen < 0)
return (-1);
return (slen);
}
/*
 * wctomb(3)-style replacement built on utf8proc: encode wc into the buffer
 * at s.
 *
 * Returns 0 if s is NULL, -1 if utf8proc_codepoint_valid() rejects wc, and
 * otherwise the value returned by utf8proc_encode_char().
 */
int
utf8proc_wctomb(char *s, wchar_t wc)
{
	int	len;

	if (s != NULL) {
		if (utf8proc_codepoint_valid(wc))
			len = utf8proc_encode_char(wc, s);
		else
			len = -1;
	} else
		len = 0;
	return (len);
}

View File

@ -152,7 +152,7 @@ if test "x$found_libevent" = xno; then
AC_MSG_ERROR("libevent not found") AC_MSG_ERROR("libevent not found")
fi fi
# Look for ncurses # Look for ncurses.
PKG_CHECK_MODULES( PKG_CHECK_MODULES(
LIBNCURSES, LIBNCURSES,
ncurses, ncurses,
@ -196,6 +196,29 @@ if test "x$found_utempter" = xyes; then
fi fi
fi fi
# Look for utf8proc.
# An explicit --enable-utf8proc or --disable-utf8proc sets found_utf8proc from
# the option value. Without either option it starts as yes, so utf8proc is
# used whenever the header and library checks below both succeed.
AC_ARG_ENABLE(
utf8proc,
AC_HELP_STRING(--enable-utf8proc, use utf8proc if it is installed),
found_utf8proc=$enable_utf8proc,
found_utf8proc=yes
)
if test "x$found_utf8proc" = xyes; then
# The header has to be present before the library is searched for.
AC_CHECK_HEADER(utf8proc.h, found_utf8proc=yes, found_utf8proc=no)
if test "x$found_utf8proc" = xyes; then
# Search for a library providing utf8proc_charwidth, trying utf8proc.
AC_SEARCH_LIBS(
utf8proc_charwidth,
utf8proc,
found_utf8proc=yes,
found_utf8proc=no
)
if test "x$found_utf8proc" = xyes; then
# Header and library were both found: define HAVE_UTF8PROC for the C code.
AC_DEFINE(HAVE_UTF8PROC)
fi
fi
fi
# Make the result available to automake as the HAVE_UTF8PROC conditional.
AM_CONDITIONAL(HAVE_UTF8PROC, [test "x$found_utf8proc" = xyes])
# Check for b64_ntop. # Check for b64_ntop.
AC_MSG_CHECKING(for b64_ntop) AC_MSG_CHECKING(for b64_ntop)
AC_TRY_LINK( AC_TRY_LINK(

12
utf8.c
View File

@ -109,7 +109,11 @@ utf8_width(wchar_t wc)
{ {
int width; int width;
#ifdef HAVE_UTF8PROC
width = utf8proc_wcwidth(wc);
#else
width = wcwidth(wc); width = wcwidth(wc);
#endif
if (width < 0 || width > 0xff) { if (width < 0 || width > 0xff) {
log_debug("Unicode %04x, wcwidth() %d", wc, width); log_debug("Unicode %04x, wcwidth() %d", wc, width);
@ -135,7 +139,11 @@ utf8_width(wchar_t wc)
enum utf8_state enum utf8_state
utf8_combine(const struct utf8_data *ud, wchar_t *wc) utf8_combine(const struct utf8_data *ud, wchar_t *wc)
{ {
#ifdef HAVE_UTF8PROC
switch (utf8proc_mbtowc(wc, ud->data, ud->size)) {
#else
switch (mbtowc(wc, ud->data, ud->size)) { switch (mbtowc(wc, ud->data, ud->size)) {
#endif
case -1: case -1:
log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data, log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
errno); errno);
@ -155,7 +163,11 @@ utf8_split(wchar_t wc, struct utf8_data *ud)
char s[MB_LEN_MAX]; char s[MB_LEN_MAX];
int slen; int slen;
#ifdef HAVE_UTF8PROC
slen = utf8proc_wctomb(s, wc);
#else
slen = wctomb(s, wc); slen = wctomb(s, wc);
#endif
if (slen <= 0 || slen > (int)sizeof ud->data) if (slen <= 0 || slen > (int)sizeof ud->data)
return (UTF8_ERROR); return (UTF8_ERROR);