Skip to content

Commit

Permalink
replace wcwidth by utf8proc_charwidth (fixes #3721, closes #6939)
Browse files Browse the repository at this point in the history
  • Loading branch information
stevengj committed Mar 28, 2015
1 parent 49d0aa8 commit 7d908f9
Show file tree
Hide file tree
Showing 7 changed files with 14 additions and 336 deletions.
2 changes: 2 additions & 0 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,10 @@ endif

ifeq ($(USE_SYSTEM_UTF8PROC), 1)
LIBUTF8PROC = -lutf8proc
+UTF8PROC_INC = /usr/include
else
LIBUTF8PROC = $(build_libdir)/libutf8proc.a
+UTF8PROC_INC = $(JULIAHOME)/deps/utf8proc
endif

# OS specific stuff
Expand Down
1 change: 0 additions & 1 deletion base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,6 @@ startswith(a::Array{UInt8,1}, b::Array{UInt8,1}) =

## character column width function ##

-charwidth(c::Char) = max(0,Int(ccall(:wcwidth, Int32, (UInt32,), c)))
strwidth(s::AbstractString) = (w=0; for c in s; w += charwidth(c); end; w)
strwidth(s::ByteString) = Int(ccall(:u8_strwidth, Csize_t, (Ptr{UInt8},), s.data))
# TODO: implement and use u8_strnwidth that takes a length argument
Expand Down
6 changes: 5 additions & 1 deletion base/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype
export isgraphemebreak

# also exported by Base:
-export normalize_string, graphemes, is_valid_char, is_assigned_char,
+export normalize_string, graphemes, is_valid_char, is_assigned_char, charwidth,
islower, isupper, isalpha, isdigit, isnumber, isalnum,
iscntrl, ispunct, isspace, isprint, isgraph, isblank

Expand Down Expand Up @@ -116,6 +116,10 @@ end

############################################################################

+charwidth(c::Char) = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))
+
+############################################################################
+
# returns UTF8PROC_CATEGORY code in 1:30 giving Unicode category
function category_code(c)
UInt32(c) > 0x10FFFF && return 0x0000 # see utf8proc_get_property docs
Expand Down
4 changes: 2 additions & 2 deletions src/support/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ SRCS = hashing timefuncs ptrhash operators \
utf8 ios htable bitvector \
int2str libsupportinit arraylist strtod
ifeq ($(OS),WINNT)
-SRCS += asprintf wcwidth
+SRCS += asprintf
ifeq ($(ARCH),i686)
SRCS += _setjmp.win32 _longjmp.win32
else ifeq ($(ARCH),i386)
Expand All @@ -26,7 +26,7 @@ HEADERS = $(wildcard *.h) $(LIBUV_INC)/uv.h
OBJS = $(SRCS:%=$(BUILDDIR)/%.o)
DOBJS = $(SRCS:%=$(BUILDDIR)/%.dbg.obj)

-FLAGS = $(CFLAGS) $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -DLIBRARY_EXPORTS
+FLAGS = $(CFLAGS) $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DLIBRARY_EXPORTS
ifneq ($(USEMSVC), 1)
FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden
endif
Expand Down
11 changes: 2 additions & 9 deletions src/support/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <assert.h>

#include "utf8.h"
+#include "utf8proc.h"

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -261,17 +262,10 @@ size_t u8_strlen(const char *s)
return count;
}

-#if defined(_OS_WINDOWS_)
-extern int wcwidth(uint32_t ch);
-#elif defined(_OS_LINUX_)
-extern int wcwidth(wchar_t ch);
-#endif
-
size_t u8_strwidth(const char *s)
{
uint32_t ch;
size_t nb, tot=0;
-int w;
signed char sc;

while ((sc = (signed char)*s) != 0) {
Expand All @@ -293,8 +287,7 @@ size_t u8_strwidth(const char *s)
case 0: ch += (unsigned char)*s++;
}
ch -= offsetsFromUTF8[nb];
-w = wcwidth(ch); // might return -1
-if (w > 0) tot += w;
+tot += utf8proc_charwidth(ch);
}
}
return tot;
Expand Down
Loading

0 comments on commit 7d908f9

Please sign in to comment.