Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC/WIP] Leverage compiler-rt for builtins support of llvm intrinsics #17344

Closed
wants to merge 12 commits into from
3 changes: 3 additions & 0 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ USE_SYSTEM_PATCHELF:=0
# Link to the LLVM shared library
USE_LLVM_SHLIB := 1

# Use the builtins from compiler-rt
BUILD_COMPILER_RT := 1

## Settings for various Intel tools
# Set to 1 to use MKL
USE_INTEL_MKL ?= 0
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ endif
$(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%.o
@$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ $< \
$(if $(findstring -debug.$(SHLIB_EXT),$(notdir $@)),-ljulia-debug,-ljulia) \
$$([ $(OS) = WINNT ] && echo '' -lssp))
$$([ $(OS) = WINNT ] && echo '' -lssp) \
$$([ $(BUILD_COMPILER_RT) = 1 ] && echo '' -lcompiler-rt))
@$(INSTALL_NAME_CMD)$(notdir $@) $@
@$(DSYMUTIL) $@

Expand Down
4 changes: 2 additions & 2 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ function convert(::Type{Float32}, x::Int128)
end

#convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x))
convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x))
#convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x))
convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,unbox(Float64,x)))

#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32,x))
#convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32,x))
convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,unbox(Float32,x)))

convert(::Type{AbstractFloat}, x::Bool) = convert(Float64, x)
Expand Down
123 changes: 25 additions & 98 deletions base/float16.jl
Original file line number Diff line number Diff line change
@@ -1,102 +1,29 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

# Widen a half-precision value to single precision by hand: split the 16-bit
# pattern into sign / exponent / significand fields, then rebuild the
# corresponding 32-bit pattern (exponent re-biased from 15 to 127, significand
# moved up by 23 - 10 bits).
function convert(::Type{Float32}, val::Float16)
    half = UInt32(reinterpret(UInt16, val))
    signbit = (half & 0x8000) << 16     # sign, already placed at bit 31
    expfield = (half & 0x7c00) >> 10    # 5-bit biased exponent
    frac = half & 0x03ff                # 10-bit significand field

    if expfield == 0x1f
        # All-ones exponent: Inf when the significand is empty, NaN otherwise.
        # Every NaN collapses to the default quiet NaN carrying the input sign.
        special = frac == 0 ? 0x7f800000 : 0x7fc00000
        return reinterpret(Float32, special | signbit)
    end

    if expfield != 0
        # Normal number: re-bias the exponent (-15 + 127) and widen the significand.
        return reinterpret(Float32,
                           signbit | ((expfield + UInt32(112)) << 23) | (frac << 13))
    end

    # Signed zero: only the sign bit survives.
    frac == 0 && return reinterpret(Float32, signbit)

    # Subnormal: normalise by locating the highest set significand bit.
    # `shift` is how far that bit must move to become the implicit leading one.
    shift = leading_zeros(frac) - 21
    topbit = UInt32(1) << (10 - shift)
    newexp = UInt32(113 - shift) << 23              # -14 - shift + 127
    newfrac = (frac & ~topbit) << (shift + 13)
    return reinterpret(Float32, signbit | newexp | newfrac)
end

# Float32 -> Float16 algorithm from:
# "Fast Half Float Conversion" by Jeroen van der Zijp
# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf

# Lookup tables for the Float32 -> Float16 conversion below. Both hold 512
# entries and are filled for every 8-bit exponent field value `i`, once at
# index (i | 0x000) + 1 and once at index (i | 0x100) + 1; the 0x100 entries
# additionally carry 0x8000 (or 0xFC00 instead of 0x7C00) in `basetable`.
#   basetable[k]  - UInt16 value the converter adds the shifted significand to
#   shifttable[k] - right-shift the converter applies to the 23-bit significand
const basetable = Array{UInt16}(512)
const shifttable = Array{UInt8}(512)

for i = 0:255
# `e` is the exponent field with the Float32 bias (127) removed
e = i - 127
if e < -24 # Very small numbers map to zero
basetable[i|0x000+1] = 0x0000
basetable[i|0x100+1] = 0x8000
shifttable[i|0x000+1] = 24
shifttable[i|0x100+1] = 24
elseif e < -14 # Small numbers map to denorms
# 0x0400 >> (-e-14) is the value of the implicit leading one once it has been
# moved into the 10-bit half-precision significand field
basetable[i|0x000+1] = (0x0400>>(-e-14))
basetable[i|0x100+1] = (0x0400>>(-e-14)) | 0x8000
shifttable[i|0x000+1] = -e-1
shifttable[i|0x100+1] = -e-1
elseif e <= 15 # Normal numbers just lose precision
# exponent re-biased by 15 and placed above the 10 significand bits;
# a shift of 13 drops the low 23 - 10 significand bits
basetable[i|0x000+1] = ((e+15)<<10)
basetable[i|0x100+1] = ((e+15)<<10) | 0x8000
shifttable[i|0x000+1] = 13
shifttable[i|0x100+1] = 13
elseif e < 128 # Large numbers map to Infinity
# shift of 24 discards the whole 23-bit significand
basetable[i|0x000+1] = 0x7C00
basetable[i|0x100+1] = 0xFC00
shifttable[i|0x000+1] = 24
shifttable[i|0x100+1] = 24
else # Infinity and NaN's stay Infinity and NaN's
# shift of 13 keeps the top 10 significand bits
basetable[i|0x000+1] = 0x7C00
basetable[i|0x100+1] = 0xFC00
shifttable[i|0x000+1] = 13
shifttable[i|0x100+1] = 13
end
end

# Narrow a Float32 to Float16 using the `basetable` / `shifttable` lookup
# tables, which are indexed by the sign bit and 8-bit exponent field of the
# input, and rounding to nearest with ties to even.
#
# NaN inputs are handled separately. Sent through the table/rounding path, a
# NaN whose payload lives only in the low 13 significand bits would lose every
# payload bit and come out as Inf (0x7C00 / 0xFC00), and an all-ones payload
# would round up and carry out of the exponent field (0x7FFF + 1 == 0x8000,
# i.e. -0.0). The NaN branch keeps the sign and the top 10 payload bits and
# forces the quiet bit so the result is always a NaN.
function convert(::Type{Float16}, val::Float32)
    f = reinterpret(UInt32, val)
    if val != val   # NaN is the only value that differs from itself
        nanbits::UInt16 = ((f >> 16) & 0x8000) | 0x7e00 | ((f >> 13) & 0x03ff)
        return reinterpret(Float16, nanbits)
    end
    # table index: 9 bits of sign+exponent, plus one for 1-based indexing
    i = (f >> 23) & 0x1ff + 1
    sh = shifttable[i]
    f &= 0x007fffff
    h::UInt16 = basetable[i] + (f >> sh)
    # round to nearest: inspect the first discarded bit
    nextbit = (f >> (sh-1)) & 1
    if nextbit != 0
        if h&1 == 1 ||                      # exactly halfway: round to even
           (f & ((1<<(sh-1))-1)) != 0       # above halfway: lower bits set
            h += 1
        end
    end
    return reinterpret(Float16, h)
end
import Base.llvmcall
# Implement conversion to and from Float16 with llvm intrinsics
#
# Each method below hands `llvmcall` a (declaration, body) pair of LLVM IR
# strings, the Julia return type, the argument tuple type, and the value to
# convert. Inside each body `%0` is the single argument and `%2` names the
# result of the intrinsic call, which is returned directly.
# The half-precision side is passed as `i16` in the IR.

# Float16 -> Float32 through @llvm.convert.from.fp16.f32
convert(::Type{Float32}, val::Float16) =
llvmcall(("""declare float @llvm.convert.from.fp16.f32(i16)""",
"""%2 = call float @llvm.convert.from.fp16.f32(i16 %0)
ret float %2"""),
Float32, Tuple{Float16}, val)

# Float16 -> Float64 through @llvm.convert.from.fp16.f64
convert(::Type{Float64}, val::Float16) =
llvmcall(("""declare double @llvm.convert.from.fp16.f64(i16)""",
"""%2 = call double @llvm.convert.from.fp16.f64(i16 %0)
ret double %2"""),
Float64, Tuple{Float16}, val)

# Float32 -> Float16 through @llvm.convert.to.fp16.f32
convert(::Type{Float16}, val::Float32) =
llvmcall(("""declare i16 @llvm.convert.to.fp16.f32(float)""",
"""%2 = call i16 @llvm.convert.to.fp16.f32(float %0)
ret i16 %2"""),
Float16, Tuple{Float32}, val)

# Float64 -> Float16 through @llvm.convert.to.fp16.f64
# NOTE(review): presumably this narrows in a single step rather than via
# Float32 as the previous definition in float.jl did — confirm rounding matches.
convert(::Type{Float16}, val::Float64) =
llvmcall(("""declare i16 @llvm.convert.to.fp16.f64(double)""",
"""%2 = call i16 @llvm.convert.to.fp16.f64(double %0)
ret i16 %2"""),
Float16, Tuple{Float64}, val)

convert(::Type{Bool}, x::Float16) = x==0 ? false : x==1 ? true : throw(InexactError())
convert(::Type{Int128}, x::Float16) = convert(Int128, Float32(x))
Expand Down
4 changes: 2 additions & 2 deletions deps/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ get: $(addprefix get-, $(DEP_LIBS))
configure: $(addprefix configure-, $(DEP_LIBS))
compile: $(addprefix compile-, $(DEP_LIBS))
check: $(addprefix check-, $(DEP_LIBS))
install: $(addprefix install-, $(DEP_LIBS))
install: $(addprefix install-, $(DEP_LIBS)) install-compiler_rt
cleanall: $(addprefix clean-, $(DEP_LIBS))
distcleanall: $(addprefix distclean-, $(DEP_LIBS))
rm -rf $(build_prefix)
Expand Down Expand Up @@ -268,4 +268,4 @@ include $(SRCDIR)/virtualenv.mk

.PHONY: default compile install cleanall distcleanall \
get-* configure-* compile-* check-* install-* \
clean-* distclean-* reinstall-* update-llvm
clean-* distclean-* reinstall-* update-llvm install-compiler_rt
2 changes: 1 addition & 1 deletion deps/Versions.make
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
LLVM_VER = 3.7.1
LLVM_VER = 3.8.0
Copy link
Contributor

@tkelman tkelman Jul 11, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll probably move to 3.8.1 not too long after branching for 0.6-dev, but that will need CI preparation.

Edit: and checksum updates.

LLVM_LIB_SUFFIX =
PCRE_VER = 10.21
DSFMT_VER = 2.2.3
Expand Down
2 changes: 1 addition & 1 deletion deps/llvm-ver.make
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ifeq ($(LLVM_VER_PATCH),)
LLVM_VER_PATCH := 0
endif

ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7 3.8))
ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7))
LLVM_USE_CMAKE := 0
else
LLVM_USE_CMAKE := 1
Expand Down
64 changes: 53 additions & 11 deletions deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ BUILD_LLVM_CLANG := 1
# because it's a build requirement
endif

ifeq ($(BUILD_LLVM_CLANG), 1)
BUILD_COMPILER_RT := 1
# build requirement
endif

ifeq ($(USE_POLLY),1)
ifeq ($(USE_SYSTEM_LLVM),0)
ifneq ($(LLVM_VER),svn)
Expand Down Expand Up @@ -65,13 +70,17 @@ endif # BUILD_LLDB

ifeq ($(BUILD_LLVM_CLANG),1)
LLVM_CLANG_TAR:=$(SRCDIR)/srccache/cfe-$(LLVM_TAR_EXT)
LLVM_COMPILER_RT_TAR:=$(SRCDIR)/srccache/compiler-rt-$(LLVM_TAR_EXT)
else
LLVM_CLANG_TAR:=
LLVM_COMPILER_RT_TAR:=
LLVM_LIBCXX_TAR:=
endif # BUILD_LLVM_CLANG

ifeq ($(BUILD_COMPILER_RT), 1)
LLVM_COMPILER_RT_TAR:=$(SRCDIR)/srccache/compiler-rt-$(LLVM_TAR_EXT)
else
LLVM_COMPILER_RT_TAR:=
endif # BUILD_COMPILER_RT

ifeq ($(BUILD_CUSTOM_LIBCXX),1)
LLVM_LIBCXX_TAR:=$(SRCDIR)/srccache/libcxx-$(LLVM_TAR_EXT)
endif
Expand Down Expand Up @@ -207,19 +216,29 @@ LLVM_FLAGS += LDFLAGS="$(LLVM_LDFLAGS)"
LLVM_MFLAGS += LDFLAGS="$(LLVM_LDFLAGS)"
endif

ifeq ($(BUILD_COMPILER_RT),1)
ifneq ($(BUILD_LLVM_CLANG),1)
# block default building of Clang
LLVM_MFLAGS += OPTIONAL_PARALLEL_DIRS=compiler-rt
endif
else
ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7))
LLVM_CMAKE += -DLLVM_EXTERNAL_COMPILER_RT_BUILD=OFF
else
LLVM_CMAKE += -DLLVM_TOOL_COMPILER_RT_BUILD=OFF
endif
endif

ifeq ($(BUILD_LLVM_CLANG),1)
LLVM_MFLAGS += OPTIONAL_PARALLEL_DIRS=clang
else
# block default building of Clang
LLVM_MFLAGS += OPTIONAL_PARALLEL_DIRS=
ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7))
LLVM_CMAKE += -DLLVM_EXTERNAL_CLANG_BUILD=OFF
LLVM_CMAKE += -DLLVM_EXTERNAL_COMPILER_RT_BUILD=OFF
else
LLVM_CMAKE += -DLLVM_TOOL_CLANG_BUILD=OFF
LLVM_CMAKE += -DLLVM_TOOL_COMPILER_RT_BUILD=OFF
endif
endif

ifeq ($(BUILD_LLDB),1)
LLVM_MFLAGS += OPTIONAL_DIRS=lldb
else
Expand Down Expand Up @@ -370,15 +389,17 @@ ifeq ($(BUILD_LLVM_CLANG),1)
git clone $(LLVM_GIT_URL_CLANG) $(LLVM_SRC_DIR)/tools/clang ) || \
(cd $(LLVM_SRC_DIR)/tools/clang && \
git pull --ff-only)
([ ! -d $(LLVM_SRC_DIR)/projects/compiler-rt ] && \
git clone $(LLVM_GIT_URL_COMPILER_RT) $(LLVM_SRC_DIR)/projects/compiler-rt ) || \
(cd $(LLVM_SRC_DIR)/projects/compiler-rt && \
git pull --ff-only)
ifneq ($(LLVM_GIT_VER_CLANG),)
(cd $(LLVM_SRC_DIR)/tools/clang && \
git checkout $(LLVM_GIT_VER_CLANG))
endif # LLVM_GIT_VER_CLANG
endif # BUILD_LLVM_CLANG
ifeq ($(BUILD_COMPILER_RT),1)
([ ! -d $(LLVM_SRC_DIR)/projects/compiler-rt ] && \
git clone $(LLVM_GIT_URL_COMPILER_RT) $(LLVM_SRC_DIR)/projects/compiler-rt ) || \
(cd $(LLVM_SRC_DIR)/projects/compiler-rt && \
git pull --ff-only)
endif # BUILD_COMPILER_RT
ifeq ($(BUILD_LLDB),1)
([ ! -d $(LLVM_SRC_DIR)/tools/lldb ] && \
git clone $(LLVM_GIT_URL_LLDB) $(LLVM_SRC_DIR)/tools/lldb ) || \
Expand Down Expand Up @@ -455,7 +476,7 @@ ifeq ($(LLVM_VER),3.7.1)
ifeq ($(BUILD_LLDB),1)
$(eval $(call LLVM_PATCH,lldb-3.7.1))
endif
ifeq ($(BUILD_LLVM_CLANG),1)
ifeq ($(BUILD_COMPILER_RT),1)
$(eval $(call LLVM_PATCH,compiler-rt-3.7.1))
endif
endif
Expand Down Expand Up @@ -526,6 +547,27 @@ distclean-llvm:
$(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR) \
$(LLVM_SRC_DIR) $(LLVM_BUILDDIR_withtype)

# COMPILER-RT
# Wraps the compiler-rt builtins static library in a shared library,
# $(build_shlibdir)/libcompiler-rt.$(SHLIB_EXT), produced by the
# install-compiler_rt target.
#
# CRT_OS:   $(OS) with any word ending in "inux" rewritten to "linux"
# CRT_ARCH: $(ARCH) with any word of the form i<x>86 rewritten to "i386"
# NOTE(review): other $(OS) values pass through unchanged — confirm they match
# the directory names compiler-rt actually uses.
CRT_OS := $(call patsubst,%inux,linux,$(OS))
CRT_ARCH := $(call patsubst,i%86,i386,$(ARCH))
CRT_STATIC_NAME := clang_rt.builtins-$(CRT_ARCH)
CRT_OBJ_TARGET := $(build_shlibdir)/libcompiler-rt.$(SHLIB_EXT)

# Locate the directory holding the static builtins library: inside our own
# LLVM build tree when LLVM is built from source, otherwise under the libdir
# reported by the system llvm-config.
# NOTE(review): the prerequisite is named lib$(CRT_STATIC_NAME) with no archive
# suffix (e.g. ".a"), while the link line looks it up via -l$(CRT_STATIC_NAME);
# `touch -c` never creates the file — confirm the intended file name.
ifeq ($(USE_SYSTEM_LLVM),0)
CRT_BUILD_DIR := $(LLVM_BUILDDIR_withtype)/lib/clang/$(LLVM_VER)/lib/$(CRT_OS)
$(CRT_BUILD_DIR)/lib$(CRT_STATIC_NAME): | $(LLVM_OBJ_TARGET)
touch -c $@
else
CRT_BUILD_DIR := $(shell llvm-config --libdir)/clang/$(shell llvm-config --version)/lib/$(CRT_OS)
$(CRT_BUILD_DIR)/lib$(CRT_STATIC_NAME):
endif

# Link every object of the static library into the shared library.
# NOTE(review): -Wl,--whole-archive is not closed with --no-whole-archive and
# is GNU-ld syntax — confirm behaviour on the other supported linkers.
$(CRT_OBJ_TARGET): $(CRT_BUILD_DIR)/lib$(CRT_STATIC_NAME)
$(CC) $(LDFLAGS) -shared $(fPIC) -o $@ -nostdlib -Wl,--whole-archive -L$(CRT_BUILD_DIR) -l$(CRT_STATIC_NAME)
touch -c $@

install-compiler_rt: $(CRT_OBJ_TARGET)

ifneq ($(LLVM_VER),svn)
get-llvm: $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
else
Expand Down
15 changes: 15 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,19 @@ void NotifyDebugger(jit_code_entry *JITCodeEntry)
}
// ------------------------ END OF TEMPORARY COPY FROM LLVM -----------------

// Resolve compiler-rt functions in the shared library that we created from compiler-rt
//
// `name` is the symbol being looked up. Returns the symbol's address, or 0 when
// the "libcompiler-rt" library could not be loaded, when `name` does not start
// with the "__" prefix, or when the library does not export the symbol.
static uint64_t resolve_compiler_rt(const char *name)
{
    // The library handle is loaded once, on first use, and kept for later calls.
    static void *compiler_rt_hdl = jl_load_dynamic_library_e("libcompiler-rt",
                                                             JL_RTLD_LOCAL);
    static const char *const prefix = "__";
    if (!compiler_rt_hdl)
        return 0;
    // Only names carrying the full prefix are forwarded to the library. The
    // comparison against 0 must sit outside the strncmp call: written inside,
    // it turned the length argument into the boolean 1, so only the first
    // character was ever checked.
    if (strncmp(name, prefix, strlen(prefix)) != 0)
        return 0;
    return (uintptr_t)jl_dlsym_e(compiler_rt_hdl, name);
}

#ifdef _OS_LINUX_
// Resolve non-lock free atomic functions in the libatomic library.
// This is the library that provides support for c11/c++11 atomic operations.
Expand Down Expand Up @@ -428,6 +441,8 @@ class JuliaOJIT {
if (uint64_t addr = resolve_atomic(Name.c_str()))
return RuntimeDyld::SymbolInfo(addr, JITSymbolFlags::Exported);
#endif
if (uint64_t addr = resolve_compiler_rt(Name.c_str()))
return RuntimeDyld::SymbolInfo(addr, JITSymbolFlags::Exported);
// Return failure code
return RuntimeDyld::SymbolInfo(nullptr);
},
Expand Down