diff --git a/Make.inc b/Make.inc
index 8d34d4be08880..e8d0976304e15 100644
--- a/Make.inc
+++ b/Make.inc
@@ -42,6 +42,9 @@ USE_SYSTEM_PATCHELF:=0
 # Link to the LLVM shared library
 USE_LLVM_SHLIB := 1
 
+# Set to 1 to build compiler-rt with LLVM and link against its builtins (libcompiler-rt)
+BUILD_COMPILER_RT := 1
+
 ## Settings for various Intel tools
 # Set to 1 to use MKL
 USE_INTEL_MKL ?= 0
diff --git a/Makefile b/Makefile
index 9d7e02534915c..68ef0e52ad785 100644
--- a/Makefile
+++ b/Makefile
@@ -180,7 +180,8 @@ endif
 $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%.o
 	@$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ $< \
 		$(if $(findstring -debug.$(SHLIB_EXT),$(notdir $@)),-ljulia-debug,-ljulia) \
-		$$([ $(OS) = WINNT ] && echo '' -lssp))
+		$$([ $(OS) = WINNT ] && echo '' -lssp) \
+		$(if $(filter 1,$(BUILD_COMPILER_RT)),-lcompiler-rt))
 	@$(INSTALL_NAME_CMD)$(notdir $@) $@
 	@$(DSYMUTIL) $@
 
diff --git a/base/float.jl b/base/float.jl
index 6d507f2d6b34b..103a56d430cc0 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -99,11 +99,11 @@ function convert(::Type{Float32}, x::Int128)
 end
 
 #convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x))
-convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x))
+# convert(::Type{Float16}, x::Float64) is defined in float16.jl
 convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,unbox(Float64,x)))
 
 #convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
-convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32,x))
+# convert(::Type{Float64}, x::Float16) is defined in float16.jl
 convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,unbox(Float32,x)))
 
 convert(::Type{AbstractFloat}, x::Bool)    = convert(Float64, x)
diff --git a/base/float16.jl b/base/float16.jl
index 46dcb8bfaa0d7..b33239c0d660e 100644
--- a/base/float16.jl
+++ b/base/float16.jl
@@ -1,102 +1,29 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
-
-function convert(::Type{Float32}, val::Float16)
-    ival::UInt32 = reinterpret(UInt16, val)
-    sign::UInt32 = (ival & 0x8000) >> 15
-    exp::UInt32  = (ival & 0x7c00) >> 10
-    sig::UInt32  = (ival & 0x3ff) >> 0
-    ret::UInt32
-
-    if exp == 0
-        if sig == 0
-            sign = sign << 31
-            ret = sign | exp | sig
-        else
-            n_bit = 1
-            bit = 0x0200
-            while (bit & sig) == 0
-                n_bit = n_bit + 1
-                bit = bit >> 1
-            end
-            sign = sign << 31
-            exp = (-14 - n_bit + 127) << 23
-            sig = ((sig & (~bit)) << n_bit) << (23 - 10)
-            ret = sign | exp | sig
-        end
-    elseif exp == 0x1f
-        if sig == 0  # Inf
-            if sign == 0
-                ret = 0x7f800000
-            else
-                ret = 0xff800000
-            end
-        else  # NaN
-            ret = 0x7fc00000 | (sign<<31)
-        end
-    else
-        sign = sign << 31
-        exp  = (exp - 15 + 127) << 23
-        sig  = sig << (23 - 10)
-        ret = sign | exp | sig
-    end
-    return reinterpret(Float32, ret)
-end
-
-# Float32 -> Float16 algorithm from:
-#   "Fast Half Float Conversion" by Jeroen van der Zijp
-#   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
-
-const basetable = Array{UInt16}(512)
-const shifttable = Array{UInt8}(512)
-
-for i = 0:255
-    e = i - 127
-    if e < -24  # Very small numbers map to zero
-        basetable[i|0x000+1] = 0x0000
-        basetable[i|0x100+1] = 0x8000
-        shifttable[i|0x000+1] = 24
-        shifttable[i|0x100+1] = 24
-    elseif e < -14  # Small numbers map to denorms
-        basetable[i|0x000+1] = (0x0400>>(-e-14))
-        basetable[i|0x100+1] = (0x0400>>(-e-14)) | 0x8000
-        shifttable[i|0x000+1] = -e-1
-        shifttable[i|0x100+1] = -e-1
-    elseif e <= 15  # Normal numbers just lose precision
-        basetable[i|0x000+1] = ((e+15)<<10)
-        basetable[i|0x100+1] = ((e+15)<<10) | 0x8000
-        shifttable[i|0x000+1] = 13
-        shifttable[i|0x100+1] = 13
-    elseif e < 128  # Large numbers map to Infinity
-        basetable[i|0x000+1] = 0x7C00
-        basetable[i|0x100+1] = 0xFC00
-        shifttable[i|0x000+1] = 24
-        shifttable[i|0x100+1] = 24
-    else  # Infinity and NaN's stay Infinity and NaN's
-        basetable[i|0x000+1] = 0x7C00
-        basetable[i|0x100+1] = 0xFC00
-        shifttable[i|0x000+1] = 13
-        shifttable[i|0x100+1] = 13
-    end
-end
-
-function convert(::Type{Float16}, val::Float32)
-    f = reinterpret(UInt32, val)
-    i = (f >> 23) & 0x1ff + 1
-    sh = shifttable[i]
-    f &= 0x007fffff
-    h::UInt16 = basetable[i] + (f >> sh)
-    # round
-    # NOTE: we maybe should ignore NaNs here, but the payload is
-    # getting truncated anyway so "rounding" it might not matter
-    nextbit = (f >> (sh-1)) & 1
-    if nextbit != 0
-        if h&1 == 1 ||  # round halfway to even
-            (f & ((1<<(sh-1))-1)) != 0  # check lower bits
-            h += 1
-        end
-    end
-    reinterpret(Float16, h)
-end
+import Base.llvmcall
+# Float16 <-> Float32/Float64 conversions via the llvm.convert.{from,to}.fp16 intrinsics (Float16 is passed as i16)
+convert(::Type{Float32}, val::Float16) =
+    llvmcall(("""declare float @llvm.convert.from.fp16.f32(i16)""",
+              """%2 = call float @llvm.convert.from.fp16.f32(i16 %0)
+                 ret float %2"""),
+              Float32, Tuple{Float16}, val)
+
+convert(::Type{Float64}, val::Float16) =
+    llvmcall(("""declare double @llvm.convert.from.fp16.f64(i16)""",
+              """%2 = call double @llvm.convert.from.fp16.f64(i16 %0)
+                 ret double %2"""),
+              Float64, Tuple{Float16}, val)
+
+convert(::Type{Float16}, val::Float32) =
+    llvmcall(("""declare i16 @llvm.convert.to.fp16.f32(float)""",
+              """%2 = call i16 @llvm.convert.to.fp16.f32(float %0)
+                 ret i16 %2"""),
+              Float16, Tuple{Float32}, val)
+
+convert(::Type{Float16}, val::Float64) =
+    llvmcall(("""declare i16 @llvm.convert.to.fp16.f64(double)""",
+              """%2 = call i16 @llvm.convert.to.fp16.f64(double %0)
+                 ret i16 %2"""),
+              Float16, Tuple{Float64}, val)
 
 convert(::Type{Bool},    x::Float16) = x==0 ? false : x==1 ? true : throw(InexactError())
 convert(::Type{Int128},  x::Float16) = convert(Int128, Float32(x))
diff --git a/deps/Makefile b/deps/Makefile
index e376b07671922..f1c351adaacc3 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -209,7 +209,7 @@ get: $(addprefix get-, $(DEP_LIBS))
 configure: $(addprefix configure-, $(DEP_LIBS))
 compile: $(addprefix compile-, $(DEP_LIBS))
 check: $(addprefix check-, $(DEP_LIBS))
-install: $(addprefix install-, $(DEP_LIBS))
+install: $(addprefix install-, $(DEP_LIBS)) $(if $(filter 1,$(BUILD_COMPILER_RT)),install-compiler_rt)
 cleanall: $(addprefix clean-, $(DEP_LIBS))
 distcleanall: $(addprefix distclean-, $(DEP_LIBS))
 	rm -rf $(build_prefix)
@@ -268,4 +268,4 @@ include $(SRCDIR)/virtualenv.mk
 
 .PHONY: default compile install cleanall distcleanall \
 	get-* configure-* compile-* check-* install-* \
-	clean-* distclean-* reinstall-* update-llvm
+	clean-* distclean-* reinstall-* update-llvm install-compiler_rt
diff --git a/deps/Versions.make b/deps/Versions.make
index 3cf63f3c7eaf4..b83cb58ae09f4 100644
--- a/deps/Versions.make
+++ b/deps/Versions.make
@@ -1,4 +1,4 @@
-LLVM_VER = 3.7.1
+LLVM_VER = 3.8.0
 LLVM_LIB_SUFFIX =
 PCRE_VER = 10.21
 DSFMT_VER = 2.2.3
diff --git a/deps/llvm-ver.make b/deps/llvm-ver.make
index 323ac9423fb51..c0f595722dec3 100644
--- a/deps/llvm-ver.make
+++ b/deps/llvm-ver.make
@@ -11,7 +11,7 @@ ifeq ($(LLVM_VER_PATCH),)
 LLVM_VER_PATCH := 0
 endif
 
-ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7 3.8))
+ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7))
 LLVM_USE_CMAKE := 0
 else
 LLVM_USE_CMAKE := 1
diff --git a/deps/llvm.mk b/deps/llvm.mk
index bf33bc4ca354b..f7be6067599bd 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -13,6 +13,11 @@ BUILD_LLVM_CLANG := 1
 # because it's a build requirement
 endif
 
+# compiler-rt is a build requirement whenever Clang is built
+ifeq ($(BUILD_LLVM_CLANG),1)
+BUILD_COMPILER_RT := 1
+endif
+
 ifeq ($(USE_POLLY),1)
 ifeq ($(USE_SYSTEM_LLVM),0)
 ifneq ($(LLVM_VER),svn)
@@ -65,13 +70,17 @@ endif # BUILD_LLDB
 
 ifeq ($(BUILD_LLVM_CLANG),1)
 LLVM_CLANG_TAR:=$(SRCDIR)/srccache/cfe-$(LLVM_TAR_EXT)
-LLVM_COMPILER_RT_TAR:=$(SRCDIR)/srccache/compiler-rt-$(LLVM_TAR_EXT)
 else
 LLVM_CLANG_TAR:=
-LLVM_COMPILER_RT_TAR:=
 LLVM_LIBCXX_TAR:=
 endif # BUILD_LLVM_CLANG
 
+ifeq ($(BUILD_COMPILER_RT), 1)
+LLVM_COMPILER_RT_TAR:=$(SRCDIR)/srccache/compiler-rt-$(LLVM_TAR_EXT)
+else
+LLVM_COMPILER_RT_TAR:=
+endif # BUILD_COMPILER_RT
+
 ifeq ($(BUILD_CUSTOM_LIBCXX),1)
 LLVM_LIBCXX_TAR:=$(SRCDIR)/srccache/libcxx-$(LLVM_TAR_EXT)
 endif
@@ -207,19 +216,29 @@ LLVM_FLAGS += LDFLAGS="$(LLVM_LDFLAGS)"
 LLVM_MFLAGS += LDFLAGS="$(LLVM_LDFLAGS)"
 endif
 
+# Without Clang, override OPTIONAL_PARALLEL_DIRS to block its default build
+ifneq ($(BUILD_LLVM_CLANG),1)
+LLVM_MFLAGS += OPTIONAL_PARALLEL_DIRS=$(if $(filter 1,$(BUILD_COMPILER_RT)),compiler-rt)
+endif
+# Turn off the compiler-rt build when it was not requested
+ifneq ($(BUILD_COMPILER_RT),1)
+ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7))
+LLVM_CMAKE += -DLLVM_EXTERNAL_COMPILER_RT_BUILD=OFF
+else
+LLVM_CMAKE += -DLLVM_TOOL_COMPILER_RT_BUILD=OFF
+endif
+endif
+
 ifeq ($(BUILD_LLVM_CLANG),1)
 LLVM_MFLAGS += OPTIONAL_PARALLEL_DIRS=clang
 else
-# block default building of Clang
-LLVM_MFLAGS += OPTIONAL_PARALLEL_DIRS=
 ifeq ($(LLVM_VER_SHORT),$(filter $(LLVM_VER_SHORT),3.3 3.4 3.5 3.6 3.7))
 LLVM_CMAKE += -DLLVM_EXTERNAL_CLANG_BUILD=OFF
-LLVM_CMAKE += -DLLVM_EXTERNAL_COMPILER_RT_BUILD=OFF
 else
 LLVM_CMAKE += -DLLVM_TOOL_CLANG_BUILD=OFF
-LLVM_CMAKE += -DLLVM_TOOL_COMPILER_RT_BUILD=OFF
 endif
 endif
+
 ifeq ($(BUILD_LLDB),1)
 LLVM_MFLAGS += OPTIONAL_DIRS=lldb
 else
@@ -370,15 +389,17 @@ ifeq ($(BUILD_LLVM_CLANG),1)
 		git clone $(LLVM_GIT_URL_CLANG) $(LLVM_SRC_DIR)/tools/clang  ) || \
 		(cd $(LLVM_SRC_DIR)/tools/clang  && \
 		git pull --ff-only)
-	([ ! -d $(LLVM_SRC_DIR)/projects/compiler-rt ] && \
-		git clone $(LLVM_GIT_URL_COMPILER_RT) $(LLVM_SRC_DIR)/projects/compiler-rt  ) || \
-		(cd $(LLVM_SRC_DIR)/projects/compiler-rt  && \
-		git pull --ff-only)
 ifneq ($(LLVM_GIT_VER_CLANG),)
 	(cd $(LLVM_SRC_DIR)/tools/clang && \
 		git checkout $(LLVM_GIT_VER_CLANG))
 endif # LLVM_GIT_VER_CLANG
 endif # BUILD_LLVM_CLANG
+ifeq ($(BUILD_COMPILER_RT),1)
+	([ ! -d $(LLVM_SRC_DIR)/projects/compiler-rt ] && \
+		git clone $(LLVM_GIT_URL_COMPILER_RT) $(LLVM_SRC_DIR)/projects/compiler-rt  ) || \
+		(cd $(LLVM_SRC_DIR)/projects/compiler-rt  && \
+		git pull --ff-only)
+endif # BUILD_COMPILER_RT
 ifeq ($(BUILD_LLDB),1)
 	([ ! -d $(LLVM_SRC_DIR)/tools/lldb ] && \
 		git clone $(LLVM_GIT_URL_LLDB) $(LLVM_SRC_DIR)/tools/lldb  ) || \
@@ -455,7 +476,7 @@ ifeq ($(LLVM_VER),3.7.1)
 ifeq ($(BUILD_LLDB),1)
 $(eval $(call LLVM_PATCH,lldb-3.7.1))
 endif
-ifeq ($(BUILD_LLVM_CLANG),1)
+ifeq ($(BUILD_COMPILER_RT),1)
 $(eval $(call LLVM_PATCH,compiler-rt-3.7.1))
 endif
 endif
@@ -526,6 +547,27 @@ distclean-llvm:
 		$(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR) \
 		$(LLVM_SRC_DIR) $(LLVM_BUILDDIR_withtype)
 
+# COMPILER-RT: repackage the static builtins archive as the shared library libcompiler-rt
+CRT_OS := $(patsubst %inux,linux,$(OS))
+CRT_ARCH := $(patsubst i%86,i386,$(ARCH))
+CRT_STATIC_NAME := clang_rt.builtins-$(CRT_ARCH)
+CRT_OBJ_TARGET := $(build_shlibdir)/libcompiler-rt.$(SHLIB_EXT)
+# The static archive is expected in the LLVM build tree, or in the system LLVM's libdir
+ifeq ($(USE_SYSTEM_LLVM),0)
+CRT_BUILD_DIR := $(LLVM_BUILDDIR_withtype)/lib/clang/$(LLVM_VER)/lib/$(CRT_OS)
+$(CRT_BUILD_DIR)/lib$(CRT_STATIC_NAME).a: | $(LLVM_OBJ_TARGET)
+	touch -c $@
+else
+CRT_BUILD_DIR := $(shell llvm-config --libdir)/clang/$(shell llvm-config --version)/lib/$(CRT_OS)
+$(CRT_BUILD_DIR)/lib$(CRT_STATIC_NAME).a:
+endif
+# Pull every archive member into the shared library, then restore the linker default
+$(CRT_OBJ_TARGET): $(CRT_BUILD_DIR)/lib$(CRT_STATIC_NAME).a
+	$(CC) $(LDFLAGS) -shared $(fPIC) -o $@ -nostdlib -Wl,--whole-archive -L$(CRT_BUILD_DIR) -l$(CRT_STATIC_NAME) -Wl,--no-whole-archive
+	touch -c $@
+
+install-compiler_rt: $(CRT_OBJ_TARGET)
+
 ifneq ($(LLVM_VER),svn)
 get-llvm: $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
 else
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index c0eeb33aabd72..5dfa1ac5841f6 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -198,6 +198,19 @@ void NotifyDebugger(jit_code_entry *JITCodeEntry)
 }
 // ------------------------ END OF TEMPORARY COPY FROM LLVM -----------------
 
+// Look up `__`-prefixed symbols in the libcompiler-rt shared library; returns 0 if the library is missing or the prefix does not match
+static uint64_t resolve_compiler_rt(const char *name)
+{
+    static void *compiler_rt_hdl = jl_load_dynamic_library_e("libcompiler-rt",
+                                                             JL_RTLD_LOCAL);
+    static const char *const prefix = "__";
+    if (!compiler_rt_hdl) // library could not be loaded: nothing to resolve
+        return 0;
+    if (strncmp(name, prefix, strlen(prefix)) != 0) // compare the whole prefix, not just its first character
+        return 0;
+    return (uintptr_t)jl_dlsym_e(compiler_rt_hdl, name);
+}
+
 #ifdef _OS_LINUX_
 // Resolve non-lock free atomic functions in the libatomic library.
 // This is the library that provides support for c11/c++11 atomic operations.
@@ -428,6 +441,8 @@ class JuliaOJIT {
                             if (uint64_t addr = resolve_atomic(Name.c_str()))
                                 return RuntimeDyld::SymbolInfo(addr, JITSymbolFlags::Exported);
 #endif
+                            if (uint64_t addr = resolve_compiler_rt(Name.c_str()))
+                                return RuntimeDyld::SymbolInfo(addr, JITSymbolFlags::Exported);
                             // Return failure code
                             return RuntimeDyld::SymbolInfo(nullptr);
                           },